More than 5 years have passed since last update.

Python Design Pattern - テンプレートメソッド

Last updated at 2014-12-24. Posted at 2014-12-24.

Python in Practice: Create Better Programs Using Concurrencyより

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# http://www.amazon.co.jp/gp/product/B00EO3TRL2
# Python in Practice: Create Better Programs Using Concurrency, Libraries, and Patterns (Developer's Library) 

import abc
import re
import HTMLParser


class AbstractWordCounter(object):
    """Interface shared by the word counters in this module.

    A concrete counter supplies two static methods: ``can_count`` reports
    whether it handles a given file name, and ``count`` returns the number
    of words in that file.  The versions declared here are placeholders
    that do nothing and return ``None``.
    """

    __metaclass__ = abc.ABCMeta

    @staticmethod
    @abc.abstractmethod
    def can_count(filename):
        """Return whether this counter handles *filename*."""

    @staticmethod
    @abc.abstractmethod
    def count(filename):
        """Return the number of words found in *filename*."""


class PlainTextWordCounter(AbstractWordCounter):
    """Word counter for plain-text files (``.py`` and ``.txt``)."""

    @staticmethod
    def can_count(filename):
        """Return True if *filename* ends in .py or .txt, ignoring case."""
        lowered = filename.lower()
        return lowered.endswith((".py", ".txt"))

    @staticmethod
    def count(filename):
        """Return the number of word-character runs in *filename*.

        A file name that ``can_count`` rejects yields 0 without the file
        being opened.  Otherwise the file is read line by line and every
        regex word match on each line is counted.
        """
        if PlainTextWordCounter.can_count(filename):
            word_pattern = re.compile(r"\w+")
            with open(filename) as source:
                return sum(len(word_pattern.findall(line)) for line in source)
        return 0


class HtmlWordCounter(AbstractWordCounter):
    """Word counter for HTML files (``.html`` and ``.htm``)."""

    @staticmethod
    def can_count(filename):
        """Return True if *filename* ends in .html or .htm, ignoring case."""
        lowered = filename.lower()
        return lowered.endswith((".html", ".htm"))

    @staticmethod
    def count(filename):
        """Return the word count that MyHTMLParser reports for *filename*.

        A file name that ``can_count`` rejects yields 0 without the file
        being opened.  Otherwise the whole file is read and fed to a fresh
        ``MyHTMLParser``, whose ``count`` attribute is returned.
        """
        if HtmlWordCounter.can_count(filename):
            html_parser = MyHTMLParser()
            with open(filename) as source:
                markup = source.read()
                html_parser.feed(markup)
            return html_parser.count
        return 0


class MyHTMLParser(HTMLParser.HTMLParser):
    """HTML parser that counts the words in a document's text.

    Text inside ``<script>`` and ``<style>`` elements is ignored.  After
    ``feed()`` has been called, the running total is available as the
    ``count`` attribute.
    """

    # Elements whose content is code or styling rather than readable text.
    _SKIPPED_TAGS = frozenset(("script", "style"))

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.regex = re.compile(r"\w+")
        # True while the parser is outside <script>/<style>.
        self.inText = True
        # Kept so existing code that reads this attribute still works; words
        # are counted as soon as they arrive, so nothing is buffered here.
        self.text = []
        self.count = 0

    def handle_starttag(self, tag, attrs):
        """Stop counting when a script or style element opens."""
        if tag in self._SKIPPED_TAGS:
            self.inText = False

    def handle_endtag(self, tag):
        """Resume counting when a script or style element closes."""
        if tag in self._SKIPPED_TAGS:
            self.inText = True

    def handle_data(self, text):
        """Add the words in *text* to ``count`` unless inside script/style.

        Counting happens here rather than at the next end tag, so text that
        is followed only by start tags (for example ``words<br>`` at the end
        of the input) is no longer dropped.  Whitespace-only text contains
        no regex matches and therefore adds nothing.
        """
        if self.inText:
            self.count += len(self.regex.findall(text))


def count_word(filename):
    """Return the word count of *filename* from the first matching counter.

    The plain-text counter is tried before the HTML counter.  If neither
    counter's ``can_count`` accepts the file name, ``None`` is returned.
    """
    candidates = (PlainTextWordCounter, HtmlWordCounter)
    chosen = next(
        (counter for counter in candidates if counter.can_count(filename)),
        None,
    )
    if chosen is None:
        return None
    return chosen.count(filename)


# Demo: count the words in a sample plain-text file and print the result.
# The print statement adds its own newline, so the explicit "\n" leaves a
# blank line after each result.
c = count_word("/tmp/sample.txt")
print "c=" + str(c) + "\n"

# Same again for a sample HTML file.
h = count_word("/tmp/sample.html")
print "h=" + str(h) + "\n"

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do by signing up