More than 5 years have passed since the last update.

Python Design Pattern - テンプレートメソッド

Last updated at Posted at 2014-12-24

Python in Practice: Create Better Programs Using Concurrency, Libraries, and Patterns より

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# http://www.amazon.co.jp/gp/product/B00EO3TRL2
# Python in Practice: Create Better Programs Using Concurrency, Libraries, and Patterns (Developer's Library) 

import abc
import re
import HTMLParser

class AbstractWordCounter(object):
    """Interface shared by the word counters in this module.

    A counter exposes two static methods that take a file name:
    ``can_count`` reports whether the counter handles that file and
    ``count`` returns the number of words in it.  Counters are used
    through the class itself and are never instantiated.
    """
    __metaclass__ = abc.ABCMeta

    @staticmethod
    @abc.abstractmethod
    def can_count(filename):
        """Return True if this counter is able to process *filename*.

        Raises:
            NotImplementedError: always; concrete counters must override.
        """
        # Raise instead of silently returning None so that a counter which
        # forgets to override this fails loudly at the call site.
        raise NotImplementedError

    @staticmethod
    @abc.abstractmethod
    def count(filename):
        """Return the number of words found in *filename*.

        Raises:
            NotImplementedError: always; concrete counters must override.
        """
        raise NotImplementedError

class PlainTextWordCounter(AbstractWordCounter):
    """Word counter for plain-text files (``.py`` and ``.txt``)."""

    @staticmethod
    def count(filename):
        """Return the number of ``\\w+`` matches in *filename*.

        Returns 0 without opening the file when the extension is not one
        this counter handles.  Errors raised by ``open`` propagate to the
        caller.
        """
        if not PlainTextWordCounter.can_count(filename):
            return 0
        regex = re.compile(r"\w+")
        total = 0
        with open(filename) as readfile:
            # Read line by line so the whole file is never held in memory.
            for line in readfile:
                total += sum(1 for _ in regex.finditer(line))
        return total

    @staticmethod
    def can_count(filename):
        """Return True if *filename* ends with ``.py`` or ``.txt``.

        The comparison is case-insensitive.
        """
        return filename.lower().endswith((".py", ".txt"))

class HtmlWordCounter(AbstractWordCounter):
    """Word counter for HTML files (``.html`` and ``.htm``)."""

    @staticmethod
    def count(filename):
        """Return the word count that ``MyHTMLParser`` reports for *filename*.

        Returns 0 without opening the file when the extension is not one
        this counter handles.  Errors raised by ``open`` propagate to the
        caller.
        """
        if not HtmlWordCounter.can_count(filename):
            return 0
        parser = MyHTMLParser()
        with open(filename) as readfile:
            # The parser tallies words in its callbacks while it is fed.
            parser.feed(readfile.read())
        return parser.count

    @staticmethod
    def can_count(filename):
        """Return True if *filename* ends with ``.html`` or ``.htm``.

        The comparison is case-insensitive.
        """
        return filename.lower().endswith((".html", ".htm"))

class MyHTMLParser(HTMLParser.HTMLParser):
    """HTML parser that counts words in text outside script/style elements.

    Text is buffered in ``self.text`` as it arrives and folded into
    ``self.count`` each time an end tag other than ``</script>`` or
    ``</style>`` is seen.
    """

    # Elements whose content must not contribute to the word count.
    _SKIPPED_TAGS = frozenset(("script", "style"))

    def __init__(self):
        # The base initializer sets up the parser's internal buffer; without
        # it feed() fails.  It is called explicitly rather than via super()
        # because the Python 2 base class is an old-style class.
        HTMLParser.HTMLParser.__init__(self)
        self.regex = re.compile(r"\w+")
        self.inText = True   # False while inside <script> or <style>
        self.text = []       # text fragments not yet counted
        self.count = 0       # running total of words counted so far

    def handle_starttag(self, tag, attrs):
        """Stop collecting text when a script or style element opens."""
        if tag in self._SKIPPED_TAGS:
            self.inText = False

    def handle_endtag(self, tag):
        """Resume collecting after script/style; otherwise count buffered text."""
        if tag in self._SKIPPED_TAGS:
            self.inText = True
        else:
            # Count at every ordinary end tag so documents that contain no
            # script/style elements are still counted.
            self.count += len(self.regex.findall(" ".join(self.text)))
            self.text = []

    def handle_data(self, text):
        """Buffer *text* unless it is inside script/style or is blank."""
        if self.inText:
            text = text.rstrip()
            if text:
                self.text.append(text)

def count_word(filename):
    """Count the words in *filename* using the first counter that accepts it.

    The plain-text counter is tried before the HTML counter.  Returns that
    counter's result, or None when neither counter accepts the file.
    """
    available = (PlainTextWordCounter, HtmlWordCounter)
    # The generator is lazy, so can_count is only asked until one says yes.
    chosen = next(
        (counter for counter in available if counter.can_count(filename)),
        None,
    )
    if chosen is None:
        return None
    return chosen.count(filename)

# Demo run: count the words in one plain-text and one HTML sample file and
# print each result followed by a blank line.
c = count_word("/tmp/sample.txt")
print("c=%s\n" % (c,))

h = count_word("/tmp/sample.html")
print("h=%s\n" % (h,))

