6
8

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 5 years have passed since last update.

Python Design Pattern - テンプレートメソッド

Last updated at Posted at 2014-12-24

Python in Practice: Create Better Programs Using Concurrencyより

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# http://www.amazon.co.jp/gp/product/B00EO3TRL2
# Python in Practice: Create Better Programs Using Concurrency, Libraries, and Patterns (Developer's Library) 

import abc
import re
import HTMLParser


class AbstractWordCounter(object):
    __metaclass__ = abc.ABCMeta

    @staticmethod
    @abc.abstractmethod
    def can_count(filename):
        pass

    @staticmethod
    @abc.abstractmethod
    def count(filename):
        pass


class PlainTextWordCounter(AbstractWordCounter):
    @staticmethod
    def count(filename):
        if not PlainTextWordCounter.can_count(filename):
            return 0
        regex = re.compile(r"\w+")
        total = 0
        with open(filename) as readfile:
            for line in readfile:
                for _ in regex.finditer(line):
                    total += 1
        return total


    @staticmethod
    def can_count(filename):
        return filename.lower().endswith((".py", ".txt"))


class HtmlWordCounter(AbstractWordCounter):
    @staticmethod
    def count(filename):
        if not HtmlWordCounter.can_count(filename):
            return 0
        parser = MyHTMLParser()
        with open(filename) as readfile:
            parser.feed(readfile.read())
        return parser.count


    @staticmethod
    def can_count(filename):
        return filename.lower().endswith((".html", ".htm"))


class MyHTMLParser(HTMLParser.HTMLParser):
    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.regex = re.compile(r"\w+")
        self.inText = True
        self.text = []
        self.count = 0

    def handle_starttag(self, tag, attrs):
        if tag in {"script", "style"}:
            self.inText = False

    def handle_endtag(self, tag):
        if tag in {"script", "style"}:
            self.inText = True
        else:
            for _ in self.regex.findall(" ".join(self.text)):
                self.count += 1
            self.text = []

    def handle_data(self, text):
        if self.inText:
            text = text.rstrip()
            if text:
                self.text.append(text)


def count_word(filename):
    for wordCounter in (PlainTextWordCounter, HtmlWordCounter):
        if wordCounter.can_count(filename):
            return wordCounter.count(filename)


c = count_word("/tmp/sample.txt")
print "c=" + str(c) + "\n"

h = count_word("/tmp/sample.html")
print "h=" + str(h) + "\n"
6
8
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
6
8

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?