More than 5 years have passed since last update.

Effective Python 学習備忘録 8日目【8/100】

Last updated at 2021-01-10Posted at 2021-01-09

はじめに

Twitterで一時期流行していた 100 Days Of Code なるものを先日知りました。本記事は、初学者である私が100日の学習を通してどの程度成長できるか記録を残すこと、アウトプットすることを目的とします。誤っている点、読みにくい点多々あると思います。ご指摘いただけると幸いです！

今回学習する教材

Effective Python
- 8章構成
- 本章216ページ

今日の進捗

進行状況：64-69ページ
第3章：クラスと継承
本日学んだことの中で、よく忘れるところ、知らなかったところを書いていきます。

@classmethodポリモルフィズムを使ってオブジェクトをジェネリックに構築する

ポリモルフィズムとは、ある階層の複数のクラスが、あるメソッドのそれぞれのバージョンを実装する方式の一つ

例えば、MapReduceの実装を書いていて、入力データを表す共通クラスが欲しいとします。サブクラスで定義する必要のあるreadメソッドを持つ共通クラスを次のように定義します。
ここで、MapReduceとは「分割された大量のデータをクラスタで分散処理するためのプログラムのことです。

class InputData(object):
    def read(self):
        raise NotImplementedError

InputDataクラスを継承してサブクラスを定義

class PathInputData(InputData):
    def __init__(self, path):
        super().__init__()       # はInputDataの__init__()を実行するはずですが、InputDataに__init__は無いのですが、必要なのでしょうか...?
        self.path = path
    
    def read(self):
        return open(self.path).read()

続いて、入力データを利用するMapReduceのWorkerも定義します。

class Worker(object):
    def __init_(self, input_data):
        self.input_data = input_data
        self.result = None

    def map(self):
        raise NotImplementedError

    def reduce(self, other):
        raise NotImplementedError

続いて、Workerクラスを継承して改行のカウンタを定義します。

class LineCountWorker(Worker):
    def map(self):
        data = self.input_data.read()
        self.result = data.count('\n')

    def reduce(self, other):
        self.result += other.result

これまで定義してきたクラスを統合する方法を考えます。1つ目は、ヘルパー関数を使ってオブジェクトを構築し、連携する作業を手作業で行うことです。

import os
from threading import Thread

# ディレクトリの内容をリストして、そこに含まれる各ファイルに対するPathInputDataインスタンスを生成
def generate_inputs(data_dir):
    for name in os.listdir(data_dir):
        yield PathInputData(os.path.join(data_dir, name))

# generate_inputsで返されたinputDataインスタンスを用いてLineCountWorkerインスタンスを生成
def create_workers(input_list):
    workers = []
    for input_data in input_list:
        workers.append(LineCountWorker(input_data))
    return workers

def execute(workers):
    threads = [Thread(target=w.map) for w in workers]
    for thread in threads: thread.start()
    for thread in threads: thread.join()

    first, rest = workers[0], workers[1:]
    for worker in rest:
        first.reduce(worker)
        return first.result

# 最後に、これらをまとめて、各ステップを実行する関数に
def mapreduce(data_dir):
    inputs = generate_inputs(data_dir)
    workers = create_workers(inputs)
    return execute(workers)

この統合方法の問題は、他のInputDataやWorkerといったサブクラスを書いたら、generate_inputsやcreate_workersを書き直して、mapreduce関数がを対応させなければいけないという点です。
この問題を解決する方法は@classmethodを使うことです。これは、高知宇されたオブジェクトにではなく、クラス全体に適用されます。
この方式をMapReduceクラスに適用したコードが以下になります。

import os
from threading import Thread

class InputData(object):
    def read(self):
        raise NotImplementedError

class PathInputData(InputData):
    def __init__(self, path):
        super().__init__()
        self.path = path
    
    def read(self):
        return open(self.path).read()

class Worker(object):
    def __init_(self, input_data):
        self.input_data = input_data
        self.result = None

    def map(self):
        raise NotImplementedError

    def reduce(self, other):
        raise NotImplementedError

class LineCountWorker(Worker):
    def map(self):
        data = self.input_data.read()
        self.result = data.count('\n')

    def reduce(self, other):
        self.result += other.result

def generate_inputs(data_dir):
    for name in os.listdir(data_dir):
        yield PathInputData(os.path.join(data_dir, name))

def create_workers(input_list):
    workers = []
    for input_data in input_list:
        workers.append(LineCountWorker(input_data))
    return workers

def execute(workers):
    threads = [Thread(target=w.map) for w in workers]
    for thread in threads: thread.start()
    for thread in threads: thread.join()

    first, rest = workers[0], workers[1:]
    for worker in rest:
        first.reduce(worker)
        return first.result

class GenericInputData(object):
    def rad(self):
        raise NotImplementedError

    @classmethod
    def generate_inputs(cls, config):
        raise NotImplementedError

class PathInputData(GenericInputData):
    def __init__(self, path):
        super().__init__()
        self.path = path

    def read(self):
        with open(self.path) as f:
            return f.read()

    @classmethod
    def generate_inputs(cls, config):
        data_dir = config['data_dir']
        for name in os.listdir(data_dir):
            yield cls(os.path.join(data_dir, name))

class GenericWorker:
    def __init__(self, input_data):
        self.input_data = input_data
        self.result = None

    def map(self):
        raise NotImplementedError

    def reduce(self, other):
        raise NotImplementedError

    @classmethod
    def create_workers(cls, input_class, config):
        workers = []
        for input_data in input_class.generate_inputs(config):
            workers.append(cls(input_data))
        return workers

class LineCountWorker(GenericWorker):
    def map(self):
        data = self.input_data.read()
        self.result = data.count('\n')

    def reduce(self, other):
        self.result += other.result      

def mapreduce(worker_class, input_class, config):
    workers = worker_class.create_workers(input_class, config)
    return execute(workers)

こちらのコードは前の実装と同じ結果を出力します。こちらの書き方であれば、GenericInputDataやGenericWorkerサブクラスを変更した際に、関係するコードを書き直す必要が無くなります。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up

Effective Python 学習備忘録 8日目 【8/100】

はじめに

今回学習する教材

今日の進捗

@classmethodポリモルフィズムを使ってオブジェクトをジェネリックに構築する

Effective Python 学習備忘録 8日目【8/100】