More than 5 years have passed since last update.

PythonでミニマルなLispを作る（構文解析編）

Last updated at 2018-03-26Posted at 2018-03-25

はじめに

前回の投稿でCommon LispでBrainfuckを実装しました．興が乗ったので今回はLisp以外の言語（Python）でLispを実装してみます．PythonでLispを実装するというのはもはや何番煎じかわかりませんが実用的なものではHyが有名でしょうか．そのほか有名なものだとあのPeter NorvigさんのLispy(lis.py)というのがあります．空行を覗いて90行ほどで実装されたScheme風のLispです．今回はそんなに高級なものではなくいわゆる純Lispと言われるようなシンプルなものにやや毛が生えたようなものを作りたいと思います．

実装の流れ

大きく2つの部分に分けて実装していきます．すなわち1.構文解析と2.評価です．1.構文解析では文字列で入力されたS式を文字列のリストに変換する処理を実装します．2.評価では文字列のリストを評価して式を実行する処理を実装します．

1.構文解析

(hoge (fuga fuga))のような入力に対して["hoge", ["fuga", "fuga"]]を返すような処理を実装します．ある程度拡張性をもたせたいのでCommon Lispを参考に幾つかのリーダマクロに相当する関数と文字のペアを予め定義しておきそれに従って処理を切り替えるようにします．

まず先立って以下ようなユーティリティを定義しておきます．

read.py(continue)

def read_char(stream):
    return stream.read(1)

def peek_char(stream):
    pos = stream.tell()
    char = read_char(stream)
    stream.seek(pos)
    return char

def make_seekable_stream(stream):
    from io import StringIO
    if not stream.seekable():
        if hasattr(stream, "name"):
            name = stream.name if stream.name else repr(stream)
        else:
            name = repr(stream)
        stream = StringIO(stream.read())
        stream.name = name
        return stream
    return stream

def describe_stream(stream):
    position = stream.tell() + 1
    stream.seek(0)
    buf = [stream.read(1) for _ in range(position)]
    row = buf.count("\n") + 1
    col = len("".join(buf).split("\n")[-1])
    char = repr(buf[-1])
    
    return {
        "name": stream.name,
        "position": position,
        "row": row,
        "col": col,
        "char": char,
    }

def eof_error(stream):
    "Unexpected EOF while reading {name} at file position {position} (row: {row}, col: {col})."
    raise EOFError(eof_error.__doc__.format(**describe_stream(stream)))

def syntax_error(stream):
    "Invalid character {char} in {name} at file position {position} (row: {row}, col: {col})"
    raise SyntaxError(syntax_error.__doc__.format(**describe_stream(stream)))

read_charはストリーム(file-like object)から一文字読み込みます．peek_charも同様ですがファイルポインタを進めません．peek_charを使うためにはストリームがseekableである必要があるので既存のストリームの内容をコピーしてseekableにするmake_seekable_streamを用意しておきます．その他の関数はエラー出力のための雑多なものです．

次に幾つかのリーダマクロとリーダマクロを管理する辞書(readtable)を実装します．

read.py(continue)


def read_list(stream):
    elements = []
    element = read(stream)
    while element:
        if element is ")":
            return elements
        else:
            elements.append(element)
        element = read(stream)
    eof_error(stream)

def right_paren_reader(stream):
    return ")"

def read_comment(stream):
    char = peek_char(stream)
    while char is not "\n":
        read_char(stream)
        char = peek_char(stream)
    return read(stream)

def read_quote(stream):
    return ["quote", read(stream)]

readtable = {
    "(": read_list,
    ")": right_paren_reader,
    ";": read_comment,
    "'": read_quote,
}

read_listは)が現れるまで読み込んだ結果をelementsに追加し，)が現れたらelementsを返します．もし)が現れるまでにEOFに到達した場合はEOFErrorを投げます．read_commentは文末まで読み飛ばします．read_quoteはシンボル（またはリスト）をquoteでラップします．

最後にreadです．readtableにしたがって処理を切り替えながら入力されたS式に対応する文字列または文字列のリストを作ります．

read.py(end)

import sys
import string

delimiters = string.whitespace
validchars = string.ascii_letters + string.digits + "-+*/%=!?<>^&"

def read(stream=sys.stdin):
    symbol = ""
    stream = make_seekable_stream(stream)
    char = peek_char(stream)
    while char:
        if char in readtable.keys():
            return symbol if symbol else readtable[read_char(stream)](stream)
        elif char in delimiters:
            read_char(stream)
            if symbol:
                return symbol
        elif char in validchars:
            symbol += read_char(stream)
        else:
            syntax_error(stream)
        char = peek_char(stream)
    eof_error(stream)

ストリームから一文字読んでリーダマクロに対応する文字列だった場合は対応する関数に処理を投げます．そうでなかった場合はdelimiters(空白文字)まで読み進めて結合した文字列をシンボルとして返します．なお，シンボルとして有効な文字列はvalidcharsに含まれるもののみです．

試しに以下のようなファイルを読み込んでみます．

test.lisp

; this line is comment
(cons 'a
; this line is also comment
      (cons 'b 'c))

read.pyに以下を追加します．

read.py(appendix)

if __name__ == "__main__":
    print(read())

$ python read.py < test.lisp
['cons', ['quote', 'a'], ['cons', ['quote', 'b'], ['quote', 'c']]]

良さそうですね．
最終的には以下のようになりました．

read.py

import sys
import string

def read_char(stream):
    return stream.read(1)

def peek_char(stream):
    pos = stream.tell()
    char = read_char(stream)
    stream.seek(pos)
    return char

def make_seekable_stream(stream):
    from io import StringIO
    if not stream.seekable():
        if hasattr(stream, "name"):
            name = stream.name if stream.name else repr(stream)
        else:
            name = repr(stream)
        stream = StringIO(stream.read())
        stream.name = name
        return stream
    return stream

def describe_stream(stream):
    position = stream.tell() + 1
    stream.seek(0)
    buf = [stream.read(1) for _ in range(position)]
    row = buf.count("\n") + 1
    col = len("".join(buf).split("\n")[-1])
    char = repr(buf[-1])
    
    return {
        "name": stream.name,
        "position": position,
        "row": row,
        "col": col,
        "char": char,
    }

def eof_error(stream):
    "Unexpected EOF while reading {name} at file position {position} (row: {row}, col: {col})."
    raise EOFError(eof_error.__doc__.format(**describe_stream(stream)))

def syntax_error(stream):
    "Invalid character {char} in {name} at file position {position} (row: {row}, col: {col})"
    raise SyntaxError(syntax_error.__doc__.format(**describe_stream(stream)))

def read_list(stream):
    elements = []
    element = read(stream)
    while element:
        if element is ")":
            return elements
        else:
            elements.append(element)
        element = read(stream)
    eof_error(stream)

def right_paren_reader(stream):
    return ")"

def read_comment(stream):
    char = peek_char(stream)
    while char is not "\n":
        read_char(stream)
        char = peek_char(stream)
    return read(stream)

def read_quote(stream):
    return ["quote", read(stream)]

readtable = {
    "(": read_list,
    ")": right_paren_reader,
    ";": read_comment,
    "'": read_quote,
}

delimiters = string.whitespace
validchars = string.ascii_letters + string.digits + "-+*/%=!?<>^&"

def read(stream=sys.stdin):
    symbol = ""
    stream = make_seekable_stream(stream)
    char = peek_char(stream)
    while char:
        if char in readtable.keys():
            return symbol if symbol else readtable[read_char(stream)](stream)
        elif char in delimiters:
            read_char(stream)
            if symbol:
                return symbol
        elif char in validchars:
            symbol += read_char(stream)
        else:
            syntax_error(stream)
        char = peek_char(stream)
    eof_error(stream)

if __name__ == "__main__":
    print(read())

書いてて疲れてきたので記事を分割します．ひとまずここまでお読み頂きありがとうございました．

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up