This article is a Private article. Only a writer and users who know the URL can access it.
Please change open range to public in publish setting if you want to share this article with other users.

More than 3 years have passed since last update.

PAIP19章のコードが再帰で作成されて難しい件

Last updated at 2022-02-08Posted at 2022-02-06

勉強会
『実用Common Lisp』読書会(93)にて
https://jitsucl.connpass.com/event/235954/

##どうでもよい話(1)
英語版はwebで読める
https://github.com/norvig/paip-lisp/blob/main/docs/chapter19.md

google翻訳すれば、英語が読めなくても意味はとれる
https://github-com.translate.goog/norvig/paip-lisp/blob/main/docs/chapter19.md?_x_tr_sl=en&_x_tr_tl=ja&_x_tr_hl=ja&_x_tr_pto=wapp

ただ、本の方が頭にすぐに取り込める日本語になっていて
やっぱり翻訳家の仕事にはかなわない

google web翻訳で良いと思った点は
common lispコードの訳せないところはスルーして
そのままにしているのは良い

ただ、コードの変数名を訳してしまっているのは良くない
ブロックがコードだと認識して、コメントだけを翻訳できれば良いと思う

欲を言うと、コメントの中身もコードの変数名や関数名に
登場する単語は訳さずにコメントを訳出すると
もっとサービスとしてよさげに思う
(皆さん、そんなサービスを見たことありますか ?)

#ここから本題
コードをファシリテーターの方が読んでくれるのだが
詰まることや、飛ばしてしまうことがあったように思う

このことは、コードが難しいので
自分が読んだとしても、満足に読めないと思うので
ファシリテータに責任は全く無いことは断言したい

しかし『実用Common Lisp』読書で
common lispエキスパートは目指さないとしても
他の言語で同レベルのコードを作成できるようになりたい
そうなるとコードの読解は避けて通ることができない
何とか克服したい

##どうでもよい話(2)
はじめはcommon lispに慣れてないせいで
見慣れないmapcanなどが原因だと思っていた

mapcanは、他の言語で言う所のflatmapだと言って
mapした結果をflattenしたものと同じと言えば
即了解されて、次に進めると安易に考えていた

(flatmapはscala, kotlin, swift, typescript, javascript
flat_mapとしてrubyにあるので流通しているのかと思ったが違った)
(javascriptにはArraysにあり、underscore.jsやlodashにある)

haskellにはconcatMapがあるが動作は全く同じ
https://hoogle.haskell.org/?hoogle=concatMap

・mapcanはmapcarの結果のリストにappendをする
　appendはリストどうしの加算
・flatmapはmapした結果をflattenしたもの
　flattenは2次元化した山を馴らして1次元のリストにする
・concatMapはmapした結果をconcatinateしたもの
　concatinateはリストの連結の意味
ことばの表現が違うだけで動きが同値なので議論は不要に思う

しかし、mapcanなどが馴染みないことも障害かもしれないが
それを解決しても
コードは明らかにhanoiの塔よりも複雑な再帰構造をしており
初見でサクサク読める代物ではないことに
やっと気が付いた
(パーサーは再帰で書くのが普通だが
例えばLL型の再帰下降型パーサーは
文法の再帰構造と全く同じに書くので
再帰のつながりは
予想しながら読める
だから今回の再帰より大きい場合もあるが
自然で、そこまで難しくない)

##また本題
このコードは lispでなければできないような
コーディングをされていない
また、pythonでも書けると勉強会で発言した

そこで、本のコンパクトなlispコードに沿って
少し冗長なpythonコードを起こしてみた

(本当は、テストコードからボトムアップに作り上げて
だれでも分かるコードピースに分解してから
積み上げて作るテストファースト作法で
作り上げる過程を見せると分かりやすい

しかし、それを行うには
再帰の部分を分解しなおして
本に沿わないコードに作り変えないと
テストファースト作法で作るのが
不可能なので断念しました)

以下が、そのコードです

ファイル名はなんでも良いのですが
parser0.py
としました

# lisp nilの代わり
NIL = []

# ============================
# grammar
# ============================
Sentence = 'Sentence'
NP = 'NP'
VP = 'VP'
Art = 'Art'
Noun = 'Noun'
Verb = 'Verb'
the = 'the'
a = 'a'
#
man = 'man'
ball = 'ball'
woman = 'woman'
table = 'table'
noun = 'noun'
verb = 'verb'
#
hit = 'hit'
took = 'took'
saw = 'saw'
liked = 'liked'

grammar = [
     [Sentence, '->', [NP, VP]]
    ,[NP, '->', [Art, Noun]]
    ,[VP, '->', [Verb, NP]]
    ,[Art, '->', the]
    ,[Art, '->', a]
    ,[Noun, '->', man]
    ,[Noun, '->', ball]
    ,[Noun, '->', woman]
    ,[Noun, '->', table]
    ,[Noun, '->', noun]
    ,[Noun, '->', verb]
    ,[Verb, '->', hit]
    ,[Verb, '->', took]
    ,[Verb, '->', saw]
    ,[Verb, '->', liked]
]

# ============================
# rule構造体
# rule : lhs -> rhs
# (defstruct (rule (:type list)) lhs -> rhs sem)
# (pythonにdefstructの:type listがない為に生やした)
# ============================
def rule_lhs(rule):
    lhs, _, rhs = rule
    return lhs

def rule_rhs(rule):
    lhs, _, rhs = rule
    return rhs

# ============================
# tree(dot対)
# tree : cat rhs
# ============================
#(defun new-tree (cat rhs) (cons cat rhs))
def new_tree(cat, rhs):
    return [cat, rhs]

#(defun tree-lhs (tree) (first tree))
def tree_lhs(tree):
    return tree[0]

#(defun tree-rhs (tree) (rest tree))
def tree_rhs(tree):
    return tree[1:]

# ============================
# parse構造体
# parse : tree rem
# (defstruct (parse) "A parse tree and a remainder." tree rem)
# ============================
def make_parse(tree, rem):
    return [tree, rem]

def parse_tree(parse):
    tree, rem = parse
    return tree

def parse_rem(parse):
    tree, rem = parse
    return rem

# 構造体treeの詳細のrem取得
#(defun parse-lhs (parse) (tree-lhs (parse-tree parse)))
def parse_lhs(parse):
    tree = parse_tree(parse)
    lhs = tree_lhs(tree)
    return lhs

# ============================
# ルール検索
# ============================
# --------------------------------
# 単語をキーに、ルールの展開結果に
# 単語が含まれるルール群を取得
# --------------------------------
#(defun lexical-rules (word)
#  "Return a list of rules with word on the right hand side."
#  (or (find-all word *grammar* :key #'rule-rhs :test #'equal)
#      (mapcar #'(lambda (cat) `(,cat -> ,word)) *open-categories*)))
#open_categories = [N, V, A, Name]
open_categories = []
def lexical_rules(word):
    # 右展開部にwordが含まれるルールを *grammar* から選択
    rules_find = [rule
                  for rule in grammar
                  if rule_rhs(rule) == word]

    # ルールが見つからない未知語の場合に
    # 可能なopen_categoriesを充ててルールを仮作する
    if len(rules_find) > 0:
        return rules_find
    def make_rule(cat):
        re = [cat, '->', word]
        return re
    rules_make = [make_rule(cat) for cat in open_categories]
    return rules_make

# --------------------------------
# 品詞や句種類のカテゴリーをキーに
# ルールの展開結果にそのカテゴリが
# 含まれるルール群の先頭を取得
# --------------------------------
#(defun rules-starting-with (cat)
#  "Return a list of rules where cat starts the rhs."
#  (find-all cat *grammar*
#            :key #'(lambda (rule) (first-or-nil (rule-rhs rule)))))
def rules_starting_with(cat):
    def first_rhs(rule):
        _rhs = rule_rhs(rule)
        _rhs_first = first_or_nil(_rhs)
        return _rhs_first
    _rules = [_rule
              for _rule in grammar
              if first_rhs(_rule) == cat]
    return _rules

# ============================
# ボトムアップパーサー
# ============================
# --------------------------------
# 単語リストのすべての解析結果
# (余りの単語が残らない様に解析
#  残った場合は、解析不良として弾く)
# --------------------------------
#(defun parser (words)
#  "Return all complete parses of a list of words."
#  (mapcar #'parse-tree (complete-parses (parse words))))
def parser(words):
    # パース結果
    # (rem(残り)が含まれることがある)
    _parses = parse(words)
    # パース結果
    # (rem(残り)が含まれる結果が取り除かれている)
    _parses_comp = complete_parses(_parses)
    # 解析結果のパース構造体から構文木以外のデータを除いたtreeのリスト
    _trees = [parse_tree(_parse) for _parse in _parses_comp]
    return _trees

# --------------------------------
# 解析結果のリストから
# 解析不良のものを弾く
# --------------------------------
#(defun complete-parses (parses)
#  "Those parses that are complete (have no remainder)."
#  (find-all-if #'null parses :key #'parse-rem))
def complete_parses(parses):
    #（余りのない）全部そろったパース結果
    _parses = [_parse
               for _parse in parses
               for _rem in [parse_rem(_parse)]
               if is_null(_rem)]
    return _parses

# --------------------------------
# 単語リストの先頭から展開されていく
# 可能性のある、すべての解析結果を
# リストアップ
# --------------------------------
#(defun parse (words)
#  "Bottom-up parse, returning all parses of any prefix of words."
#  (unless (null words)
#    (mapcan #'(lambda (rule)
#                (extend-parse (rule-lhs rule) (list (first words))
#                              (rest words) nil))
#            (lexical-rules (first words)))))
def parse(words):
    # 解析対象の単語が無いならNILで終了
    if is_null(words):
        return NIL

    # wordsの先頭を右辺の展開部とする
    # ルールを集めて
    # 各ルールにconsume_wordを適用
    word_first = words[0]
    lex_rules = lexical_rules(word_first)
    def consume_word(rule):
        # wordsの先頭を消費してextend_parseする
        # 例) rule: lhs:Art rhs:the
        #     words: [the, table]
        #     ⇒(lhs=Art, rhs=the, rem=[table], needed=NIL)
        words_rest = words[1:]
        _lhs = rule_lhs(rule)
        _parse = extend_parse(lhs=_lhs, rhs=word_first
                            , rem=words_rest, needed=NIL)
        return _parse
    _parses = flatmap(consume_word, lex_rules)
    return _parses

# --------------------------------
# 単語リストの先頭から展開されていく
# 可能性のある、すべての解析結果を
# リストアップ
# --------------------------------
#(defun extend-parse (lhs rhs rem needed)
#  "Look for the categories needed to complete the parse."
#  (if (null needed)
#      ;; If nothing needed, return parse and upward extensions
#      (省略)
#      ;; otherwise try to extend rightward
#      (省略)
def extend_parse(lhs, rhs, rem, needed):
    if is_null(needed):
        # parse作成
        _tree = new_tree(lhs, rhs)
        _parse = make_parse(tree=_tree, rem=rem)
        # lhsを右辺展開部の先頭とするルールを集めて
        # 各ルールにconsume_rhsを適用
        # (消費するのはgrammarから取り出した右辺展開部の先頭(_rhs[0]))
        _rules = rules_starting_with(lhs)
        def consume_rhs(rule):
            # 解決済のルールの右辺展開部の先頭と
            # 解決予定の右辺展開部の先頭除く残りで
            # 再帰呼び出しで残りを解決
            _tree2 = parse_tree(_parse)
            _rhs2 = rule_rhs(rule)
            _rhs2_rest = _rhs2[1:]
            # 各ルールの左辺頭部をlhsとし
            # 呼び出し元のflatmapのルートのtreeをrhsとし
            # 各ルールの右辺展開部の先頭を取り除いたものをneededとして
            # extend_parseを実行
            # このルートのtreeは、呼び出し元のlhsとrhsを合わせたもの
            # 例)(lhs:NP, rhs:[[Art, the]], rem:[table], needed:[Noun])
            #    neededはArt解析済時点で、単語の残りがtableで、パターンの残りのNounになる
            # grammar = [
            #      .....
            #     ,[NP,   '->', [Art, Noun]]
            #      .....
            # ]
            _lhs=rule_lhs(rule)
            _parse2 = extend_parse(lhs=_lhs, rhs=[_tree]
                                 , rem=rem, needed=_rhs2_rest)
            return _parse2
        _parses = flatmap(consume_rhs, _rules)
        return [_parse] + _parses
    else:
        # 残りの単語(未解析)を解析する
        # 同じ単語でも、複数の役割の可能性がある
        # (動詞にも名詞にもなり得る場合がある)
        # すべての可能性のルールが採取される為
        # 解析結果は複数になりflatmap(mapcan)する
        # 選好は一切していないので、すべて出る
        _parses = parse(rem)

        # parseした結果の新しい_remを消費
        # (消費してNILになる場合もあり)
        # その役割と同値なneededの先頭を消費
        def consume_needed(parse):
            # neededの先頭とparseした結果のlhsが
            # 一致したらneededの先頭を消費して
            # parseした結果をrhsに追加して更に解析
            # 一致しければ不発の意味のNIL
            # また、parseした結果の新しい_remを使用する(消費してNILになる場合もあり)
            _lhs = parse_lhs(parse)
            _needed_first = needed[0]
            if _lhs == _needed_first:
                # 例)(lhs:NP, rhs:[[Art, the], [Noun, table]], rem:NIL, needed:NIL)
                #     parse([table]) ⇒ [[Noun, table]:tree,[]:rem]
                #     _needed_rest ⇒ [Noun][1:] = [] = NIL
                _tree = parse_tree(parse)
                _rhs_add = append1(rhs, _tree)
                _rem = parse_rem(parse)
                _needed_rest = needed[1:]
                _parse = extend_parse(lhs=lhs, rhs=_rhs_add
                                    , rem=_rem, needed=_needed_rest)
                return _parse
            return NIL
        _parses_new = flatmap(consume_needed, _parses)
        return _parses_new

# ============================
# 下請け関数群
# (読めば分かるなんてことないものたち)
# ============================
def append1(items, item):
    re = items + [item]
    return re

def first_or_nil(x):
    if isinstance(x, list):
        if len(x) > 0:
            return x[0]
    return NIL

def flatmap(fun, lst):
    matrix2d = [fun(_rule) for _rule in lst]
    lst_new = flatten(matrix2d)
    return lst_new

def flatten(target):
    flat = [x
            for row in target
            for x in row]
    return flat

def is_null(x):
    if x is None or x == NIL:
        return True
    else:
        return False


if __name__ == '__main__':
    input_ = [the, table]
    expect = [[NP
               , [ [Art, the]
                 , [Noun, table]]]]
    assert parser(input_) == expect

    input_ = [the, ball, hit, the, table]
    expect = [
        [Sentence
            , [  [NP, [  [Art, the]
                         , [Noun, ball]]]
               , [VP, [  [Verb, hit]
                         , [NP, [  [Art, the]
                                   , [Noun, table]]]]]]]]
    assert parser(input_) == expect

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up