純Lisp実装シリーズ第3回です
第1回 小さなLisp
第2回 SECDマシン
今回は第1回で書いた物をrubyからhaskellへ書き直しただけですので悪しからず
なぜそのような事をしたかというとSECDマシン(VM)と素朴な再帰ベースの実装とを比べて見たかったからです
では見ていきましょう
source
AST
Abstract Syntax Tree略してAST
構文木ですね
S式は非常にシンプルでListとAtomで事足ります
今回は数字用にNatを追加
Posはソースコード中の位置情報です
エラーメッセージ用に入れておきます
module Lisp.AST where
import Data.List
type Pos = (Int,Int)
data AST
= Atom Pos String
| Nat Pos Int
| List Pos [AST]
instance Show AST where
show (Atom _ s) = s
show (Nat _ n) = show n
show (List _ ls) = "(" ++ (concat $ intersperse " " $ map show ls) ++ ")"
instance Eq AST where
Atom _ a == Atom _ b = a == b
Nat _ a == Nat _ b = a == b
List _ a == List _ b = a == b
a == b = False
Parser
文字列からASTへ変換するのがparseですね
parsecを使ってparseします
((,) <$> P.sourceLine <*> P.sourceColumn) <$> P.getPosition
位置情報はこれで取れます
{-# LANGUAGE TupleSections #-}
module Lisp.Parser where
import Lisp.AST
import Control.Applicative
import qualified Text.Parsec as P
parse :: String -> Either P.ParseError AST
parse = P.parse sexpr ""
-- top = List <$> pos <*> (P.many (sexpr <* spaces) <* P.eof)
sexpr = atom <|> list
pos = ((,) <$> P.sourceLine <*> P.sourceColumn) <$> P.getPosition
spaces = P.skipMany (P.space <|> P.tab <|> P.newline <|> comment)
comment = P.string ";" *> P.noneOf "\n" <* P.newline
atom
= Atom <$> pos <*> P.many1 (P.noneOf "0123456789()' \t\r\n")
<|> Nat <$> pos <*> (read <$> P.many1 P.digit)
list = P.string "(" *> (List <$> pos <*> P.many (sexpr <* spaces)) <* P.string ")"
Eval
実行系本体になります
VMと違いコンパイルしてVM用の命令を生成する必要がなく
素朴に逐次実行していきます
ひたすらパターンマッチング
{-# LANGUAGE RecordWildCards #-}
module Lisp.Eval where
import Lisp.AST
import Control.Monad
import qualified Data.Map as M
type Env = M.Map String AST
data LispState
= LispState {
ast :: AST,
env :: Env,
ref :: Maybe LispState
}
deriving Show
eval :: AST -> IO AST
eval a = do
ls <- evalState $ initLispState { ast = a }
pure $ ast ls
initLispState = LispState (Atom (0,0) "#nil") M.empty Nothing
evalError :: LispState -> String -> IO LispState
evalError ls msg = do
let
(p, a) = case ast ls of
Atom p a -> (p, show a)
Nat p n -> (p, show n)
List p l -> (p, show $ ast ls)
fail $ show p ++ " " ++ msg ++ " " ++ a
pure ls
evalState :: LispState -> IO LispState
evalState ls = case ast ls of
Atom _ a -> pure $ ls { ast = lookup $ env ls }
where
lookup e = case M.lookup a e of
Just a' -> a'
Nothing -> case ref ls of
Just e' -> lookup $ env e'
Nothing -> ast ls
Nat _ a -> pure ls
List _ [] -> pure ls
List _ (Atom _ "do" : body) -> foldM go ls body
where
go ls' expr = evalState $ ls' { ast = expr }
List _ [Atom _ "def", Atom _ name, body] ->
pure $ ls { env = M.insert name body $ env ls }
List pos (Atom _ "def" : _) ->
evalError ls "syntax error"
List _ [Atom _ "lam", List _ names, body] ->
pure ls
List pos (Atom _ "lam" : _) ->
evalError ls "syntax error"
List _ [Atom pos "+", a, b] -> do
a' <- evalState $ ls { ast = a }
b' <- evalState $ ls { ast = b }
case (ast a', ast b') of
(Nat _ a, Nat _ b) -> pure $ ls { ast = Nat pos (a + b) }
_ -> evalError ls "+ expect nat"
List _ [Atom pos "-", a, b] -> do
a' <- evalState $ ls { ast = a }
b' <- evalState $ ls { ast = b }
case (ast a', ast b') of
(Nat _ a, Nat _ b) -> pure $ ls { ast = Nat pos (a - b) }
_ -> evalError ls "- expect nat"
List _ (Atom _ "puts" : args) -> mapM_ puts args >> pure ls
where
puts arg = do
a <- evalState $ ls { ast = arg }
print a
List _ [Atom _ "if", bool, true, false] -> do
b <- evalState $ ls { ast = bool }
case ast b of
Atom _ "#t" -> evalState $ ls { ast = true }
Atom _ "#f" -> evalState $ ls { ast = false }
_ -> evalError ls "if expect #t or #f"
List pos [Atom _ "atom", a] -> do
a' <- evalState $ ls { ast = a }
case ast a' of
Atom _ _ -> pure $ ls { ast = Atom pos "#t" }
List _ _ -> pure $ ls { ast = Atom pos "#f" }
List _ ((Atom _ "atom"):_) ->
evalError ls "syntax error"
List _ [Atom pos "eq", a, b] -> do
a' <- evalState $ ls { ast = a }
b' <- evalState $ ls { ast = b }
if ast a' == ast b' then
pure $ ls { ast = Atom pos "#t" }
else
pure $ ls { ast = Atom pos "#f" }
List _ ((Atom _ "eq"):_) ->
evalError ls "syntax error"
List p1 [Atom p2 "cons", a, b] -> do
a' <- evalState $ ls { ast = a }
b' <- evalState $ ls { ast = b }
pure $ ls { ast = List p1 [Atom p2 "cons", ast a', ast b'] }
List p1 ((Atom p2 "cons"):_) ->
evalError ls "syntax error"
List _ [Atom _ "car", a] -> do
a' <- evalState $ ls { ast = a }
case ast a' of
List _ [Atom _ "cons", a'', b] -> pure $ ls { ast = a'' }
_ -> evalError ls "car expect cons"
List _ ((Atom _ "car"):_) ->
evalError ls "syntax error"
List _ [Atom _ "cdr", a] -> do
a' <- evalState $ ls { ast = a }
case ast a' of
List _ [Atom _ "cons", a'', b] -> pure $ ls { ast = b }
_ -> evalError ls "car expect cons"
List _ ((Atom _ "cdr"):_) ->
evalError ls "syntax error"
-- apply
List pos (lambda : args) -> do
lambda' <- evalState $ ls { ast = lambda }
case ast lambda' of
List _ [Atom _ "lam", List _ names, body] ->
if length names == length args then do
args' <- mapM (\a -> ast <$> (evalState $ ls { ast = a })) args
let env' = M.union (M.fromList $ zip (map (\(Atom _ n) -> n) names) args') $ env ls
evalState $ ls { ast = body, env = env' }
else
evalError ls $
"lam expect " ++ (show $ length names) ++ " unexpect " ++ (show $ length args)
_ -> evalError lambda' "apply expect lam"
Lisp
トップレベルのインターフェイス関数群
文字列から実行したり、ファイルから実行したり
module Lisp where
import Lisp.AST
import Lisp.Parser
import Lisp.Eval
import System.Directory
runLisp' :: String -> String -> IO ()
runLisp' fn s = do
case parse s of
Left err -> print err
Right ast -> do
ast' <- eval ast
print ast'
runLisp = runLisp' "runLisp"
runLispFile f = do
bool <- doesFileExist f
if bool then do
s <- readFile f
runLisp' f s
else
fail $ f ++ " does not exist"
比較
と言っても単純なベンチマークです
フィボナッチ数計算で比べます
まず今回の再帰ベースのlispから
(do
(def fib
(lam (n)
(if (eq n 0)
0
(if (eq n 1)
1
(+ (fib (- n 2)) (fib (- n 1)))))))
(fib 30))
❯ time stack exec lisp fib.lisp
832040
stack exec lisp example/fib.lisp 6.91s user 0.75s system 120% cpu 6.383 total
次に前回のSECDマシンのlisp
少々言語仕様が違いますが
(letrec
(fib
(lam n
(if (eq n 0)
0
(if (eq n 1)
1
(+ (fib (+ n -2)) (fib (+ n -1)))))))
(fib 30))
❯ time stack exec lisp fib.lisp
832040
stack exec lisp lisp/fib.lisp 4.80s user 0.71s system 128% cpu 4.276 total
ふむ
- 素朴再帰
6.91s user 0.75s system 120% cpu 6.383 total
- SECDマシン
4.80s user 0.71s system 128% cpu 4.276 total
意外と開きましたね
コンパイルのオーバーヘッドがあるSECDマシンが不利かと思ったんですがそうでも無いみたいですね
SECDマシンすごい
次回
3impのヒープモデルを実装予定(未定(多分やらない))