Rust
lisp

RustでLispインタープリター

More than 1 year has passed since last update.

純Lisp実装シリーズ第四回です

第一回 小さなLisp

第二回 SECDマシン

第三回 素朴なlispインタープリター

SECDマシンをRustで実装しその上でLispを動かします

記事中のコードが古くなってしまったのでレポジトリの方を参照してください

source code: kmtoki/secd-rs


純Lispとは、SECDマシンとは

wikipedia:純Lisp

wikipedia:SECDマシン

要約すると

純Lispとは最小かつ、最初のLisp

SECDマシンとは古い関数言語向けVM

詳しくはwikipediaへ


コード

全部載せると長ったらしくなるので適度に省略していきます

流れとしては

Source -> Parser -> Compiler -> VM -> Result

と言う感じです


data.rs

共用するstructやtypeなど

SECDマシンのSECDとは見ての通り

S: Stack

E: Env

C: Code

D: Dump

の略で、この4つのレジスタを使用し処理していくわけです

RcやRefCellと言う謎の呪文が見て取れます

他の言語では当たり前のことがRustでは、煩わしく扱わねばなりませんね

use std::fmt;

use std::rc::Rc;
use std::cell::RefCell;
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
pub struct SECD {
pub stack: Stack,
pub env: Env,
pub code: Code,
pub dump: Dump
}

pub type Stack = RefCell<Vec<Rc<Lisp>>>;
pub type Code = RefCell<Vec<CodeOPInfo>>;
pub type Env = RefCell<HashMap<Rc<String>, Rc<Lisp>>>;
pub type Dump = RefCell<Vec<DumpOP>>;

pub type Info = [usize; 2];

#[derive(Debug, PartialEq)]
pub struct AST {
pub info: Info,
pub sexpr: SExpr
}

#[derive(Debug, PartialEq)]
pub enum SExpr {
Atom(Rc<String>),
Int(i32),
List(Rc<Vec<AST>>)
}

#[derive(Debug, Clone)]
pub struct CodeOPInfo {
pub info: Info,
pub op: CodeOP
}

#[derive(Debug, PartialEq, Clone)]
pub enum CodeOP {
LET(Rc<String>),
LD(Rc<String>),
LDC(Rc<Lisp>),
LDF(Rc<Vec<Rc<String>>>, Code),
SEL(Code,Code),
JOIN,
RET,
AP,
RAP,
ARGS(usize),
PUTS,
EQ,
ADD,
SUB,
CONS,
CAR,
CDR
}

#[derive(Debug, PartialEq)]
pub enum DumpOP {
DumpAP(Stack, Env, Code),
DumpSEL(Code)
}

#[derive(Debug, PartialEq)]
pub enum Lisp {
Nil,
False,
True,
Int(i32),
List(Rc<Vec<Rc<Lisp>>>),
Closure(Rc<Vec<Rc<String>>>, Code, Env),
Cons(Rc<Lisp>, Rc<Lisp>),
}


parser.rs

LexerとParser

ネストしたS式も再帰を使わず、二次元Vecをいじいじするだけで処理できるの良いです

use data::{Info, AST, SExpr};

use std::cell::Cell;
use std::rc::Rc;
use std::error::Error;

pub struct Parser {
src: String,
pos: Cell<usize>,
info: Cell<Info>
}

pub struct Token {
pub token: String,
pub kind: &'static str,
pub info: Info
}

type LexerResult = Result<Option<Token>, Box<Error>>;
type ParserResult = Result<AST, Box<Error>>;

fn is_id(c: char) -> bool {
"1234567890!#$%&-^=~|@`;:+*,./_<>?_qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM"
.find(c).is_some()
}

impl Parser {
pub fn new(s: &String) -> Parser {
return Parser {
src: s.clone(),
pos: Cell::new(0),
info: Cell::new([1; 2])
};
}

fn lex(&self, is_peek: bool) -> LexerResult {
let prev_pos = self.pos.clone();
let prev_info = self.info.clone();
let mut t = Ok(None);

while self.src.len() > self.pos.get() {
match self.src.as_bytes()[self.pos.get()] as char {
'(' => {
self.inc_width();
self.inc_pos();
t = Ok(Some(Token {
token: String::from("("),
kind: "(",
info: self.info.get()
}));
break;
}

')' => {
self.inc_width();
self.inc_pos();
t = Ok(Some(Token {
token: String::from(")"),
kind: ")",
info: self.info.get()
}));
break;
}

' ' => {
self.inc_width();
self.inc_pos();
}

'\n' => {
self.inc_line();
self.inc_pos();
}

c if c.is_numeric() => {
self.inc_width();
self.inc_pos();

let mut s = String::new();
s.push(c);

while self.src.len() > self.pos.get() {
let cc = self.src.as_bytes()[self.pos.get()] as char;
if cc.is_numeric() {
self.inc_width();
self.inc_pos();

s.push(cc);
}
else {
t = Ok(Some(Token {
token: s,
kind: "int",
info: self.info.get()
}));
break;
}
}

break;
}

c if is_id(c) => {
self.inc_width();
self.inc_pos();

let mut s = String::new();
s.push(c);

while self.src.len() > self.pos.get() {
let cc = self.src.as_bytes()[self.pos.get()] as char;
if is_id(cc) {
self.inc_width();
self.inc_pos();

s.push(cc);
}
else {
t = Ok(Some(Token {
token: s,
kind: "id",
info: self.info.get()
}));
break;
}
}

break;
}

c => {
t = Err(
From::from(
format!("lex unexpect token '{}' in {:?}", c, self.info.get())
)
);
break;
}
}
}

if is_peek {
self.pos.set(prev_pos.get());
self.info.set(prev_info.get());
}

return t;
}

pub fn parse(&self) -> ParserResult {
let mut ps = 0;
let mut list: Vec<Vec<AST>> = vec!(vec!());

loop {
match try!(self.next()) {
None => break,

Some(t) => {
match t.kind {
"id" => {
list.last_mut().unwrap().push(
AST { info: t.info, sexpr: SExpr::Atom(Rc::new(t.token)) }
);
}

"int" => {
list.last_mut().unwrap().push(
AST { info: t.info, sexpr: SExpr::Int(t.token.parse().unwrap()) }
)
}

"(" => {
list.push(vec!());
ps += 1;
}

")" => {
let node = list.pop().unwrap();
list.last_mut().unwrap().push(
AST {
info: t.info,
sexpr: SExpr::List(Rc::new(node))
}
);
ps -= 1;
}

_ => unimplemented!()
}

if ps < 0 {
return Err(From::from("many ')'".to_string()));
}
}
}
}

if ps > 0 {
return Err(From::from("many '('".to_string()));
}
else {
return Ok(list.pop().unwrap().pop().unwrap());
}
}
}


compiler.rs

letrec_id_listは関数適用をコンパイル時に

その変数がletrecによって束縛されたのかどうかを判断するために使います

letであればAP、letrecであればRAPと言うオペコードを生成します

use data::{AST, SExpr, Lisp, Code, CodeOPInfo, CodeOP};

use std::rc::Rc;
use std::cell::RefCell;
use std::error::Error;

pub struct Compiler {
pub code: Code,
letrec_id_list: RefCell<Vec<Rc<String>>>
}

type CompilerResult = Result<(), Box<Error>>;

impl Compiler {
pub fn new() -> Self {
return Compiler {
code: RefCell::new(vec!()),
letrec_id_list: RefCell::new(vec!()),
};
}

pub fn compile_(&self, ast: &AST) -> CompilerResult {
match ast.sexpr {
SExpr::Int(n) => {
return self.compile_int(ast, n);
}

SExpr::Atom(ref id) => {
return self.compile_atom(ast, id);
}

SExpr::List(ref ls) => {
if ls.len() == 0 {
return self.compile_nil(ast);
}
else {
match ls[0].sexpr {
SExpr::Int(_) => {
return self.error(&ls[0], "apply unexpect int");
}

SExpr::Atom(ref id) => {
match id.as_str() {
"lambda" => {
return self.compile_lambda(ls);
}

"let" => {
return self.compile_let(ls);
}

"letrec" => {
return self.compile_letrec(ls);
}

"puts" => {
return self.compile_puts(ls);
}

"if" => {
return self.compile_if(ls);
}

"eq" => {
return self.compile_eq(ls);
}

"+" => {
return self.compile_add(ls);
}

"-" => {
return self.compile_sub(ls);
}

"cons" => {
return self.compile_cons(ls);
}

"car" => {
return self.compile_car(ls);
}

"cdr" => {
return self.compile_cdr(ls);
}

_ => {
return self.compile_apply(ls);
}
}
}

SExpr::List(_) => {
return self.compile_apply(&ls);
}
}
}
}
}
}

fn compile_atom(&self, ast: &AST, id: &Rc<String>) -> CompilerResult {
match id.as_str() {
"nil" => {
self.code.borrow_mut().push(
CodeOPInfo {
info: ast.info,
op: CodeOP::LDC(Rc::new(Lisp::Nil))
}
);
}

"true" => {
self.code.borrow_mut().push(
CodeOPInfo {
info: ast.info,
op: CodeOP::LDC(Rc::new(Lisp::True))
}
);
}

"false" => {
self.code.borrow_mut().push(
CodeOPInfo {
info: ast.info,
op: CodeOP::LDC(Rc::new(Lisp::False))
}
);
}

_ => {
self.code.borrow_mut().push(
CodeOPInfo {
info: ast.info,
op: CodeOP::LD(id.clone())
}
);
}
}

return Ok(());
}

fn compile_lambda(&self, ls: &Rc<Vec<AST>>) -> CompilerResult {
if ls.len() != 3 {
return self.error(&ls[0], "lambda syntax");
}

let mut args: Vec<Rc<String>> = vec!();
match ls[1].sexpr {
SExpr::Atom(ref a) => {
args.push(a.clone());
}

SExpr::List(ref aa) => {
for ast in aa.iter() {
match ast.sexpr {
SExpr::Atom(ref a) => {
args.push(a.clone());
}

_ => {
return self.error(&ast, "lambda args");
}
}
}
}

_ => {
return self.error(&ls[1], "lambda args");
}
}

let mut body = Compiler::new();
body.letrec_id_list = self.letrec_id_list.clone();
try!(body.compile_(&ls[2]));
body.code.borrow_mut().push(
CodeOPInfo {
info: ls[0].info,
op: CodeOP::RET
}
);

self.code.borrow_mut().push(
CodeOPInfo {
info: ls[0].info,
op: CodeOP::LDF(Rc::new(args), body.code)
}
);

return Ok(());
}

fn compile_let(&self, ls: &Rc<Vec<AST>>) -> CompilerResult {
if ls.len() != 4 {
return self.error(&ls[0], "let syntax");
}

let id = match ls[1].sexpr {
SExpr::Atom(ref id) => id.clone(),
_ => return self.error(&ls[0], "let bind id sytax")
};

self.letrec_id_list.borrow_mut().retain(|a| *a != id);

try!(self.compile_(&ls[2]));
self.code.borrow_mut().push(
CodeOPInfo {
info: ls[0].info,
op: CodeOP::LET(id)
}
);

try!(self.compile_(&ls[3]));

return Ok(());
}

fn compile_letrec(&self, ls: &Rc<Vec<AST>>) -> CompilerResult {
if ls.len() != 4 {
return self.error(&ls[0], "let syntax");
}

let id = match ls[1].sexpr {
SExpr::Atom(ref id) => id.clone(),
_ => return self.error(&ls[0], "let bind id sytax")
};

self.letrec_id_list.borrow_mut().push(id.clone());

try!(self.compile_(&ls[2]));
self.code.borrow_mut().push(
CodeOPInfo {
info: ls[0].info,
op: CodeOP::LET(id)
}
);
try!(self.compile_(&ls[3]));

return Ok(());
}

fn compile_apply(&self, ls: &Rc<Vec<AST>>) -> CompilerResult {
let (lambda, args) = ls.split_first().unwrap();
for arg in args {
try!(self.compile_(arg));
}
self.code.borrow_mut().push(
CodeOPInfo {
info: ls[0].info,
op: CodeOP::ARGS(args.len())
}
);
try!(self.compile_(lambda));

match lambda.sexpr {
SExpr::Atom(ref id) => {
if self.letrec_id_list.borrow().iter().any(|a| a == id) {
self.code.borrow_mut().push(
CodeOPInfo {
info: ls[0].info,
op: CodeOP::RAP
}
);
}
else {
self.code.borrow_mut().push(
CodeOPInfo {
info: ls[0].info,
op: CodeOP::AP
}
);
}
}

_ => {
self.code.borrow_mut().push(
CodeOPInfo {
info: ls[0].info,
op: CodeOP::AP
}
);
}
}

return Ok(());
}

fn compile_if(&self, ls: &Rc<Vec<AST>>) -> CompilerResult {
if ls.len() != 4 {
return self.error(&ls[0], "if syntax");
}

try!(self.compile_(&ls[1]));

let mut tc = Compiler::new();
tc.letrec_id_list = self.letrec_id_list.clone();
try!(tc.compile_(&ls[2]));
tc.code.borrow_mut().push(
CodeOPInfo {
info: ls[2].info,
op: CodeOP::JOIN
}
);

let mut fc = Compiler::new();
fc.letrec_id_list = self.letrec_id_list.clone();
try!(fc.compile_(&ls[3]));
fc.code.borrow_mut().push(
CodeOPInfo {
info: ls[3].info,
op: CodeOP::JOIN
}
);

self.code.borrow_mut().push(
CodeOPInfo {
info: ls[0].info,
op: CodeOP::SEL(
tc.code,
fc.code
)
}
);

return Ok(());
}

fn compile_eq(&self, ls: &Rc<Vec<AST>>) -> CompilerResult {
if ls.len() != 3 {
return self.error(&ls[0], "eq syntax");
}

try!(self.compile_(&ls[1]));
try!(self.compile_(&ls[2]));
self.code.borrow_mut().push(
CodeOPInfo {
info: ls[0].info,
op: CodeOP::EQ
}
);

return Ok(());
}
}


vm.rs

SECDマシン本体

無駄なエラー処理が多いですが、自分で書いたコンパイラを信じきれていないのでこうなっています

基本的にはただのスタックマシンです

Dumpレジスタに関数適用時と分岐時にスタックやコードを退避させておいて

関数にはRET

分岐にはJOIN

がそれぞれの末尾にコンパイラによって仕込まれているので

またDumpから退避させておいた物を戻して、元の処理に戻ることができるわけですね

use data::*;

use std::rc::Rc;
use std::cell::RefCell;
use std::collections::HashMap;
use std::error::Error;

type VMResult = Result<(), Box<Error>>;

impl SECD {
pub fn new(c: Code) -> SECD {
return SECD {
stack: RefCell::new(vec!()),
env: RefCell::new(HashMap::new()),
code: c,
dump: RefCell::new(vec!()),
};
}

fn run_(&self) -> VMResult {
while self.code.borrow().len() > 0 {
let ref c = self.code.borrow_mut().remove(0);
match c.op {
CodeOP::LET(ref id) => {
try!(self.run_let(c, id));
}

CodeOP::LD(ref id) => {
try!(self.run_ld(c, id));
}

CodeOP::LDC(ref lisp) => {
try!(self.run_ldc(c, lisp));
}

CodeOP::LDF(ref names, ref code) => {
try!(self.run_ldf(c, names, code));
}

CodeOP::RET => {
try!(self.run_ret(c));
}

CodeOP::AP => {
try!(self.run_ap(c));
}

CodeOP::RAP => {
try!(self.run_rap(c));
}

CodeOP::ARGS(n) => {
try!(self.run_args(c, n));
}

CodeOP::PUTS => {
try!(self.run_puts(c));
}

CodeOP::SEL(ref t, ref f) => {
try!(self.run_sel(c, t, f));
}

CodeOP::JOIN => {
try!(self.run_join(c));
}

CodeOP::EQ => {
try!(self.run_eq(c));
}

CodeOP::ADD => {
try!(self.run_add(c));
}

CodeOP::SUB => {
try!(self.run_sub(c));
}

CodeOP::CONS => {
try!(self.run_cons(c));
}

CodeOP::CAR => {
try!(self.run_car(c));
}

CodeOP::CDR => {
try!(self.run_cdr(c));
}
}
}

return Ok(());
}

fn run_let(&self, c: &CodeOPInfo, id: &Rc<String>) -> VMResult {
if let Some(expr) = self.stack.borrow_mut().pop() {
self.env.borrow_mut().insert(id.clone(), expr);
return Ok(());
}
else {
return self.error(c, "LET: stack is empty");
}
}

fn run_ld(&self, c: &CodeOPInfo, id: &Rc<String>) -> VMResult {
if let Some(expr) = self.env.borrow().get(id) {
self.stack.borrow_mut().push(expr.clone());
return Ok(());
}
else {
return self.error(c, format!("LD: found {}", id).as_str());
}
}

fn run_ldc(&self, _: &CodeOPInfo, lisp: &Rc<Lisp>) -> VMResult {
self.stack.borrow_mut().push(lisp.clone());
return Ok(());
}

fn run_ldf(&self, _: &CodeOPInfo, names: &Rc<Vec<Rc<String>>>, code: &Code)
-> VMResult {
self.stack.borrow_mut().push(
Rc::new(
Lisp::Closure(
names.clone(),
code.clone(),
self.env.clone()
)
)
);
return Ok(());
}

fn run_ap(&self, c: &CodeOPInfo) -> VMResult {
let s = self.stack.borrow_mut().pop();
if let Some(closure) = s {
if let Lisp::Closure(ref names, ref code, ref env) = *closure {
let s = self.stack.borrow_mut().pop();
if let Some(list) = s {
if let Lisp::List(ref vals) = *list {
let env = env.clone();
for i in 0 .. names.len() {
env.borrow_mut().insert(names[i].clone(), vals[i].clone());
}

self.dump.borrow_mut().push(
DumpOP::DumpAP(
self.stack.clone(),
self.env.clone(),
self.code.clone()
)
);

*self.stack.borrow_mut() = vec!();
*self.env.borrow_mut() = env.clone().into_inner();
*self.code.borrow_mut() = code.clone().into_inner();

return Ok(());
}
else {
return self.error(c, "AP: expected List");
}
}
else {
return self.error(c, "AP: stack is empty");
}
}
else {
return self.error(c, "AP: expected Closure");
}
}
else {
return self.error(c, "AP: stack is empty");
}
}

fn run_rap(&self, c: &CodeOPInfo) -> VMResult {
let s = self.stack.borrow_mut().pop();
if let Some(closure) = s {
if let Lisp::Closure(ref names, ref code, ref env) = *closure {
let ss = self.stack.borrow_mut().pop();
if let Some(list) = ss {
if let Lisp::List(ref vals) = *list {
let env = env.clone();
for i in 0 .. names.len() {
env.borrow_mut().insert(names[i].clone(), vals[i].clone());
}

self.dump.borrow_mut().push(
DumpOP::DumpAP(
self.stack.clone(),
self.env.clone(),
self.code.clone(),
)
);

*self.stack.borrow_mut() = vec!();
self.env.borrow_mut().extend(env.into_inner());
*self.code.borrow_mut() = code.clone().into_inner();

return Ok(());
}
else {
return self.error(c, "RAP: expected List");
}
}
else {
return self.error(c, "RAP: stack is empty");
}
}
else {
return self.error(c, "RAP: expected Closure");
}
}
else {
return self.error(c, "RAP: stack is empty");
}
}

fn run_ret(&self, c: &CodeOPInfo) -> VMResult {
let s = self.stack.borrow_mut().pop();
if let Some(val) = s {
if let Some(DumpOP::DumpAP(ref stack, ref env, ref code)) = self.dump.borrow_mut().pop() {

*self.stack.borrow_mut() = stack.clone().into_inner();
*self.env.borrow_mut() = env.clone().into_inner();
*self.code.borrow_mut() = code.clone().into_inner();

self.stack.borrow_mut().push(val.clone());

return Ok(());
}
else {
return self.error(c, "RET: dump is empty");
}
}
else {
return self.error(c, "RET: stack is empty");
}
}

fn run_args(&self, c: &CodeOPInfo, n: usize) -> VMResult {
let mut ls = vec!();
for _ in 0 .. n {
match self.stack.borrow_mut().pop() {
None => {
return self.error(c, &format!("ARGS: {}", n));
}

Some(a) => {
ls.insert(0, a);
}
}
}

self.stack.borrow_mut().push(Rc::new(Lisp::List(Rc::new(ls))));
return Ok(());
}

fn run_puts(&self, c: &CodeOPInfo) -> VMResult {
match self.stack.borrow().last() {
None => {
return self.error(c, "PUTS: expected args");
}

Some(a) => {
println!("{}", **a);
return Ok(());
}
}
}

fn run_sel(&self, c: &CodeOPInfo, t: &Code, f: &Code) -> VMResult {
let s = self.stack.borrow_mut().pop();
if let Some(b) = s {
let code = match *b {
Lisp::True => t,
Lisp::False => f,
_ => return self.error(c, "SEL: expected bool")
};

self.dump.borrow_mut().push(
DumpOP::DumpSEL(
self.code.clone(),
)
);

*self.code.borrow_mut() = code.clone().into_inner();

return Ok(());
}
else {
return self.error(c, "SEL: stack is empty");
}
}

fn run_join(&self, c: &CodeOPInfo) -> VMResult {
let d = self.dump.borrow_mut().pop();
if let Some(dump) = d {
if let DumpOP::DumpSEL(ref code) = dump {
*self.code.borrow_mut() = code.clone().into_inner();

return Ok(());
}
else {
return self.error(c, "JOIN: expected DumpSEL");
}
}
else {
return self.error(c, "JOIN: dump is empty");
}
}
}


果たして動くのか

(動きます)

単純なフィボナッチ数計算でもやって見ましょう

(letrec 

fib
(lambda n
(if (eq n 0)
0
(if (eq n 1)
1
(+ (fib (- n 2)) (fib (- n 1))))))
(fib 30))

以下追記参照

❯ time cargo run example/fib.lisp

Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs
Running `target/debug/secd example/fib.lisp`
832040
cargo run example/fib.lisp 198.73s user 1.17s system 99% cpu 3:21.44 total

え?嘘でしょ?

3分半??


結論

遅い、マジで遅い

実装の難しさの割に合わない

簡単なベンチマークでHaskell版と比べ約40倍くらい遅い

なんでやんねん!!

RefCellの多用、無駄clone

慢心、環境の違い

ここまで遅いとただのバグな気もする

どなたかご教示ください


追記

Tamamuさんのコメントを元に修正、ありがとうございます

桁違いに早くなりましたがhaskell版には及ばず

❯ time cargo run ../example/fib.lisp --release

Finished release [optimized] target(s) in 0.0 secs
Running `/Users/tokiya/Documents/rust/secd/target/release/secd ../example/fib.lisp`
832040
cargo run ../example/fib.lisp --release 12.99s user 0.06s system 99% cpu 13.081 total

❯ time stack exec lisp lisp/fib.lisp

832040
stack exec lisp lisp/fib.lisp 4.67s user 0.76s system 131% cpu 4.134 total


追々記

blackenedgoldさん、わざわざコード見ていただき恐縮です

愚直にコメント通りにして見ました

があまり変わりないですね

❯ time cargo run example/fib.lisp --release

Finished release [optimized] target(s) in 0.0 secs
Running `target/release/secd example/fib.lisp`
832040
cargo run example/fib.lisp --release 11.94s user 0.05s system 99% cpu 12.018 total


追々々記

blackenedgoldさん、拙いコードに手を入れていただいてありがとうございます

素晴らしいです

大変参考になりました、本当にありがとうございます!