Rustで簡単なLLVMコンパイラ
Rustを使って、LLVM IRを出力する簡単なコンパイラを書いてみました。Haskell風の型システムが分かりやすく取り込まれていてよいです。メモリ管理は面倒なのですが、コンパイラに怒られながら、怒られなくなるまで頑張れば、だいたいちゃんと動くようです。
C言語と同じ低レベルの層で、リファレンスカウンタ方式のメモリ管理をしながら、安全なプログラムを書く事が出来ます。オブジェクト指向ではありませんが、コンパイラはもともとオブジェクト指向がなくても書ける技法が確立しているので、サクサク書けます。ただし、正規表現ライブラリが標準にはないので、お手軽にパーサを書く事は出来ないかもしれません。
Rustは仕様がゴリゴリ書き換えられているらしいのですが、現状どの程度変わっているかは分かりません。
Rustで一番悩む所は、おそらく、&から~への変換です。
~は自分がオーナーだぜ。俺の物だぜ。って言う意味で、&は借り物なので私の物ではありません。
ってことなのですが、借り物である&から俺の物である~へ変換したいけどどーしたらいいんだー!
って困ります。このプログラムでは、`~(copy *self)` のように値をコピーして新しい~を作ることで対処しています。
このプログラムはRust 0.7で書きました。
どうも、copyは消えて行く運命らしく、clone()やdeep_clone()を使うようになって行くようですが、tupleのcopyはまだどうしても必要っぽいです。
/// Entry point: builds a small sample program, lowers it to the `LL`
/// instruction list, writes LLVM IR to `e.ll`, then runs `llc`, `llvm-gcc`
/// and the resulting binary, printing each step's result.
fn main() {
    use ast::*;

    // print(11)
    let print_const = ~EPrint(~Ti(32), ~ELdc(~Ti(32), 11));
    // print(11 + 22)
    let sum = ~EAdd(~Ti(32), ~ELdc(~Ti(32), 11), ~ELdc(~Ti(32), 22));
    let print_sum = ~EPrint(~Ti(32), sum);
    let program = ~EBlock(~Tv, ~[print_const, print_sum]);
    println("ast=" + program.to_str());

    // Lower to the flat instruction list and emit it as LLVM IR.
    let ll = kNormal(program);
    println("ll=" + ll.to_str());
    emit("e.ll", ll);

    // Assemble, link and run; each result is printed as (status,stdout,stderr).
    let llc_result = exec("llc e.ll -o e.s");
    println(llc_result.to_str());
    let link_result = exec("llvm-gcc -m64 e.s -o e");
    println(link_result.to_str());
    let run_result = exec("./e");
    println(run_result.to_str());
}
/// Data types shared by all compiler stages: the expression tree (`E`), types
/// (`T`), operands (`R`) and the flat instruction list (`LL`), together with
/// their debug (`ToStr`) and LLVM-syntax (`P`) renderings.
pub mod ast {
    /// Source-level expression. Every node carries its type as first field.
    pub enum E {
        // Integer constant.
        ELdc(~T, int),
        // Binary operation; the string is the operator name (e.g. "add").
        EBin(~T, ~str, ~E, ~E),
        // Print the value of the inner expression.
        EPrint(~T, ~E),
        // Sequence of expressions.
        EBlock(~T, ~[~E]),
    }

    /// Builds the binary expression `a add b` of type `t`.
    pub fn EAdd(t: ~T, a: ~E, b: ~E) -> E {
        EBin(t, ~"add", a, b)
    }

    /// Builds the binary expression `a mul b` of type `t`.
    pub fn EMul(t: ~T, a: ~E, b: ~E) -> E {
        EBin(t, ~"mul", a, b)
    }

    /// Types of the language.
    pub enum T {
        // Integer; rendered by `p` as "i" followed by this number.
        Ti(int),
        // Void.
        Tv,
        // Function type: return type, then parameter types.
        TFun(~T, ~[~T]),
    }

    /// Operand of an instruction: a type plus a name. The variant selects the
    /// prefix used by `p`: `RG` -> "@", `RL` -> "%", `RR` -> "%.", `RN` -> none.
    pub enum R {
        pub RG(~T, ~str),
        pub RL(~T, ~str),
        pub RR(~T, ~str),
        pub RN(~T, ~str),
    }

    /// Flat instruction produced by the kNormal stage.
    pub enum LL {
        // Optional destination, callee, arguments.
        LLCall(Option<~R>, ~R, ~[~R]),
        // Optional destination, operator name, left and right operands.
        LLBin(Option<~R>, ~str, ~R, ~R),
    }

    /// Debug rendering of a boxed expression.
    impl ToStr for ~E {
        fn to_str(&self) -> ~str {
            match *self {
                ~ELdc(ref ty, n) =>
                    "ELdc(" + ty.to_str() + ", " + n.to_str() + ")",
                ~EBin(ref ty, ref oper, ref lhs, ref rhs) =>
                    "EBin(" + ty.to_str() + ", " + *oper + ", " + lhs.to_str() + ", " + rhs.to_str() + ")",
                ~EPrint(ref ty, ref inner) =>
                    "EPrint(" + ty.to_str() + ", " + inner.to_str() + ")",
                ~EBlock(ref ty, ref body) =>
                    "EBlock(" + ty.to_str() + ", " + body.to_str() + ")"
            }
        }
    }

    /// Debug rendering of a boxed type.
    impl ToStr for ~T {
        fn to_str(&self) -> ~str {
            match *self {
                ~Ti(bits) => "Ti(" + bits.to_str() + ")",
                ~Tv => ~"Tv",
                ~TFun(ref ret, ref args) =>
                    "TFun(" + ret.to_str() + "," + args.to_str() + ")",
            }
        }
    }

    /// Debug rendering of an unboxed type: copies the value into a fresh box
    /// and reuses the `~T` implementation.
    impl ToStr for T {
        fn to_str(&self) -> ~str {
            let boxed:~T = ~(copy *self);
            boxed.to_str()
        }
    }

    /// Two types are equal when their debug renderings are equal.
    impl Eq for T {
        pub fn eq(&self, t:&T) -> bool {
            let mine = self.to_str();
            let theirs = t.to_str();
            mine == theirs
        }
        pub fn ne(&self, t:&T) -> bool {
            !self.eq(t)
        }
    }

    impl R {
        /// Returns a copy of the operand's type.
        pub fn t(&self) -> ~T {
            match *self {
                RG(ref ty, _) => copy *ty,
                RL(ref ty, _) => copy *ty,
                RR(ref ty, _) => copy *ty,
                RN(ref ty, _) => copy *ty,
            }
        }
    }

    /// Debug rendering of a boxed operand.
    impl ToStr for ~R {
        fn to_str(&self) -> ~str {
            match *self {
                ~RG(ref ty, ref name) => "RG(" + ty.to_str() + "," + *name + ")",
                ~RL(ref ty, ref name) => "RL(" + ty.to_str() + "," + *name + ")",
                ~RR(ref ty, ref name) => "RR(" + ty.to_str() + "," + *name + ")",
                ~RN(ref ty, ref name) => "RN(" + ty.to_str() + "," + *name + ")",
            }
        }
    }

    /// Debug rendering of an optional operand.
    impl ToStr for Option<~R> {
        fn to_str(&self) -> ~str {
            match *self {
                None => ~"None",
                Some(ref reg) => "Some(" + reg.to_str() + ")",
            }
        }
    }

    /// Debug rendering of a boxed instruction.
    impl ToStr for ~LL {
        fn to_str(&self) -> ~str {
            match *self {
                ~LLCall(ref dest, ref callee, ref args) =>
                    "LLCall(" + dest.to_str() + "," + callee.to_str() + "," + args.to_str() + ")",
                ~LLBin(ref dest, ref oper, ref lhs, ref rhs) =>
                    "LLBin(" + dest.to_str() + "," + oper.to_str() + "," + lhs.to_str() + "," + rhs.to_str() + ")",
            }
        }
    }

    /// Rendering in the syntax written to the LLVM IR output file.
    pub trait P {
        fn p(&self) -> ~str;
    }

    /// Types print as `iN`, `void`, or `ret(arg, arg, ...)*` for functions.
    impl P for T {
        pub fn p(&self) -> ~str {
            match *self {
                Ti(ref bits) => "i" + bits.to_str(),
                Tv => ~"void",
                TFun(ref ret, ref args) => {
                    // Comma-separated parameter types; the separator is added
                    // only once the accumulator is non-empty.
                    let mut params = ~"";
                    for args.iter().advance |arg| {
                        if params == ~"" {
                            params = "" + arg.p();
                        } else {
                            params = params + ", " + arg.p();
                        }
                    }
                    ret.p() + "(" + params + ")*"
                }
            }
        }
    }

    /// Operands print as their name with a variant-specific prefix.
    impl P for R {
        pub fn p(&self) -> ~str {
            match *self {
                RG(_, ref name) => "@" + *name,
                RL(_, ref name) => "%" + *name,
                RR(_, ref name) => "%." + *name,
                RN(_, ref name) => "" + *name,
            }
        }
    }
}
/// Direct tree-walking evaluator for the expression tree.
mod interpreter {
    use ast::*;

    /// Evaluates `e` and returns its integer value.
    ///
    /// * `ELdc` yields its constant.
    /// * `EBin` supports the operators built by `EAdd` ("add") and `EMul`
    ///   ("mul"); any other operator name fails with "operator <name>".
    /// * `EPrint` evaluates its operand, prints it on its own line and
    ///   returns it.
    /// * `EBlock` evaluates every element in order and returns the value of
    ///   the last one, or 0 for an empty block.
    fn eval(e:&E)->int {
        match e {
            &ELdc(_, i) => i,
            &EBin(_, ~"add", ref a, ref b) => eval(*a) + eval(*b),
            // "mul" is what ast::EMul produces; without this arm such
            // expressions could be built but not evaluated.
            &EBin(_, ~"mul", ref a, ref b) => eval(*a) * eval(*b),
            &EBin(_, ref op, _, _) => fail!("operator "+*op),
            &EPrint(_, ref e) => {
                let v = eval(*e);
                println(v.to_str());
                v
            }
            &EBlock(_, ref ls) => {
                let mut last = 0;
                for ls.iter().advance |x| {
                    last = eval(*x);
                }
                last
            }
        }
    }
}
/// Lowering of the expression tree into a flat list of `LL` instructions
/// whose operands are registers or immediates.
mod kNormal {
    use ast::*;
    use genid;
    use std::vec;

    /// Returns a fresh register (`RR`) of type `t`.
    ///
    /// The name comes from `genid`, so every call yields a distinct register.
    /// With a fixed name, nested binary expressions would assign the same
    /// register more than once, which LLVM rejects.
    fn gid(t:~T)-> ~R {
        ~RR(t, genid(""))
    }

    // Instruction list being built. Starts as None; `apply` installs an
    // empty list before lowering and takes the result out afterwards.
    static mut ls:Option<~[~LL]> = None;

    /// Appends one instruction to the list being built.
    fn add(l:~LL) {
        unsafe {
            ls = Some(vec::append_one(ls.unwrap(), l));
        }
    }

    /// Lowers `a`, appending the instructions it needs, and returns the
    /// operand that holds its value.
    ///
    /// Fails with "type mismatch ..." when an operand's type differs from the
    /// type annotated on the `EBin` / `EPrint` node.
    fn f(a: &E)-> ~R {
        match a {
            &EBin(ref t,ref op, ref a1, ref b1) => {
                // Operands are lowered first so their instructions precede
                // the instruction that uses them.
                let a = f(*a1);
                let b = f(*b1);
                let id = gid(copy *t);
                if (*t != a.t() || *t != b.t()) {fail!("type mismatch " + t.to_str());}
                add(~LLBin(Some(copy id), copy *op, a, b));
                id
            }
            // Constants become immediates; no instruction is emitted.
            &ELdc(ref t, ref i) => ~RN(copy *t, i.to_str()),
            &EPrint(ref t, ref a1) => {
                let a = f(*a1);
                if (*t != a.t()) {fail!("type mismatch t=" + t.to_str() + " ta=" + a.t().to_str())}
                // Calls the global function print_<type>, e.g. print_i32.
                add(~LLCall(None, ~RG(~TFun(~Tv, ~[copy *t]), "print_" + t.p()), ~[copy a]));
                a
            }
            &EBlock(_,ref ls) => {
                // Lowers the elements in order; the result is the operand of
                // the last one, or the initial `r` for an empty block.
                fn f2(ls:&[~E],r:&R)-> ~R {
                    match ls {
                        [] => ~(copy *r),
                        [ref e, ..rest] => {
                            f2(rest,f(*e))
                        }
                    }
                }
                f2(*ls, ~RN(~Tv,~""))
            }
        }
    }

    /// Lowers `e` and returns the resulting instruction list.
    pub fn apply(e: &E) -> ~[~LL] {
        unsafe {
            ls = Some(~[]);
            f(e);
            ls.unwrap()
        }
    }
}
/// Lowers the expression tree `a` to a flat instruction list by delegating
/// to `kNormal::apply`.
fn kNormal(a: &ast::E) -> ~[~ast::LL] {
    let lowered = kNormal::apply(a);
    lowered
}
/// Concatenates the strings in `ls`, inserting `sep` between consecutive
/// elements. Returns the empty string for an empty slice.
///
/// The separator is placed between every pair of elements, including empty
/// ones: `join(["", "a"], ",")` is `",a"`.
pub fn join(ls:&[~str],sep:&str) -> ~str {
    let mut s:~str = ~"";
    // Track the first element explicitly. Testing whether `s` is still empty
    // would drop separators after leading empty strings.
    let mut first = true;
    for ls.iter().advance |t| {
        if first {
            first = false;
        } else {
            s = s + sep;
        }
        s = s + (*t);
    }
    s
}
/*
pub fn join<T:ToStr>(ls:&[~T],sep:&str) -> ~str {
let mut s:~str = ~"";
for ls.iter().advance |t| {
match s {
~"" => {s = ""+ (*t).to_str();}
_ => {s = s + sep + (*t).to_str();}
}
}
s
}
*/
/// Writes an `LL` instruction list to a file as LLVM IR text.
pub mod emit {
    use ast::*;
    use asm;
    use join;

    /// Emits one indented instruction line, prefixed with `<dest> = ` when a
    /// destination operand is present.
    fn o(id: &Option<~R>, out: &str) {
        match id {
            &Some(ref id) =>asm::__(id.p() + " = " + out),
            &None => asm::__(out),
        }
    }

    /// Emits the LLVM instruction for `l`.
    fn f(l: &LL) {
        match l {
            &LLCall(ref id, ref op, ref prms) => {
                // Each argument is rendered as "<type> <operand>".
                let ps:~[~str] = do prms.map |a|{a.t().p() + " " + a.p() };
                o(id, "call " + op.t().p() + " " + op.p() + "(" + join(ps,", ") + ") nounwind")
            }
            &LLBin(ref id, ref op, ref a, ref b) => {
                // The instruction type is taken from the left operand. The
                // kNormal stage checks that both operands have the node's
                // type, so this matches the destination's type. It also
                // avoids copying and unwrapping `id`, which failed when the
                // instruction had no destination.
                o(id, *op + " " + a.t().p() + " " + a.p() + ", " + b.p())
            }
        }
    }

    /// Writes a complete LLVM module to `file`: the `print_i32` / `print_i8`
    /// helpers built on `printf`, then a `main` function whose body is the
    /// instructions in `ls` followed by `ret i32 0`.
    pub fn apply(file: &str, ls: &[~LL]) {
        asm::open(file);
        // Format string "%d\n\0" used by the print helpers.
        asm("@.str = private constant [4 x i8] c\"%d\\0A\\00\"");
        asm("define void @print_i32(i32 %a) nounwind ssp {");
        asm("entry:");
        asm::__("call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %a) nounwind");
        asm::__("ret void");
        asm("}");
        asm("define void @print_i8(i8 %a) nounwind ssp {");
        asm("entry:");
        asm::__("call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8 %a) nounwind");
        asm::__("ret void");
        asm("}");
        asm("declare i32 @printf(i8*, ...) nounwind");
        asm("define i32 @main() nounwind ssp {");
        asm("entry:");
        for ls.iter().advance |l| {
            f(*l);
        }
        asm::__("ret i32 0");
        asm("}");
        asm::close();
    }
}
/// Writes the instruction list `ls` to `file` as LLVM IR by delegating to
/// `emit::apply`.
pub fn emit(file: &str, ls: &[~ast::LL]) {
    emit::apply(file, ls)
}
/// Returns `s` followed by the next value of a program-wide counter.
///
/// The counter starts at 0 and is incremented before use, so the first call
/// appends "1", the second "2", and so on.
pub fn genid(s:&str) -> ~str {
    static mut counter:int = 0;
    unsafe {
        counter += 1;
        let suffix = counter.to_str();
        s + suffix
    }
}
/// Reads the whole of `file` and returns its contents as a string.
///
/// Fails if the file cannot be read.
fn readAll(file:&str) -> ~str {
    use std::io;
    use std::result;
    // Read the file in one call. A read_line()/eof() loop appends "\n" after
    // every read, including the final empty read at end of file, so the
    // result would not match the file's contents.
    result::get(&io::read_whole_file_str(&Path(file)))
}
/// Opens `file` through a buffered file writer and writes `s` to it.
///
/// Fails if the writer cannot be created.
fn writeAll(file:&str, s:&str) {
    use std::io;
    use std::result;
    let target = Path(file);
    let opened = io::buffered_file_writer(&target);
    let out = result::get(&opened);
    out.write_str(s);
}
/// Line-oriented output to a single, globally held file writer.
pub mod asm {
    use std::io;
    use std::result;

    // The currently open output, if any. Set by `open`, cleared by `close`.
    pub static mut writer: Option<@Writer> = None;

    /// Opens `file` for writing and makes it the current output.
    /// Fails if the writer cannot be created.
    pub fn open(file:&str) {
        let path = Path(file);
        unsafe {
            writer = Some(result::get(&io::buffered_file_writer(&path)));
        }
    }

    /// Writes `s` followed by a newline to the current output.
    /// Fails if no output is open.
    pub fn println(s:&str) {
        let line = s + "\n";
        unsafe {
            writer.unwrap().write_str(line);
        }
    }

    /// Clears the current output.
    pub fn close() {
        unsafe {
            writer = None;
        }
    }

    /// Writes `s` as a line prefixed with one space.
    pub fn __(s:&str) {
        let indented = " " + s;
        println(indented);
    }
}
/// Writes `s` as one unindented line to the current assembly output by
/// delegating to `asm::println`.
pub fn asm(s:&str) {
    asm::println(s)
}
/// Result of running an external command via `exec`.
pub struct ExecOutput {
    // Status value reported for the process.
    status: int,
    // Captured standard output.
    output: ~str,
    // Captured standard error.
    error: ~str,
}

/// Renders as "(status,output,error)".
impl ToStr for ExecOutput {
    fn to_str(&self) -> ~str {
        let status = self.status.to_str();
        "(" + status + "," + self.output + "," + self.error + ")"
    }
}
fn exec(cmd:&str) -> ExecOutput {
use std::str;
use std::run;
let mut cmds:~[&str] = cmd.split_str_iter(" ").collect();
let prog:~str = ""+cmds.shift();
let args:&[~str] = (do cmds.map |&s| {(s+"")});
let o = run::process_output(prog,args);
unsafe {
ExecOutput {
status : o.status,
output : str::raw::from_bytes(o.output),
error : str::raw::from_bytes(o.error),
}
}
}