231
社区成员
作业内容描述:
encode
函数。encode
函数任务1、3、4的代码实现见Github。
PPT中给出了encode
函数的主体框架。实际使用ReScript实现时需要进行一些细节上的改动。
注意Label(string)
指令是只有汇编器能够识别的伪指令,它对应的实际指令长度为0。
encode
函数将汇编指令处理了两遍。第一遍用于构建Label的名称到地址的映射表,同时计算最终生成的二进制代码的总长度。第二遍将汇编指令翻译为二进制代码,汇编指令中的Label名称都会使用第一遍处理得到的映射表转换为地址。
PPT中label映射表使用的是简单的HashMap,映射表的键类型为string,所以更适合使用Belt.HashMap.String
,这种特殊的HashMap不再需要我们手动设计string到int的Hash函数。
PPT中使用的Int32Array
,我在Belt库中也找到了类似的实现,但是该实现的make
函数比较难用,而且帮助文档也没有说清楚使用方法。作为替代,我使用Belt.Array
实现,该实现的make
函数需要指定Array的长度。第一遍处理结束后的position
变量可以作为Array初始化的总长度。
encode
函数代码如下:
let size_of_instr = (instr: instr) : int => switch instr {
| Cst(_) => 2
| Var(_) => 2
| Label(_) => 0
| Goto(_) => 2
| IfZero(_) => 2
| Call(_,_) => 3
| Ret(_) => 2
| _ => 1
}
let encode = (instrs: array<instr>) : array<int> => {
let position = ref(0)
let label_map = Belt.HashMap.String.make(~hintSize=10)
for cur in 0 to Belt.Array.length(instrs) -1 {
// construct label map
switch instrs[cur] {
| Label(l) => Belt.HashMap.String.set(label_map,l,position.contents)
| instr => position := position.contents + size_of_instr(instr)
}
// record the PC per earch label
}
let int_code:array<int> = Belt.Array.make(position.contents,0)
position := 0
for cur in 0 to Belt.Array.length(instrs) - 1 {
// translate to int_code
switch instrs[cur] {
| Cst(i) => {
int_code[position.contents] = 0
int_code[position.contents+1] = i
}
| Add => {
int_code[position.contents] = 1
}
| Mul => {
int_code[position.contents] = 2
}
| Var(i) => {
int_code[position.contents] = 3
int_code[position.contents+1] = i
}
| Pop => {
int_code[position.contents] = 4
}
| Swap => {
int_code[position.contents] = 5
}
| Call(l, n) => {
let label_addr = switch Belt.HashMap.String.get(label_map, l) {
| Some (addr) => addr
| _ => assert false
}
int_code[position.contents] = 6
int_code[position.contents+1] = label_addr
int_code[position.contents+2] = n
}
| Ret(n) => {
int_code[position.contents] = 7
int_code[position.contents+1] = n
}
| IfZero(l) => {
let label_addr = switch Belt.HashMap.String.get(label_map, l) {
| Some (addr) => addr
| _ => assert false
}
int_code[position.contents] = 8
int_code[position.contents+1] = label_addr
}
| Goto(l) => {
let label_addr = switch Belt.HashMap.String.get(label_map, l) {
| Some (addr) => addr
| _ => assert false
}
int_code[position.contents] = 9
int_code[position.contents+1] = label_addr
}
| Exit => {
int_code[position.contents] = 10
}
| Leq => {
int_code[position.contents] = 11
}
| Label(_) => {
assert true
}
}
position := position.contents + size_of_instr(instrs[cur])
}
int_code
}
我使用C++对虚拟机进行了简单的实现。任务2代码地址见GitHub。实现的思想和代码的主体框架基本与PPT中的一致。区别在于我的实现中SP
栈顶指针总是指向栈顶的有效值,程序初始化时初始化为-1
。
为了辅助任务3,测试斐波那契函数,在原有的指令基础上,增加了LEQ
指令,用来比较栈顶两数的大小。
堆栈机的driver.cpp
中预设了部分测试用例,如果测试全部通过,会提示测试通过。
编译器的整体实现分为预处理、函数编译、表达式编译三个大部分。
本节引入的TinyLanguage4新增了表达式类型Letfn
,声明并定义一个函数,表达式可以通过引用该函数名调用该函数。函数名的引入也使得函数能够递归地进行定义。
辅助函数remove_function
的实现,主要是递归遍历所有子表达式,将所有Letfn
类型的表达式的函数名、参数列表、函数体的内容都去掉。
辅助函数collect_function
的实现,主要是递归遍历所有子表达式,将所有Letfn
类型的表达式的函数名、参数列表、函数体的内容都提取出来。注意Letfn
类型的表达式的函数体部分仍然有可能包含嵌套的Letfn
类型表达式,所以在提取时需要使用remove_function
函数进行处理;Letfn
类型的表达式提取完成后,还需要递归地从函数体和函数作用域生效表达式中提取子表达式中的Letfn
类型表达式。
预处理部分的代码如下:
let rec remove_funs = (expr: expr): Flat.expr => switch expr {
| Cst(i) => Flat.Cst(i)
| Var(s) => Flat.Var(s)
| Let(name, def, body) => Flat.Let(name, remove_funs(def), remove_funs(body))
| Letfn(_, _, _, body) => remove_funs(body)
| App(fname, args) => Flat.App(fname, args->Belt.List.map(remove_funs))
| Prim(p, args) => Flat.Prim(p, args->Belt.List.map(remove_funs))
| If(cond, bTrue, bFalse) => Flat.If(remove_funs(cond), remove_funs(bTrue), remove_funs(bFalse))
}
let rec collect_funs = (expr: expr): list<fun> => switch expr {
| Cst(_) => list{}
| Var(_) => list{}
| Let(_, def, body) => Belt.List.concatMany([
collect_funs(def),
collect_funs(body)
])
| Letfn(fname, args, def, app) => Belt.List.concatMany([
list{(fname, args, remove_funs(def))},
collect_funs(def),
collect_funs(app)
])
| App(_, args) => Belt.List.concatMany(args->Belt.List.map(collect_funs)->Belt.List.toArray)
| Prim(_, args) => Belt.List.concatMany(args->Belt.List.map(collect_funs)->Belt.List.toArray)
| If(cond, bTrue, bFalse) => Belt.List.concatMany([
collect_funs(cond),
collect_funs(bTrue),
collect_funs(bFalse)
])
}
// Preprocessing
let preprocess = (expr: expr): list<fun> => {
let main = ("main", list{}, remove_funs(expr))
let rest = collect_funs(expr)
list{ main, ...rest }
}
经过预处理后,得到一个函数列表,其中每个函数都有自己的函数名、参数列表、函数体。编译的主表达式命名为main
函数。这一部分的代码在PPT中已经给出。
关于对main
函数添加参数的问题,需要修改main
函数的调用语句和参数列表。还需要执行环境在调用main
函数前向栈中压入参数。
// compile functions
let compile_fun = ((name, args, body): fun): list<instr> => {
let n = Belt.List.length(args)
let env = Belt.List.reverse(args->Belt.List.map((a) => Local(a)))
Belt.List.concatMany([
list{Assembler.StackMachine.Label(name)},
compile_expr(env, body),
list{Assembler.StackMachine.Ret(n)}
])
}
// compile the whole program
let compile = (expr: expr) : list<instr> => {
let funs = preprocess(expr)
let funs_code = Belt.List.concatMany(funs->Belt.List.map(compile_fun)->Belt.List.toArray)
list{
Call("main",0),
Exit,
...funs_code
}
}
如上所示,编译If
表达式时需要对每个表达式生成两个Label,分别用于标记假条件分支开始位置和If
表达式结束位置。使用变量记录生成Label的序号,每个Label以.L
开头,后加Label序号,保证Label不会冲突。
代码如下所示:
type var = Local(string) | Temp // Params and locals are treated uniformly
type env = list<var>
let get_index = (env: env, ele: string) : option<int> => {
let rec index_getter = (env: env, ele: string, level: int) : option<int> => switch env {
| list{} => None
| list{a, ..._} if a == Local(ele) => Some(level)
| list{_, ...b} => index_getter(b, ele, level+1)
}
index_getter(env, ele, 0)
}
let label_counter = ref(0)
let get_new_label = () : string => {
label_counter.contents = label_counter.contents + 1
".L" ++ Js.Int.toString(label_counter.contents)
}
// compile expression under a compile-time environment
let rec compile_expr = (env:env, expr: Flat.expr) : list<instr> => switch expr {
| Cst(i) => list{Assembler.StackMachine.Cst(i)}
| Var(x) => switch get_index(env, x) {
| Some(i) => list{Assembler.StackMachine.Var(i)}
| _ => assert false
}
| Let(x, def, app) => Belt.List.concatMany([
compile_expr(env, def),
compile_expr(list{Local(x), ...env}, app),
list{Assembler.StackMachine.Swap, Assembler.StackMachine.Pop}
])
| Prim(p, args) => {
let primary = switch p {
| Add => Assembler.StackMachine.Add
| Mul => Assembler.StackMachine.Mul
| Leq => Assembler.StackMachine.Leq
| _ => assert false
}
let args_code = compile_exprs(env, args)
Belt.List.concatMany([
args_code,
list{primary}
])
}
| App(fn, args) => {
let n = Belt.List.length(args)
let args_code = compile_exprs(env, args)
Belt.List.concatMany([
args_code,
list{Assembler.StackMachine.Call(fn, n)}
])
}
| If(cond, bTrue, bFalse) => {
let bFalse_label = get_new_label()
let bEnd_label = get_new_label()
Belt.List.concatMany([
compile_expr(env, cond),
list{Assembler.StackMachine.IfZero(bFalse_label)},
compile_expr(env, bTrue),
list{Assembler.StackMachine.Goto(bEnd_label)},
list{Assembler.StackMachine.Label(bFalse_label)},
compile_expr(env, bFalse),
list{Assembler.StackMachine.Label(bEnd_label)}
])
}
}
and compile_exprs = (env: env, exprs: list<Flat.expr>) : list<instr> => switch exprs {
| list{} => list{}
| list{expr, ...res} => Belt.List.concatMany([
compile_expr(env, expr),
compile_exprs(list{Temp, ...env}, res)
])
}
实现参考lecture 2第一部分的TinyLanguage3的解释器。
Letfn
类型表达式实际上声明、定义了一个函数,并给出了函数作用域生效的表达式。为了支持递归,需要在函数作用域生效的表达式的求值环境中增加从函数名到函数闭包的映射。这是与lecture 2 TinyLanguage3 中的匿名函数表达式类型fn
不同的地方。
通过实现解释器,可以发现TinyLanguage4 其实无法实现两函数交替调用的递归,但如果我们将交替调用的函数使用汇编指令实现,则可以在汇编层面上实现交替调用的递归。也许我们通过编译,将程序语义从表达能力较弱的语言中转换到了表达能力较强的语言中。
type rec value =
| Vint (int)
| Vclosure (env, list<string>, expr)
and env = list<(string, value)>
let vadd = (v1, v2) : value => switch (v1,v2) {
| (Vint(i), Vint(j)) => Vint(i+j)
| _ => assert false
}
let vmul = (v1, v2) : value => switch (v1,v2) {
| (Vint(i), Vint(j)) => Vint(i*j)
| _ => assert false
}
let vleq = (v1, v2) : value => switch (v1,v2) {
| (Vint(i), Vint(j)) => Vint(i <= j ? 1 : 0)
| _ => assert false
}
let eval = (expr: expr): int => {
let rec evalHelper = (expr:expr, env: env) : value => switch expr {
| Cst(i) => Vint(i)
| Prim(p, es) => {
let v1 = evalHelper(es->Belt.List.getExn(0),env)
let v2 = evalHelper(es->Belt.List.getExn(1),env)
switch p {
| Add => vadd(v1,v2)
| Mul => vmul(v1,v2)
| Leq => vleq(v1,v2)
| _ => assert false
}
}
| Var(x) => switch Belt.List.getAssoc(env, x, (a,b)=>a==b) {
| Some(v) => v
| _ => assert false
}
| Let(vname, def, app) => evalHelper(app, list{(vname,evalHelper(def, env)), ...env})
| Letfn(fn, args, body, app) => {
let closure = Vclosure(env, args, body)
evalHelper(app, list{(fn, closure), ...env})
}
| App(fn, args) => {
let Vclosure(env_closure, args_closure, body) = switch Belt.List.getAssoc(env, fn, (a,b)=>a==b) {
| Some(v) => v
| _ => assert false
}
let params = args->Belt.List.map((e)=> evalHelper(e, env))
let fun_env = Belt.List.concatMany([Belt.List.zip(args_closure, params), env])
evalHelper(body, fun_env)
}
| If(cond, bTrue, bFalse) => {
let Vint(c) = evalHelper(cond, env)
if c == 0 {
evalHelper(bFalse, env)
}
else {
evalHelper(bTrue, env)
}
}
}
let Vint(retval) = evalHelper(expr, list{})
retval
}
测试代码如下,主要对PPT中的两个函数的例子(cube
函数和fib
函数)进行测试。
module Test = {
module Comp = Compiler.Compile
module Lang = Compiler.TinyLang4
module Asm = Assembler.StackMachine
let testCases = {
open Lang
list{
Let("a", Cst(2),
Letfn("cube", list{"x"},
Letfn("square", list{"x"},
Prim(Mul,list{Var("x"),Var("x")}),
Prim(Mul,list{App("square",list{Var("x")}),Var("x")})),
App("cube",list{Var("a")})))
,
Letfn("fib", list{"n"},
If(Prim(Leq, list{Var("n"), Cst(1)}),
Cst(1),
Prim(Add, list{
App("fib", list{Prim(Add, list{Var("n"), Cst(-1)})}),
App("fib", list{Prim(Add, list{Var("n"), Cst(-2)})})
})),
App("fib",list{Cst(5)}))
}
}
let test = {
let unitTest = (expr: Lang.expr) => {
Js.log("---")
Js.log(Lang.toString(expr))
let evaluated = Lang.eval(expr)
Js.log("Evaluated to : " ++ Js.Int.toString(evaluated))
let asm = Comp.compile(expr)->Belt.List.toArray
Js.log("Compiles to : ")
Js.log(Asm.toString(asm))
let byte = Asm.encode(asm)
Js.log("Assembles to stack machine code: ")
Js.log(byte)
}
testCases->Belt.List.forEach(unitTest)
}
}
测试输出如下:
---
let a = {2} in {letfn cube(x) = {letfn square(x) = {x * x} in {square(x) * x}} in {cube(a)}}
Evaluated to : 8
Compiles to :
[Call(main,0);Exit;Label(main);Cst(2);Var(0);Call(cube,1);Swap;Pop;Ret(0);Label(cube);Var(0);Call(square,1);Var(1);Mul;Ret(1);Label(square);Var(0);Var(1);Mul;Ret(1)]
Assembles to stack machine code:
[
6, 4, 0, 10, 0, 2, 3, 0, 6,
15, 1, 5, 4, 7, 0, 3, 0, 6,
25, 1, 3, 1, 2, 7, 1, 3, 0,
3, 1, 2, 7, 1
]
---
letfn fib(n) = {if n <= 1 then {1} else {fib(n + -1) + fib(n + -2)}} in {fib(5)}
Evaluated to : 8
Compiles to :
[Call(main,0);Exit;Label(main);Cst(5);Call(fib,1);Ret(0);Label(fib);Var(0);Cst(1);Leq;IfZero(.L1);Cst(1);Goto(.L2);Label(.L1);Var(0);Cst(-1);Add;Call(fib,1);Var(1);Cst(-2);Add;Call(fib,1);Add;Label(.L2);Ret(1)]
Assembles to stack machine code:
[
6, 4, 0, 10, 0, 5, 6, 11, 1, 7,
0, 3, 0, 0, 1, 11, 8, 22, 0, 1,
9, 39, 3, 0, 0, -1, 1, 6, 11, 1,
3, 1, 0, -2, 1, 6, 11, 1, 1, 7,
1
]