From 5a9545766821548039f5577746c3b18fa4e2edec Mon Sep 17 00:00:00 2001 From: myfreess Date: Thu, 5 Dec 2024 15:03:10 +0800 Subject: [PATCH] G-Machine Part2 --- next/sources/gmachine/src/part2/ast.mbt | 87 +++++ next/sources/gmachine/src/part2/compile.mbt | 183 +++++++++ .../gmachine/src/part2/instruction.mbt | 26 ++ next/sources/gmachine/src/part2/moon.pkg.json | 1 + next/sources/gmachine/src/part2/programs.mbt | 41 ++ next/sources/gmachine/src/part2/syntax.mbt | 332 ++++++++++++++++ next/sources/gmachine/src/part2/top.mbt | 37 ++ next/sources/gmachine/src/part2/vm.mbt | 358 ++++++++++++++++++ next/tutorial/example/gmachine/gmachine-2.md | 213 +++-------- 9 files changed, 1113 insertions(+), 165 deletions(-) create mode 100644 next/sources/gmachine/src/part2/ast.mbt create mode 100644 next/sources/gmachine/src/part2/compile.mbt create mode 100644 next/sources/gmachine/src/part2/instruction.mbt create mode 100644 next/sources/gmachine/src/part2/moon.pkg.json create mode 100644 next/sources/gmachine/src/part2/programs.mbt create mode 100644 next/sources/gmachine/src/part2/syntax.mbt create mode 100644 next/sources/gmachine/src/part2/top.mbt create mode 100644 next/sources/gmachine/src/part2/vm.mbt diff --git a/next/sources/gmachine/src/part2/ast.mbt b/next/sources/gmachine/src/part2/ast.mbt new file mode 100644 index 00000000..65cdbe18 --- /dev/null +++ b/next/sources/gmachine/src/part2/ast.mbt @@ -0,0 +1,87 @@ +typealias List[E] = @immut/list.T[E] + + +enum RawExpr[T] { + Var(T) + Num(Int) + Constructor(tag~:Int, arity~:Int) // tag, arity + App(RawExpr[T], RawExpr[T]) + Let(Bool, List[(T, RawExpr[T])], RawExpr[T]) // isRec, Defs, Body + Case(RawExpr[T], List[(Int, List[T], RawExpr[T])]) +} derive(Show) + +struct ScDef[T] { + name : String + args : List[T] + body : RawExpr[T] +} derive(Show) + +fn is_atom[T](self : RawExpr[T]) -> Bool { + match self { + Var(_) => true + Num(_) => true + _ => false + } +} + +fn binders_of[L, R](l : List[(L, R)]) -> List[L] { + fn fst(pair) { + let (l, _) = pair + return l + } + + l.map(fst) +} + +fn rhss_of[L, R](l : List[(L, R)]) -> List[R] { + fn snd(pair) { + let (_, r) = pair + return r + } + + l.map(snd) +} + +fn ScDef::new[T]( + name : String, + args : List[T], + body : RawExpr[T] +) -> ScDef[T] { + { name : name, args : args, body : body } +} + +let prelude_defs : List[ScDef[String]] = { + let args : (FixedArray[String]) -> List[String] = List::of + let id = ScDef::new("I", args(["x"]), Var("x")) // id x = x + let k = + ScDef::new( + "K", + args(["x", "y"]), + Var("x") + ) // K x y = x + let k1 = + ScDef::new( + "K1", + args(["x", "y"]), + Var("y") + ) // K1 x y = y + let s = + ScDef::new( + "S", + args(["f", "g", "x"]), + App(App(Var("f"), Var("x")), App(Var("g"), Var("x"))) + ) // S f g x = f x (g x) + let compose = + ScDef::new( + "compose", + args(["f", "g", "x"]), + App(Var("f"), App(Var("g"), Var("x"))) + ) // compose f g x = f (g x) + let twice = + ScDef::new( + "twice", + args(["f"]), + App(App(Var("compose"), Var("f")), Var("f")) + ) // twice f = compose f f + List::of([id, k, k1, s, compose, twice]) +} \ No newline at end of file diff --git a/next/sources/gmachine/src/part2/compile.mbt b/next/sources/gmachine/src/part2/compile.mbt new file mode 100644 index 00000000..307b11b8 --- /dev/null +++ b/next/sources/gmachine/src/part2/compile.mbt @@ -0,0 +1,183 @@ +fn compileSC(self : ScDef[String]) -> (String, Int, List[Instruction]) { + let name = self.name + let body = self.body + let mut arity = 0 + fn gen_env(i : Int, args : List[String]) -> List[(String, Int)] { + match args { + Nil => { + arity = i + return Nil + } + Cons(s, ss) => Cons((s, i), gen_env(i + 1, ss)) + } + } + + let env = gen_env(0, self.args) + (name, arity, compileR(body, env, arity)) +} + + +fn compileR( + self : RawExpr[String], + env : List[(String, Int)], + arity : Int +) -> List[Instruction] { + if arity == 0 { + compileC(self, env) + List::of([Update(arity), Unwind]) + } else { + compileC(self, env) + List::of([Update(arity), Pop(arity), Unwind]) + } +} + +fn compileC( + self : RawExpr[String], + env : List[(String, Int)] +) -> List[Instruction] { + match self { + Var(s) => + match env.lookup(s) { + None => List::of([PushGlobal(s)]) + Some(n) => List::of([Push(n)]) + } + Num(n) => List::of([PushInt(n)]) + App(e1, e2) => + compileC(e2, env) + compileC(e1, argOffset(1, env)) + List::of([MkApp]) + Let(rec, defs, e) => + if rec { + compileLetrec(compileC, defs, e, env) + } else { + compileLet(compileC, defs, e, env) + } + _ => abort("not support yet") + } +} + +fn argOffset(n : Int, env : List[(String, Int)]) -> List[(String, Int)] { + env.map(fn { (name, offset) => (name, offset + n) }) +} + +// start compile_let definition +fn compileLet( + comp : (RawExpr[String], List[(String, Int)]) -> List[Instruction], + defs : List[(String, RawExpr[String])], + expr : RawExpr[String], + env : List[(String, Int)] +) -> List[Instruction] { + let (env, codes) = loop env, List::Nil, defs { + env, acc, Nil => (env, acc) + env, acc, Cons((name, expr), rest) => { + let code = compileC(expr, env) + let env = List::Cons((name, 0), argOffset(1, env)) + continue env, acc + code, rest + } + } + codes + comp(expr, env) + List::of([Slide(defs.length())]) +} +// end compile_let definition + +// start compile_letrec definition +fn compileLetrec( + comp : (RawExpr[String], List[(String, Int)]) -> List[Instruction], + defs : List[(String, RawExpr[String])], + expr : RawExpr[String], + env : List[(String, Int)] +) -> List[Instruction] { + let mut env = env + loop defs { + Nil => () + Cons((name, _), rest) => { + env = Cons((name, 0), argOffset(1, env)) + continue rest + } + } + let n = defs.length() + fn compileDefs( + defs : List[(String, RawExpr[String])], + offset : Int + ) -> List[Instruction] { + match defs { + Nil => comp(expr, env) + List::of([Slide(n)]) + Cons((_, expr), rest) => + compileC(expr, env) + + Cons(Update(offset), compileDefs(rest, offset - 1)) + } + } + + Cons(Alloc(n), compileDefs(defs, n - 1)) +} +// end compile_letrec definition + +// start prim definition +let compiled_primitives : List[(String, Int, List[Instruction])] = List::of( + [ + // Arith + ( + "add", + 2, + List::of([Push(1), Eval, Push(1), Eval, Add, Update(2), Pop(2), Unwind]), + ), + ( + "sub", + 2, + List::of([Push(1), Eval, Push(1), Eval, Sub, Update(2), Pop(2), Unwind]), + ), + ( + "mul", + 2, + List::of([Push(1), Eval, Push(1), Eval, Mul, Update(2), Pop(2), Unwind]), + ), + ( + "div", + 2, + List::of([Push(1), Eval, Push(1), Eval, Div, Update(2), Pop(2), Unwind]), + ), + // Compare + ( + "eq", + 2, + List::of([Push(1), Eval, Push(1), Eval, Eq, Update(2), Pop(2), Unwind]), + ), + ( + "neq", + 2, + List::of([Push(1), Eval, Push(1), Eval, Ne, Update(2), Pop(2), Unwind]), + ), + ( + "ge", + 2, + List::of([Push(1), Eval, Push(1), Eval, Ge, Update(2), Pop(2), Unwind]), + ), + ( + "gt", + 2, + List::of([Push(1), Eval, Push(1), Eval, Gt, Update(2), Pop(2), Unwind]), + ), + ( + "le", + 2, + List::of([Push(1), Eval, Push(1), Eval, Le, Update(2), Pop(2), Unwind]), + ), + ( + "lt", + 2, + List::of([Push(1), Eval, Push(1), Eval, Lt, Update(2), Pop(2), Unwind]), + ), + // MISC + ("negate", 1, List::of([Push(0), Eval, Neg, Update(1), Pop(1), Unwind])), + ( + "if", + 3, + List::of( + [ + Push(0), + Eval, + Cond(List::of([Push(1)]), List::of([Push(2)])), + Update(3), + Pop(3), + Unwind, + ], + ), + ), + ], +) +// end prim definition \ No newline at end of file diff --git a/next/sources/gmachine/src/part2/instruction.mbt b/next/sources/gmachine/src/part2/instruction.mbt new file mode 100644 index 00000000..d98a6542 --- /dev/null +++ b/next/sources/gmachine/src/part2/instruction.mbt @@ -0,0 +1,26 @@ +// start instr definition +enum Instruction { + Unwind + PushGlobal(String) + PushInt(Int) + Push(Int) + MkApp + Slide(Int) + Update(Int) + Pop(Int) + Alloc(Int) + Eval + Add + Sub + Mul + Div + Neg + Eq // == + Ne // != + Lt // < + Le // <= + Gt // > + Ge // >= + Cond(List[Instruction], List[Instruction]) +} derive (Eq, Show) +// end instr definition \ No newline at end of file diff --git a/next/sources/gmachine/src/part2/moon.pkg.json b/next/sources/gmachine/src/part2/moon.pkg.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/next/sources/gmachine/src/part2/moon.pkg.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/next/sources/gmachine/src/part2/programs.mbt b/next/sources/gmachine/src/part2/programs.mbt new file mode 100644 index 00000000..bb6b38b3 --- /dev/null +++ b/next/sources/gmachine/src/part2/programs.mbt @@ -0,0 +1,41 @@ +let programs : @hashmap.T[String, String] = { + let programs = @hashmap.new(capacity=40) + programs["square"] = + #| (defn square[x] (mul x x)) + programs["fix"] = + #| (defn fix[f] (letrec ([x (f x)]) x)) + programs["isNil"] = + #| (defn isNil[x] + #| (case x [(Nil) 1] [(Cons n m) 0])) + programs["tail"] = + #| (defn tail[l] (case l [(Cons x xs) xs])) + programs["fibs"] = + // fibs = 0 : 1 : zipWith (+) fibs (tail fibs) + #| (defn fibs[] (Cons 0 (Cons 1 (zipWith add fibs (tail fibs))))) + programs["take"] = + #| (defn take[n l] + #| (case l + #| [(Nil) Nil] + #| [(Cons x xs) + #| (if (le n 0) Nil (Cons x (take (sub n 1) xs)))])) + programs["zipWith"] = + #| (defn zipWith[op l1 l2] + #| (case l1 + #| [(Nil) Nil] + #| [(Cons x xs) + #| (case l2 + #| [(Nil) Nil] + #| [(Cons y ys) (Cons (op x y) (zipWith op xs ys))])])) + programs["factorial"] = + #| (defn factorial[n] + #| (if (eq n 0) 1 (mul n (factorial (sub n 1))))) + programs["abs"] = + #| (defn abs[n] + #| (if (lt n 0) (negate n) n)) + programs["length"] = + #| (defn length[l] + #| (case l + #| [(Nil) 0] + #| [(Cons x xs) (add 1 (length xs))])) + programs +} diff --git a/next/sources/gmachine/src/part2/syntax.mbt b/next/sources/gmachine/src/part2/syntax.mbt new file mode 100644 index 00000000..3a59bf26 --- /dev/null +++ b/next/sources/gmachine/src/part2/syntax.mbt @@ -0,0 +1,332 @@ +enum Token { + DefFn + Let + NIL + CONS + Case + Letrec + Open(Char) // { [ ( + Close(Char) // } ] ) + Id(String) + Number(Int) + EOF +} derive(Eq, Show) + +fn between(this : Char, lw : Char, up : Char) -> Bool { + this >= lw && this <= up +} + +fn isDigit(this : Char) -> Bool { + between(this, '0', '9') +} + +fn isAlpha(this : Char) -> Bool { + between(this, 'A', 'Z') || between(this, 'a', 'z') +} + +fn isIdChar(this : Char) -> Bool { + isAlpha(this) || isDigit(this) || this == '_' || this == '-' +} + +fn isWhiteSpace(this : Char) -> Bool { + this == ' ' || this == '\t' || this == '\n' +} + +fn to_number(this : Char) -> Int { + this.to_int() - 48 +} + +fn isOpen(this : Char) -> Bool { + this == '(' || this == '[' || this == '{' +} + +fn isClose(this : Char) -> Bool { + this == ')' || this == ']' || this == '}' +} + +struct Tokens { + tokens : Array[Token] + mut current : Int +} derive(Show) + +fn Tokens::new(tokens : Array[Token]) -> Tokens { + Tokens::{ tokens, current: 0 } +} + +fn peek(self : Tokens) -> Token { + if self.current < self.tokens.length() { + return self.tokens[self.current] + } else { + return EOF + } +} + +type! ParseError String + +fn next(self : Tokens, loc~ : SourceLoc = _) -> Unit { + self.current = self.current + 1 + if self.current > self.tokens.length() { + abort("Tokens::next(): \{loc}") + } +} + +fn eat(self : Tokens, tok : Token, loc~ : SourceLoc = _) -> Unit!ParseError { + let __tok = self.peek() + // assert tok_ != EOF + if __tok != tok { + raise ParseError("\{loc} - Tokens::eat(): expect \{tok} but got \{__tok}") + } else { + self.next() + } +} + +fn tokenize(source : String) -> Tokens { + let tokens : Array[Token] = Array::new(capacity=source.length() / 2) + let mut current = 0 + let source = source.to_array() + fn peek() -> Char { + source[current] + } + + fn next() -> Unit { + current = current + 1 + } + + while current < source.length() { + let ch = peek() + if isWhiteSpace(ch) { + next() + continue + } else if isDigit(ch) { + let mut num = to_number(ch) + next() + while current < source.length() && isDigit(peek()) { + num = num * 10 + to_number(peek()) + next() + } + tokens.push(Number(num)) + continue + } else if isOpen(ch) { + next() + tokens.push(Open(ch)) + continue + } else if isClose(ch) { + next() + tokens.push(Close(ch)) + continue + } else if isAlpha(ch) { + let identifier = @buffer.new(size_hint=42) + identifier.write_char(ch) + next() + while current < source.length() && isIdChar(peek()) { + identifier.write_char(peek()) + next() + } + let identifier = identifier.to_unchecked_string() + match identifier { + "let" => tokens.push(Let) + "letrec" => tokens.push(Letrec) + "Nil" => tokens.push(NIL) + "Cons" => tokens.push(CONS) + "case" => tokens.push(Case) + "defn" => tokens.push(DefFn) + _ => tokens.push(Id(identifier)) + } + } else { + abort("error : invalid Character '\{ch}' in [\{current}]") + } + } else { + return Tokens::new(tokens) + } +} + +test "tokenize" { + inspect!(tokenize("").tokens, content="[]") + inspect!(tokenize("12345678").tokens, content="[Number(12345678)]") + inspect!(tokenize("1234 5678").tokens, content="[Number(1234), Number(5678)]") + inspect!( + tokenize("a0 a_0 a-0").tokens, + content= + #|[Id("a0"), Id("a_0"), Id("a-0")] + , + ) + inspect!( + tokenize("(Cons 0 (Cons 1 Nil))").tokens, + content="[Open('('), CONS, Number(0), Open('('), CONS, Number(1), NIL, Close(')'), Close(')')]", + ) +} + +fn parse_num(self : Tokens) -> Int!ParseError { + match self.peek() { + Number(n) => { + self.next() + return n + } + other => raise ParseError("parse_num(): expect a number but got \{other}") + } +} + +fn parse_var(self : Tokens) -> String!ParseError { + match self.peek() { + Id(s) => { + self.next() + return s + } + other => raise ParseError("parse_var(): expect a variable but got \{other}") + } +} + +fn parse_cons(self : Tokens) -> RawExpr[String]!ParseError { + match self.peek() { + CONS => { + self.next() + let x = self.parse_expr!() + let xs = self.parse_expr!() + return App(App(Constructor(tag=1, arity=2), x), xs) + } + other => raise ParseError("parse_cons(): expect Cons but got \{other}") + } +} + +fn parse_let(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Let) + self.eat!(Open('(')) + let defs = self.parse_defs!() + self.eat!(Close(')')) + let exp = self.parse_expr!() + Let(false, defs, exp) +} + +fn parse_letrec(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Letrec) + self.eat!(Open('(')) + let defs = self.parse_defs!() + self.eat!(Close(')')) + let exp = self.parse_expr!() + Let(true, defs, exp) +} + +fn parse_case(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Case) + let exp = self.parse_expr!() + let alts = self.parse_alts!() + Case(exp, alts) +} + +fn parse_alts( + self : Tokens +) -> List[(Int, List[String], RawExpr[String])]!ParseError { + let acc : List[(Int, List[String], RawExpr[String])] = Nil + loop self.peek(), acc { + Open('['), acc => { + self.next() + self.eat!(Open('(')) + let (tag, variables) = match self.peek() { + NIL => { + self.next() + (0, List::Nil) + } + CONS => { + self.next() + let x = self.parse_var!() + let xs = self.parse_var!() + (1, List::of([x, xs])) + } + other => + raise ParseError("parse_alts(): expect NIL or CONS but got \{other}") + } + self.eat!(Close(')')) + let exp = self.parse_expr!() + let alt = (tag, variables, exp) + self.eat!(Close(']')) + continue self.peek(), Cons(alt, acc) + } + _, acc => acc.rev() + } +} + +fn parse_defs(self : Tokens) -> List[(String, RawExpr[String])]!ParseError { + let acc : List[(String, RawExpr[String])] = Nil + loop self.peek(), acc { + Open('['), acc => { + self.next() + let var = self.parse_var!() + let value = self.parse_expr!() + self.eat!(Close(']')) + continue self.peek(), Cons((var, value), acc) + } + _, acc => acc.rev() + } +} + +fn parse_apply(self : Tokens) -> RawExpr[String]!ParseError { + let mut res = self.parse_expr!() + while self.peek() != Close(')') { + res = App(res, self.parse_expr!()) + } + return res +} + +fn parse_expr(self : Tokens) -> RawExpr[String]!ParseError { + match self.peek() { + EOF => + raise ParseError( + "parse_expr() : expect a token but got a empty token stream", + ) + Number(n) => { + self.next() + Num(n) + } + Id(s) => { + self.next() + Var(s) + } + NIL => { + self.next() + Constructor(tag=0, arity=0) + } + Open('(') => { + self.next() + let exp = match self.peek() { + Let => self.parse_let!() + Letrec => self.parse_letrec!() + Case => self.parse_case!() + CONS => self.parse_cons!() + Id(_) | Open('(') => self.parse_apply!() + other => + raise ParseError("parse_expr(): cant parse \{other} behind a '('") + } + self.eat!(Close(')')) + return exp + } + other => raise ParseError("parse_expr(): cant parse \{other}") + } +} + +fn parse_sc(self : Tokens) -> ScDef[String]!ParseError { + self.eat!(Open('(')) + self.eat!(DefFn) + let fn_name = self.parse_var!() + self.eat!(Open('[')) + let args = loop self.peek(), List::Nil { + tok, acc => + if tok != Close(']') { + let var = self.parse_var!() + continue self.peek(), Cons(var, acc) + } else { + acc.rev() + } + } + self.eat!(Close(']')) + let body = self.parse_expr!() + self.eat!(Close(')')) + ScDef::{ name: fn_name, args, body } +} + +test "parse scdef" { + let test_ = fn!(s) { ignore(tokenize(s).parse_sc!()) } + for p in programs { + let (_, p) = p + test_!(p) + } +} diff --git a/next/sources/gmachine/src/part2/top.mbt b/next/sources/gmachine/src/part2/top.mbt new file mode 100644 index 00000000..7eef55e9 --- /dev/null +++ b/next/sources/gmachine/src/part2/top.mbt @@ -0,0 +1,37 @@ +fn run(codes : List[String]) -> Node { + fn parse_then_compile(code : String) -> (String, Int, List[Instruction]) { + let tokens = tokenize(code) + let code = + try { + tokens.parse_sc!() + } catch { + ParseError(s) => abort(s) + } else { + expr => expr + } + let code = compileSC(code) + return code + } + let codes = codes.map(parse_then_compile) + prelude_defs.map(compileSC) + let codes = compiled_primitives + codes + let (heap, globals) = build_initial_heap(codes) + let initialState : GState = { + heap : heap, + stack : Nil, + // start init definition + code : List::of([PushGlobal("main"), Eval]), + // end init definition + globals : globals, + stats : 0, + dump : Nil + } + initialState.reify() +} + + +test "basic eval" { + let main = "(defn main[] (let ([add1 (add 1)]) (add1 1)))" + inspect!(run(List::of([main])), content = "NNum(2)") + let main = "(defn main[] (let ([x 4] [y 5]) (sub x y)))" + inspect!(run(List::of([main])), content = "NNum(-1)") +} \ No newline at end of file diff --git a/next/sources/gmachine/src/part2/vm.mbt b/next/sources/gmachine/src/part2/vm.mbt new file mode 100644 index 00000000..6f0d6a93 --- /dev/null +++ b/next/sources/gmachine/src/part2/vm.mbt @@ -0,0 +1,358 @@ + +// Use the 'type' keyword to encapsulate an address type. +type Addr Int derive(Eq, Show) + +// Describe graph nodes with an enumeration type. +enum Node { + NNum(Int) + // The application node + NApp(Addr, Addr) + // To store the number of parameters and + // the corresponding sequence of instructions for a super combinator + NGlobal(String, Int, List[Instruction]) + // The Indirection nodeļ¼ŒThe key component of implementing lazy evaluation + NInd(Addr) +} derive(Eq, Show) + +struct GHeap { + // The heap uses an array, + // and the space with None content in the array is available as free memory. + mut object_count : Int + memory : Array[Node?] +} + +// Allocate heap space for nodes. +fn alloc(self : GHeap, node : Node) -> Addr { + let heap = self + fn next(n : Int) -> Int { + (n + 1) % heap.memory.length() + } + + fn free(i : Int) -> Bool { + match heap.memory[i] { + None => true + _ => false + } + } + + let mut i = heap.object_count + while not(free(i)) { + i = next(i) + } + heap.memory[i] = Some(node) + heap.object_count = heap.object_count + 1 + return Addr(i) +} + + +fn op_get(self : GHeap, key : Addr) -> Node { + let Addr(i) = key + match self.memory[i] { + Some(node) => node + None => abort("GHeap::get(): index \{i} was empty") + } +} + +fn op_set(self : GHeap, key : Addr, val : Node) -> Unit { + self.memory[key._] = Some(val) +} + + +struct GState { + mut stack : List[Addr] + heap : GHeap + globals : @hashmap.T[String, Addr] + mut dump : List[(List[Instruction], List[Addr])] + mut code : List[Instruction] + mut stats : GStats +} + +type GStats Int + +fn stat_incr(self : GState) -> Unit { + self.stats = self.stats._ + 1 +} + +fn put_stack(self : GState, addr : Addr) -> Unit { + self.stack = Cons(addr, self.stack) +} + +fn put_dump(self : GState, codes : List[Instruction], stack : List[Addr]) -> Unit { + self.dump = Cons((codes, stack), self.dump) +} + +fn put_code(self : GState, instrs : List[Instruction]) -> Unit { + self.code = instrs + self.code +} + +fn pop1(self : GState) -> Addr { + match self.stack { + Cons(addr, reststack) => { + self.stack = reststack + addr + } + Nil => abort("pop1(): stack size smaller than 1") + } +} + +// e1 e2 ..... -> (e1, e2) ...... +fn pop2(self : GState) -> (Addr, Addr) { + match self.stack { + Cons(addr1, Cons(addr2, reststack)) => { + self.stack = reststack + (addr1, addr2) + } + _ => abort("pop2(): stack size smaller than 2") + } +} + +fn push_int(self : GState, num : Int) -> Unit { + let addr = self.heap.alloc(NNum(num)) + self.put_stack(addr) +} + +fn push_global(self : GState, name : String) -> Unit { + let sc = self.globals[name] + match sc { + None => abort("push_global(): cant find supercombinator \{name}") + Some(addr) => self.put_stack(addr) + } +} + +// start push definition +fn push(self : GState, offset : Int) -> Unit { + // Push(n) a0 : . . . : an : s + // => an : a0 : . . . : an : s + let addr = self.stack.unsafe_nth(offset) + self.put_stack(addr) +} +// end push definition + +// start slide definition +fn slide(self : GState, n : Int) -> Unit { + let addr = self.pop1() + self.stack = Cons(addr, self.stack.drop(n)) +} +// end slide definition + +// start rearrange definition +fn rearrange(self : GState, n : Int) -> Unit { + let appnodes = self.stack.take(n) + let args = appnodes.map(fn (addr) { + let NApp(_, arg) = self.heap[addr] + arg + }) + self.stack = args + appnodes.drop(n - 1) +} +// end rearrange definition + +fn mk_apply(self : GState) -> Unit { + let (a1, a2) = self.pop2() + let appaddr = self.heap.alloc(NApp(a1, a2)) + self.put_stack(appaddr) +} +fn update(self : GState, n : Int) -> Unit { + let addr = self.pop1() + let dst = self.stack.unsafe_nth(n) + self.heap[dst] = NInd(addr) +} + +// start unwind definition +fn unwind(self : GState) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(_) => { + match self.dump { + Nil => self.put_stack(addr) + Cons((instrs, stack), rest_dump) => { + self.stack = stack + self.put_stack(addr) + self.dump = rest_dump + self.code = instrs + } + } + } + NApp(a1, _) => { + self.put_stack(addr) + self.put_stack(a1) + self.put_code(List::of([Unwind])) + } + NGlobal(_, n, c) => { + let k = self.stack.length() + if k < n { + match self.dump { + Nil => abort("Unwinding with too few arguments") + Cons((i, s), rest) => { + // a1 : ...... : ak + // || + // ak : s + self.stack = self.stack.drop(k - 1) + s + self.dump = rest + self.code = i + } + } + } else { + if n != 0 { + self.rearrange(n) + } else { + self.put_stack(addr) + } + self.put_code(c) + } + } + NInd(a) => { + self.put_stack(a) + self.put_code(List::of([Unwind])) + } + otherwise => + abort("unwind() : wrong kind of node \{otherwise}, address \{addr}") + } +} +// end unwind definition + +// start alloc definition +fn alloc_nodes(self : GState, n : Int) -> Unit { + let dummynode : Node = NInd(Addr(-1)) + for i = 0; i < n; i = i + 1 { + let addr = self.heap.alloc(dummynode) + self.put_stack(addr) + } +} +// end alloc definition + +// start eval definition +fn eval(self : GState) -> Unit { + let addr = self.pop1() + self.put_dump(self.code, self.stack) + self.stack = List::of([addr]) + self.code = List::of([Unwind]) +} +// end eval definition + +// start cond definition +fn condition(self : GState, i1 : List[Instruction], i2 : List[Instruction]) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(0) => { + // false + self.code = i2 + self.code + } + NNum(1) => { + // true + self.code = i1 + self.code + } + otherwise => abort("cond : \{addr} = \{otherwise}") + } +} +// end cond definition + +// start op definition +fn negate(self : GState) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(n) => { + let addr = self.heap.alloc(NNum(-n)) + self.put_stack(addr) + } + otherwise => { + abort("negate: wrong kind of node \{otherwise}, address \{addr}") + } + } +} + +fn lift_arith2(self : GState, op : (Int, Int) -> Int) -> Unit { + let (a1, a2) = self.pop2() + match (self.heap[a1], self.heap[a2]) { + (NNum(n1), NNum(n2)) => { + let newnode = Node::NNum(op(n1, n2)) + let addr = self.heap.alloc(newnode) + self.put_stack(addr) + } + (node1, node2) => abort("liftArith2: \{a1} = \{node1} \{a2} = \{node2}") + } +} + +fn lift_cmp2(self : GState, op : (Int, Int) -> Bool) -> Unit { + let (a1, a2) = self.pop2() + match (self.heap[a1], self.heap[a2]) { + (NNum(n1), NNum(n2)) => { + let flag = op(n1, n2) + let newnode = if flag { Node::NNum(1) } else { Node::NNum(0) } + let addr = self.heap.alloc(newnode) + self.put_stack(addr) + } + (node1, node2) => abort("liftCmp2: \{a1} = \{node1} \{a2} = \{node2}") + } +} +// end op definition + +fn build_initial_heap( + scdefs : List[(String, Int, List[Instruction])] +) -> (GHeap, @hashmap.T[String, Addr]) { + let heap = { object_count: 0, memory: Array::make(10000, None) } + let globals = @hashmap.new(capacity=50) + loop scdefs { + Nil => () + Cons((name, arity, instrs), rest) => { + let addr = heap.alloc(NGlobal(name, arity, instrs)) + globals[name] = addr + continue rest + } + } + return (heap, globals) +} + + +// start step definition +fn step(self : GState) -> Bool { + match self.code { + Nil => return false + Cons(i, is) => { + self.code = is + self.stat_incr() + match i { + PushGlobal(f) => self.push_global(f) + PushInt(n) => self.push_int(n) + Push(n) => self.push(n) + MkApp => self.mk_apply() + Unwind => self.unwind() + Update(n) => self.update(n) + Pop(n) => self.stack = self.stack.drop(n) + Alloc(n) => self.alloc_nodes(n) + Eval => self.eval() + Slide(n) => self.slide(n) + Add => self.lift_arith2(fn (x, y) { x + y}) + Sub => self.lift_arith2(fn (x, y) { x - y}) + Mul => self.lift_arith2(fn (x, y) { x * y}) + Div => self.lift_arith2(fn (x, y) { x / y}) + Neg => self.negate() + Eq => self.lift_cmp2(fn (x, y) { x == y }) + Ne => self.lift_cmp2(fn (x, y) { x != y }) + Lt => self.lift_cmp2(fn (x, y) { x < y }) + Le => self.lift_cmp2(fn (x, y) { x <= y }) + Gt => self.lift_cmp2(fn (x, y) { x > y }) + Ge => self.lift_cmp2(fn (x, y) { x >= y }) + Cond(i1, i2) => self.condition(i1, i2) + } + return true + } + } +} +// end step definition + + +fn reify(self : GState) -> Node { + if self.step() { + self.reify() + } else { + let stack = self.stack + match stack { + Cons(addr, Nil) => { + let res = self.heap[addr] + return res + } + _ => abort("wrong stack \{stack}") + } + } +} + diff --git a/next/tutorial/example/gmachine/gmachine-2.md b/next/tutorial/example/gmachine/gmachine-2.md index 824dcd18..d1639b9c 100644 --- a/next/tutorial/example/gmachine/gmachine-2.md +++ b/next/tutorial/example/gmachine/gmachine-2.md @@ -36,29 +36,20 @@ Before implementing `let` (and the more complex `letrec`), we first need to modi The adjustment is done in the implementation of the `Unwind` instruction. If the supercombinator has no parameters, it is the same as the original unwind. When there are parameters, the top address of the supercombinator node is discarded, and the `rearrange` function is called. -```moonbit -fn rearrange(self : GState, n : Int) -> Unit { - let appnodes = take(self.stack, n) - let args = map(fn (addr) { - let NApp(_, arg) = self.heap[addr] - arg - }, appnodes) - self.stack = append(args, drop(appnodes, n - 1)) -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start rearrange definition +:end-before: end rearrange definition ``` The `rearrange` function assumes that the first N addresses on the stack point to a series of `NApp` nodes. It keeps the bottommost one (used as Redex update), cleans up the top N-1 addresses, and then places N addresses that directly point to the parameters. After this, both parameters and local variables can be accessed using the same command by changing the `PushArg` instruction to a more general `Push` instruction. -```rust -fn push(self : GState, offset : Int) -> Unit { - // Copy the address at offset + 1 to the top of the stack - // Push(n) a0 : . . . : an : s - // => an : a0 : . . . : an : s - let appaddr = nth(self.stack, offset) - self.putStack(appaddr) -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start push definition +:end-before: end push definition ``` The next issue is that we need something to clean up. Consider the following expression: @@ -83,42 +74,28 @@ After constructing the graph corresponding to the expression `expr`, the stack s Therefore, we need a new instruction to clean up these no longer needed addresses. It is called `Slide`. As the name suggests, the function of `Slide(n)` is to skip the first address and delete the following N addresses. -```rust -fn slide(self : GState, n : Int) -> Unit { - let addr = self.pop1() - self.stack = Cons(addr, drop(self.stack, n)) -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start slide definition +:end-before: end slide definition ``` Now we can compile `let`. We will compile the expressions corresponding to local variables using the `compileC` function. Then, traverse the list of variable definitions (`defs`), compile and update the corresponding offsets in order. Finally, use the passed `comp` function to compile the main expression and add the `Slide` instruction to clean up the unused addresses. > Compiling the main expression using the passed function makes it easy to reuse when adding subsequent features. -```rust -fn compileLet(comp : (RawExpr[String], List[(String, Int)]) -> List[Instruction], defs : List[(String, RawExpr[String])], expr : RawExpr[String], env : List[(String, Int)]) -> List[Instruction] { - let (env, codes) = loop env, List::Nil, defs { - env, acc, Nil => (env, acc) - env, acc, Cons((name, expr), rest) => { - let code = compileC(expr, env) - // Update offsets and add offsets for local variables corresponding to name - let env = List::Cons((name, 0), argOffset(1, env)) - continue env, append(acc, code), rest - } - } - append(codes, append(comp(expr, env), List::[Slide(length(defs))])) -} +```{literalinclude} /sources/gmachine/src/part2/compile.mbt +:language: moonbit +:start-after: start compile_let definition +:end-before: end compile_let definition ``` The semantics of `letrec` are more complex - it allows the N variables within the expression to reference each other, so we need to pre-allocate N addresses and place them on the stack. We need a new instruction: `Alloc(N)`, which pre-allocates N `NInd` nodes and pushes the addresses onto the stack sequentially. The addresses in these indirect nodes are negative and only serve as placeholders. -```rust -fn allocNodes(self : GState, n : Int) -> Unit { - let dummynode : Node = NInd(Addr(-1)) - for i = 0; i < n; i = i + 1 { - let addr = self.heap.alloc(dummynode) - self.putStack(addr) - } -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start alloc definition +:end-before: end alloc definition ``` The steps to compile letrec are similar to `let`: @@ -128,24 +105,10 @@ The steps to compile letrec are similar to `let`: - Compile the local variables in `defs`, using the `Update` instruction to update the results to the pre-allocated addresses after compiling each one. - Compile the main expression and use the `Slide` instruction to clean up. -```rust -fn compileLetrec(comp : (RawExpr[String], List[(String, Int)]) -> List[Instruction], defs : List[(String, RawExpr[String])], expr : RawExpr[String], env : List[(String, Int)]) -> List[Instruction] { - let env = loop env, defs { - env, Nil => env - env, Cons((name, _), rest) => { - let env = List::Cons((name, 0), argOffset(1, env)) - continue env, rest - } - } - let n = length(defs) - fn compileDefs(defs : List[(String, RawExpr[String])], offset : Int) -> List[Instruction] { - match defs { - Nil => append(comp(expr, env), List::[Slide(n)]) - Cons((_, expr), rest) => append(compileC(expr, env), Cons(Update(offset), compileDefs(rest, offset - 1))) - } - } - Cons(Alloc(n), compileDefs(defs, n - 1)) -} +```{literalinclude} /sources/gmachine/src/part2/compile.mbt +:language: moonbit +:start-after: start compile_letrec definition +:end-before: end compile_letrec definition ``` ## Adding Primitives @@ -201,130 +164,50 @@ The implementation of the `Eval` instruction is not complicated: > This is similar to how strict evaluation languages handle saving caller contexts, but practical implementations would use more efficient methods. -```rust -fn eval(self : GState) -> Unit { - let addr = self.pop1() - self.putDump(self.code, self.stack) - self.stack = List::[addr] - self.code = List::[Unwind] -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start eval definition +:end-before: end eval definition ``` This simple definition requires modifying the `Unwind` instruction to restore the context when `Unwind` in the `NNum` branch finds that there is a recoverable context (`dump` is not empty). -```rust -fn unwind(self : GState) -> Unit { - let addr = self.pop1() - match self.heap[addr] { - NNum(_) => { - match self.dump { - Nil => self.putStack(addr) - Cons((instrs, stack), restDump) => { - // Restore the stack - self.stack = stack - self.putStack(addr) - self.dump = restDump - // Return to original code execution - self.code = instrs - } - } - } - ...... - } -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start unwind definition +:end-before: end unwind definition ``` Next, we need to implement arithmetic and comparison instructions. We use two functions to simplify the form of binary operations. The result of the comparison instruction is a boolean value, and for simplicity, we use numbers to represent it: 0 for `false`, 1 for `true`. -```rust -fn liftArith2(self : GState, op : (Int, Int) -> Int) -> Unit { - // Binary arithmetic operations - let (a1, a2) = self.pop2() - match (self.heap[a1], self.heap[a2]) { - (NNum(n1), NNum(n2)) => { - let newnode = Node::NNum(op(n1, n2)) - let addr = self.heap.alloc(newnode) - self.putStack(addr) - } - (node1, node2) => abort("liftArith2: \{a1} = \{node1} \{a2} = \{node2}") - } -} - -fn liftCmp2(self : GState, op : (Int, Int) -> Bool) -> Unit { - // Binary comparison operations - let (a1, a2) = self.pop2() - match (self.heap[a1], self.heap[a2]) { - (NNum(n1), NNum(n2)) => { - let flag = op(n1, n2) - let newnode = if flag { Node::NNum(1) } else { Node::NNum(0) } - let addr = self.heap.alloc(newnode) - self.putStack(addr) - } - (node1, node2) => abort("liftCmp2: \{a1} = \{node1} \{a2} = \{node2}") - } -} - -// Implement negation separately -fn negate(self : GState) -> Unit { - let addr = self.pop1() - match self.heap[addr] { - NNum(n) => { - let addr = self.heap.alloc(NNum(-n)) - self.putStack(addr) - } - otherwise => { - // If not NNum, throw an error - abort("negate: wrong kind of node \{otherwise}, address \{addr} ") - } - } -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start op definition +:end-before: end op definition ``` Finally, implement branching: -```rust -fn condition(self : GState, i1 : List[Instruction], i2 : List[Instruction]) -> Unit { - let addr = self.pop1() - match self.heap[addr] { - NNum(0) => { - // If false, jump to i2 - self.code = append(i2, self.code) - } - NNum(1) => { - // If true, jump to i1 - self.code = append(i1, self.code) - } - otherwise => abort("cond : \{addr} = \{otherwise}") - } -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start cond definition +:end-before: end cond definition ``` No major adjustments are needed in the compilation part, just add some predefined programs: -```rust -let compiledPrimitives : List[(String, Int, List[Instruction])] = List::[ - // Arithmetic - ("add", 2, List::[Push(1), Eval, Push(1), Eval, Add, Update(2), Pop(2), Unwind]), - ("sub", 2, List::[Push(1), Eval, Push(1), Eval, Sub, Update(2), Pop(2), Unwind]), - ("mul", 2, List::[Push(1), Eval, Push(1), Eval, Mul, Update(2), Pop(2), Unwind]), - ("div", 2, List::[Push(1), Eval, Push(1), Eval, Div, Update(2), Pop(2), Unwind]), - // Comparison - ("eq", 2, List::[Push(1), Eval, Push(1), Eval, Eq, Update(2), Pop(2), Unwind]), - ("neq", 2, List::[Push(1), Eval, Push(1), Eval, Ne, Update(2), Pop(2), Unwind]), - ("ge", 2, List::[Push(1), Eval, Push(1), Eval, Ge, Update(2), Pop(2), Unwind]), - ("gt", 2, List::[Push(1), Eval, Push(1), Eval, Gt, Update(2), Pop(2), Unwind]), - ("le", 2, List::[Push(1), Eval, Push(1), Eval, Le, Update(2), Pop(2), Unwind]), - ("lt", 2, List::[Push(1), Eval, Push(1), Eval, Lt, Update(2), Pop(2), Unwind]), - // Miscellaneous - ("negate", 1, List::[Push(0), Eval, Neg, Update(1), Pop(1), Unwind]), - ("if", 3, List::[Push(0), Eval, Cond(List::[Push(1)], List::[Push(2)]), Update(3), Pop(3), Unwind]) -] +```{literalinclude} /sources/gmachine/src/part2/compile.mbt +:language: moonbit +:start-after: start prim definition +:end-before: end prim definition ``` and modify the initial instruction sequence -```rust -let initialCode : List[Instruction] = List::[PushGlobal("main"), Eval] +```{literalinclude} /sources/gmachine/src/part2/top.mbt +:language: moonbit +:start-after: start init definition +:end-before: end init definition ``` ## Conclusion