diff --git a/next/sources/gmachine/src/part3/ast.mbt b/next/sources/gmachine/src/part3/ast.mbt new file mode 100644 index 00000000..617a7aa2 --- /dev/null +++ b/next/sources/gmachine/src/part3/ast.mbt @@ -0,0 +1,69 @@ +typealias List[E] = @immut/list.T[E] + + +enum RawExpr[T] { + Var(T) + Num(Int) + Constructor(tag~:Int, arity~:Int) // tag, arity + App(RawExpr[T], RawExpr[T]) + Let(Bool, List[(T, RawExpr[T])], RawExpr[T]) // isRec, Defs, Body + Case(RawExpr[T], List[(Int, List[T], RawExpr[T])]) +} derive(Show) + +struct ScDef[T] { + name : String + args : List[T] + body : RawExpr[T] +} derive(Show) + +fn is_atom[T](self : RawExpr[T]) -> Bool { + match self { + Var(_) => true + Num(_) => true + _ => false + } +} + +fn ScDef::new[T]( + name : String, + args : List[T], + body : RawExpr[T] +) -> ScDef[T] { + { name : name, args : args, body : body } +} + +let prelude_defs : List[ScDef[String]] = { + let args : (FixedArray[String]) -> List[String] = List::of + let id = ScDef::new("I", args(["x"]), Var("x")) // id x = x + let k = + ScDef::new( + "K", + args(["x", "y"]), + Var("x") + ) // K x y = x + let k1 = + ScDef::new( + "K1", + args(["x", "y"]), + Var("y") + ) // K1 x y = y + let s = + ScDef::new( + "S", + args(["f", "g", "x"]), + App(App(Var("f"), Var("x")), App(Var("g"), Var("x"))) + ) // S f g x = f x (g x) + let compose = + ScDef::new( + "compose", + args(["f", "g", "x"]), + App(Var("f"), App(Var("g"), Var("x"))) + ) // compose f g x = f (g x) + let twice = + ScDef::new( + "twice", + args(["f"]), + App(App(Var("compose"), Var("f")), Var("f")) + ) // twice f = compose f f + List::of([id, k, k1, s, compose, twice]) +} \ No newline at end of file diff --git a/next/sources/gmachine/src/part3/compile.mbt b/next/sources/gmachine/src/part3/compile.mbt new file mode 100644 index 00000000..f84a8074 --- /dev/null +++ b/next/sources/gmachine/src/part3/compile.mbt @@ -0,0 +1,287 @@ +fn compileSC(self : ScDef[String]) -> (String, Int, List[Instruction]) { + let name = self.name + let body = self.body + let mut arity = 0 + fn gen_env(i : Int, args : List[String]) -> List[(String, Int)] { + match args { + Nil => { + arity = i + return Nil + } + Cons(s, ss) => Cons((s, i), gen_env(i + 1, ss)) + } + } + + let env = gen_env(0, self.args) + (name, arity, compileR(body, env, arity)) +} + + +fn compileR( + self : RawExpr[String], + env : List[(String, Int)], + arity : Int +) -> List[Instruction] { + if arity == 0 { + compileE(self, env) + List::of([Update(arity), Unwind]) + } else { + compileE(self, env) + List::of([Update(arity), Pop(arity), Unwind]) + } +} + +fn compileC( + self : RawExpr[String], + env : List[(String, Int)] +) -> List[Instruction] { + match self { + Var(s) => + match env.lookup(s) { + None => List::of([PushGlobal(s)]) + Some(n) => List::of([Push(n)]) + } + Num(n) => List::of([PushInt(n)]) + // start c_constr definition + App(App(Constructor(tag = 1, arity = 2), x), xs) => { + // Cons(x, xs) + compileC(xs, env) + compileC(x, argOffset(1, env)) + List::of([Pack(1, 2)]) + } + // Nil + Constructor(tag = 0, arity = 0) => List::of([Pack(0, 0)]) + // end c_constr definition + App(e1, e2) => + compileC(e2, env) + compileC(e1, argOffset(1, env)) + List::of([MkApp]) + Let(rec, defs, e) => + if rec { + compileLetrec(compileC, defs, e, env) + } else { + compileLet(compileC, defs, e, env) + } + _ => abort("not support yet") + } +} + +fn compileE(self : RawExpr[String], env : List[(String, Int)]) -> List[Instruction] { + match self { + // start num definition + Num(n) => List::of([PushInt(n)]) + // end num definition + // start let definition + Let(rec, defs, e) => { + if rec { + compileLetrec(compileE, defs, e, env) + } else { + compileLet(compileE, defs, e, env) + } + } + // end let definition + // start if_and_neg definition + App(App(App(Var("if"), b), e1), e2) => { + let condition = compileE(b, env) + let branch1 = compileE(e1, env) + let branch2 = compileE(e2, env) + condition + List::of([Cond(branch1, branch2)]) + } + App(Var("negate"), e) => { + compileE(e, env) + List::of([Neg]) + } + // end if_and_neg definition + // start binop definition + App(App(Var(op), e0), e1) => { + match builtinOpS[op] { + None => compileC(self, env) + List::of([Eval]) + Some(instr) => { + let code1 = compileE(e1, env) + let code0 = compileE(e0, argOffset(1, env)) + code1 + code0 + List::of([instr]) + } + } + } + // end binop definition + // start e_constr_case definition + Case(e, alts) => { + compileE(e, env) + List::of([CaseJump(compileAlts(alts, env))]) + } + Constructor(tag = 0, arity = 0) => { + // Nil + List::of([Pack(0, 0)]) + } + App(App(Constructor(tag = 1, arity = 2), x), xs) => { + // Cons(x, xs) + compileC(xs, env) + compileC(x, argOffset(1, env)) + List::of([Pack(1, 2)]) + } + // end e_constr_case definition + // start default definition + _ => compileC(self, env) + List::of([Eval]) + // end default definition + } +} + +fn compileAlts(alts : List[(Int, List[String], RawExpr[String])], env : List[(String, Int)]) -> List[(Int, List[Instruction])] { + fn buildenv(variables : List[String], off : Int) -> List[(String, Int)] { + match variables { + Nil => Nil + Cons(v, vs) => { + Cons((v, off), buildenv(vs, off + 1)) + } + } + } + fn go(alts : List[(Int, List[String], RawExpr[String])]) -> List[(Int, List[Instruction])] { + match alts { + Nil => Nil + Cons(alt, rest) => { + let (tag, variables, body) = alt + let offset = variables.length() + let env = buildenv(variables, 0) + argOffset(offset, env) + let code = + List::of([Split]) + compileE(body, env) + List::of([Slide(offset)]) + Cons((tag, code), go(rest)) + } + } + } + go(alts) +} + + +fn argOffset(n : Int, env : List[(String, Int)]) -> List[(String, Int)] { + env.map(fn { (name, offset) => (name, offset + n) }) +} + + +fn compileLet( + comp : (RawExpr[String], List[(String, Int)]) -> List[Instruction], + defs : List[(String, RawExpr[String])], + expr : RawExpr[String], + env : List[(String, Int)] +) -> List[Instruction] { + let (env, codes) = loop env, List::Nil, defs { + env, acc, Nil => (env, acc) + env, acc, Cons((name, expr), rest) => { + let code = compileC(expr, env) + let env = List::Cons((name, 0), argOffset(1, env)) + continue env, acc + code, rest + } + } + codes + comp(expr, env) + List::of([Slide(defs.length())]) +} + +fn compileLetrec( + comp : (RawExpr[String], List[(String, Int)]) -> List[Instruction], + defs : List[(String, RawExpr[String])], + expr : RawExpr[String], + env : List[(String, Int)] +) -> List[Instruction] { + let mut env = env + loop defs { + Nil => () + Cons((name, _), rest) => { + env = Cons((name, 0), argOffset(1, env)) + continue rest + } + } + let n = defs.length() + fn compileDefs( + defs : List[(String, RawExpr[String])], + offset : Int + ) -> List[Instruction] { + match defs { + Nil => comp(expr, env) + List::of([Slide(n)]) + Cons((_, expr), rest) => + compileC(expr, env) + + Cons(Update(offset), compileDefs(rest, offset - 1)) + } + } + + Cons(Alloc(n), compileDefs(defs, n - 1)) +} + + + +let compiled_primitives : List[(String, Int, List[Instruction])] = List::of( + [ + // Arith + ( + "add", + 2, + List::of([Push(1), Eval, Push(1), Eval, Add, Update(2), Pop(2), Unwind]), + ), + ( + "sub", + 2, + List::of([Push(1), Eval, Push(1), Eval, Sub, Update(2), Pop(2), Unwind]), + ), + ( + "mul", + 2, + List::of([Push(1), Eval, Push(1), Eval, Mul, Update(2), Pop(2), Unwind]), + ), + ( + "div", + 2, + List::of([Push(1), Eval, Push(1), Eval, Div, Update(2), Pop(2), Unwind]), + ), + // Compare + ( + "eq", + 2, + List::of([Push(1), Eval, Push(1), Eval, Eq, Update(2), Pop(2), Unwind]), + ), + ( + "neq", + 2, + List::of([Push(1), Eval, Push(1), Eval, Ne, Update(2), Pop(2), Unwind]), + ), + ( + "ge", + 2, + List::of([Push(1), Eval, Push(1), Eval, Ge, Update(2), Pop(2), Unwind]), + ), + ( + "gt", + 2, + List::of([Push(1), Eval, Push(1), Eval, Gt, Update(2), Pop(2), Unwind]), + ), + ( + "le", + 2, + List::of([Push(1), Eval, Push(1), Eval, Le, Update(2), Pop(2), Unwind]), + ), + ( + "lt", + 2, + List::of([Push(1), Eval, Push(1), Eval, Lt, Update(2), Pop(2), Unwind]), + ), + // MISC + ("negate", 1, List::of([Push(0), Eval, Neg, Update(1), Pop(1), Unwind])), + ( + "if", + 3, + List::of( + [ + Push(0), + Eval, + Cond(List::of([Push(1)]), List::of([Push(2)])), + Update(3), + Pop(3), + Unwind, + ], + ), + ), + ], +) + +// start builtin definition +let builtinOpS : @hashmap.T[String, Instruction] = { + let table = @hashmap.new(capacity = 50) + table["add"] = Add + table["mul"] = Mul + table["sub"] = Sub + table["div"] = Div + table["eq"] = Eq + table["neq"] = Ne + table["ge"] = Ge + table["gt"] = Gt + table["le"] = Le + table["lt"] = Lt + table +} +// end builtin definition \ No newline at end of file diff --git a/next/sources/gmachine/src/part3/instruction.mbt b/next/sources/gmachine/src/part3/instruction.mbt new file mode 100644 index 00000000..c43ae64b --- /dev/null +++ b/next/sources/gmachine/src/part3/instruction.mbt @@ -0,0 +1,30 @@ +// start instr definition +enum Instruction { + Unwind + PushGlobal(String) + PushInt(Int) + Push(Int) + MkApp + Slide(Int) + Update(Int) + Pop(Int) + Alloc(Int) + Eval + Add + Sub + Mul + Div + Neg + Eq // == + Ne // != + Lt // < + Le // <= + Gt // > + Ge // >= + Cond(List[Instruction], List[Instruction]) + Pack(Int, Int) // tag, arity + CaseJump(List[(Int, List[Instruction])]) + Split + Print +} derive (Eq, Show) +// end instr definition \ No newline at end of file diff --git a/next/sources/gmachine/src/part3/moon.pkg.json b/next/sources/gmachine/src/part3/moon.pkg.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/next/sources/gmachine/src/part3/moon.pkg.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/next/sources/gmachine/src/part3/programs.mbt b/next/sources/gmachine/src/part3/programs.mbt new file mode 100644 index 00000000..bb6b38b3 --- /dev/null +++ b/next/sources/gmachine/src/part3/programs.mbt @@ -0,0 +1,41 @@ +let programs : @hashmap.T[String, String] = { + let programs = @hashmap.new(capacity=40) + programs["square"] = + #| (defn square[x] (mul x x)) + programs["fix"] = + #| (defn fix[f] (letrec ([x (f x)]) x)) + programs["isNil"] = + #| (defn isNil[x] + #| (case x [(Nil) 1] [(Cons n m) 0])) + programs["tail"] = + #| (defn tail[l] (case l [(Cons x xs) xs])) + programs["fibs"] = + // fibs = 0 : 1 : zipWith (+) fibs (tail fibs) + #| (defn fibs[] (Cons 0 (Cons 1 (zipWith add fibs (tail fibs))))) + programs["take"] = + #| (defn take[n l] + #| (case l + #| [(Nil) Nil] + #| [(Cons x xs) + #| (if (le n 0) Nil (Cons x (take (sub n 1) xs)))])) + programs["zipWith"] = + #| (defn zipWith[op l1 l2] + #| (case l1 + #| [(Nil) Nil] + #| [(Cons x xs) + #| (case l2 + #| [(Nil) Nil] + #| [(Cons y ys) (Cons (op x y) (zipWith op xs ys))])])) + programs["factorial"] = + #| (defn factorial[n] + #| (if (eq n 0) 1 (mul n (factorial (sub n 1))))) + programs["abs"] = + #| (defn abs[n] + #| (if (lt n 0) (negate n) n)) + programs["length"] = + #| (defn length[l] + #| (case l + #| [(Nil) 0] + #| [(Cons x xs) (add 1 (length xs))])) + programs +} diff --git a/next/sources/gmachine/src/part3/syntax.mbt b/next/sources/gmachine/src/part3/syntax.mbt new file mode 100644 index 00000000..3a59bf26 --- /dev/null +++ b/next/sources/gmachine/src/part3/syntax.mbt @@ -0,0 +1,332 @@ +enum Token { + DefFn + Let + NIL + CONS + Case + Letrec + Open(Char) // { [ ( + Close(Char) // } ] ) + Id(String) + Number(Int) + EOF +} derive(Eq, Show) + +fn between(this : Char, lw : Char, up : Char) -> Bool { + this >= lw && this <= up +} + +fn isDigit(this : Char) -> Bool { + between(this, '0', '9') +} + +fn isAlpha(this : Char) -> Bool { + between(this, 'A', 'Z') || between(this, 'a', 'z') +} + +fn isIdChar(this : Char) -> Bool { + isAlpha(this) || isDigit(this) || this == '_' || this == '-' +} + +fn isWhiteSpace(this : Char) -> Bool { + this == ' ' || this == '\t' || this == '\n' +} + +fn to_number(this : Char) -> Int { + this.to_int() - 48 +} + +fn isOpen(this : Char) -> Bool { + this == '(' || this == '[' || this == '{' +} + +fn isClose(this : Char) -> Bool { + this == ')' || this == ']' || this == '}' +} + +struct Tokens { + tokens : Array[Token] + mut current : Int +} derive(Show) + +fn Tokens::new(tokens : Array[Token]) -> Tokens { + Tokens::{ tokens, current: 0 } +} + +fn peek(self : Tokens) -> Token { + if self.current < self.tokens.length() { + return self.tokens[self.current] + } else { + return EOF + } +} + +type! ParseError String + +fn next(self : Tokens, loc~ : SourceLoc = _) -> Unit { + self.current = self.current + 1 + if self.current > self.tokens.length() { + abort("Tokens::next(): \{loc}") + } +} + +fn eat(self : Tokens, tok : Token, loc~ : SourceLoc = _) -> Unit!ParseError { + let __tok = self.peek() + // assert tok_ != EOF + if __tok != tok { + raise ParseError("\{loc} - Tokens::eat(): expect \{tok} but got \{__tok}") + } else { + self.next() + } +} + +fn tokenize(source : String) -> Tokens { + let tokens : Array[Token] = Array::new(capacity=source.length() / 2) + let mut current = 0 + let source = source.to_array() + fn peek() -> Char { + source[current] + } + + fn next() -> Unit { + current = current + 1 + } + + while current < source.length() { + let ch = peek() + if isWhiteSpace(ch) { + next() + continue + } else if isDigit(ch) { + let mut num = to_number(ch) + next() + while current < source.length() && isDigit(peek()) { + num = num * 10 + to_number(peek()) + next() + } + tokens.push(Number(num)) + continue + } else if isOpen(ch) { + next() + tokens.push(Open(ch)) + continue + } else if isClose(ch) { + next() + tokens.push(Close(ch)) + continue + } else if isAlpha(ch) { + let identifier = @buffer.new(size_hint=42) + identifier.write_char(ch) + next() + while current < source.length() && isIdChar(peek()) { + identifier.write_char(peek()) + next() + } + let identifier = identifier.to_unchecked_string() + match identifier { + "let" => tokens.push(Let) + "letrec" => tokens.push(Letrec) + "Nil" => tokens.push(NIL) + "Cons" => tokens.push(CONS) + "case" => tokens.push(Case) + "defn" => tokens.push(DefFn) + _ => tokens.push(Id(identifier)) + } + } else { + abort("error : invalid Character '\{ch}' in [\{current}]") + } + } else { + return Tokens::new(tokens) + } +} + +test "tokenize" { + inspect!(tokenize("").tokens, content="[]") + inspect!(tokenize("12345678").tokens, content="[Number(12345678)]") + inspect!(tokenize("1234 5678").tokens, content="[Number(1234), Number(5678)]") + inspect!( + tokenize("a0 a_0 a-0").tokens, + content= + #|[Id("a0"), Id("a_0"), Id("a-0")] + , + ) + inspect!( + tokenize("(Cons 0 (Cons 1 Nil))").tokens, + content="[Open('('), CONS, Number(0), Open('('), CONS, Number(1), NIL, Close(')'), Close(')')]", + ) +} + +fn parse_num(self : Tokens) -> Int!ParseError { + match self.peek() { + Number(n) => { + self.next() + return n + } + other => raise ParseError("parse_num(): expect a number but got \{other}") + } +} + +fn parse_var(self : Tokens) -> String!ParseError { + match self.peek() { + Id(s) => { + self.next() + return s + } + other => raise ParseError("parse_var(): expect a variable but got \{other}") + } +} + +fn parse_cons(self : Tokens) -> RawExpr[String]!ParseError { + match self.peek() { + CONS => { + self.next() + let x = self.parse_expr!() + let xs = self.parse_expr!() + return App(App(Constructor(tag=1, arity=2), x), xs) + } + other => raise ParseError("parse_cons(): expect Cons but got \{other}") + } +} + +fn parse_let(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Let) + self.eat!(Open('(')) + let defs = self.parse_defs!() + self.eat!(Close(')')) + let exp = self.parse_expr!() + Let(false, defs, exp) +} + +fn parse_letrec(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Letrec) + self.eat!(Open('(')) + let defs = self.parse_defs!() + self.eat!(Close(')')) + let exp = self.parse_expr!() + Let(true, defs, exp) +} + +fn parse_case(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Case) + let exp = self.parse_expr!() + let alts = self.parse_alts!() + Case(exp, alts) +} + +fn parse_alts( + self : Tokens +) -> List[(Int, List[String], RawExpr[String])]!ParseError { + let acc : List[(Int, List[String], RawExpr[String])] = Nil + loop self.peek(), acc { + Open('['), acc => { + self.next() + self.eat!(Open('(')) + let (tag, variables) = match self.peek() { + NIL => { + self.next() + (0, List::Nil) + } + CONS => { + self.next() + let x = self.parse_var!() + let xs = self.parse_var!() + (1, List::of([x, xs])) + } + other => + raise ParseError("parse_alts(): expect NIL or CONS but got \{other}") + } + self.eat!(Close(')')) + let exp = self.parse_expr!() + let alt = (tag, variables, exp) + self.eat!(Close(']')) + continue self.peek(), Cons(alt, acc) + } + _, acc => acc.rev() + } +} + +fn parse_defs(self : Tokens) -> List[(String, RawExpr[String])]!ParseError { + let acc : List[(String, RawExpr[String])] = Nil + loop self.peek(), acc { + Open('['), acc => { + self.next() + let var = self.parse_var!() + let value = self.parse_expr!() + self.eat!(Close(']')) + continue self.peek(), Cons((var, value), acc) + } + _, acc => acc.rev() + } +} + +fn parse_apply(self : Tokens) -> RawExpr[String]!ParseError { + let mut res = self.parse_expr!() + while self.peek() != Close(')') { + res = App(res, self.parse_expr!()) + } + return res +} + +fn parse_expr(self : Tokens) -> RawExpr[String]!ParseError { + match self.peek() { + EOF => + raise ParseError( + "parse_expr() : expect a token but got a empty token stream", + ) + Number(n) => { + self.next() + Num(n) + } + Id(s) => { + self.next() + Var(s) + } + NIL => { + self.next() + Constructor(tag=0, arity=0) + } + Open('(') => { + self.next() + let exp = match self.peek() { + Let => self.parse_let!() + Letrec => self.parse_letrec!() + Case => self.parse_case!() + CONS => self.parse_cons!() + Id(_) | Open('(') => self.parse_apply!() + other => + raise ParseError("parse_expr(): cant parse \{other} behind a '('") + } + self.eat!(Close(')')) + return exp + } + other => raise ParseError("parse_expr(): cant parse \{other}") + } +} + +fn parse_sc(self : Tokens) -> ScDef[String]!ParseError { + self.eat!(Open('(')) + self.eat!(DefFn) + let fn_name = self.parse_var!() + self.eat!(Open('[')) + let args = loop self.peek(), List::Nil { + tok, acc => + if tok != Close(']') { + let var = self.parse_var!() + continue self.peek(), Cons(var, acc) + } else { + acc.rev() + } + } + self.eat!(Close(']')) + let body = self.parse_expr!() + self.eat!(Close(')')) + ScDef::{ name: fn_name, args, body } +} + +test "parse scdef" { + let test_ = fn!(s) { ignore(tokenize(s).parse_sc!()) } + for p in programs { + let (_, p) = p + test_!(p) + } +} diff --git a/next/sources/gmachine/src/part3/top.mbt b/next/sources/gmachine/src/part3/top.mbt new file mode 100644 index 00000000..bb71c2ab --- /dev/null +++ b/next/sources/gmachine/src/part3/top.mbt @@ -0,0 +1,43 @@ +fn run(codes : List[String]) -> String { + fn parse_then_compile(code : String) -> (String, Int, List[Instruction]) { + let tokens = tokenize(code) + let code = + try { + tokens.parse_sc!() + } catch { + ParseError(s) => abort(s) + } else { + expr => expr + } + let code = compileSC(code) + return code + } + let codes = codes.map(parse_then_compile) + prelude_defs.map(compileSC) + let codes = compiled_primitives + codes + let (heap, globals) = build_initial_heap(codes) + let initialState : GState = { + output : @buffer.new(size_hint = 60), + heap : heap, + stack : Nil, + // start init definition + code : List::of([PushGlobal("main"), Eval, Print]), + // end init definition + globals : globals, + stats : 0, + dump : Nil + } + initialState.reify() +} + + +test "basic eval" { + let basic = [] + for kv in programs.iter() { + let (_, v) = kv + basic.push(v) + } + let basic = List::from_array(basic) + + let main = "(defn main[] (take 20 fibs))" + inspect!(run(Cons(main, basic)), content = "0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 1597 2584 4181 Nil") +} \ No newline at end of file diff --git a/next/sources/gmachine/src/part3/vm.mbt b/next/sources/gmachine/src/part3/vm.mbt new file mode 100644 index 00000000..5f32597f --- /dev/null +++ b/next/sources/gmachine/src/part3/vm.mbt @@ -0,0 +1,415 @@ + +// Use the 'type' keyword to encapsulate an address type. +type Addr Int derive(Eq, Show) + +// Describe graph nodes with an enumeration type. +enum Node { + NNum(Int) + // The application node + NApp(Addr, Addr) + // To store the number of parameters and + // the corresponding sequence of instructions for a super combinator + NGlobal(String, Int, List[Instruction]) + // The Indirection nodeļ¼ŒThe key component of implementing lazy evaluation + NInd(Addr) + NConstr(Int, List[Addr]) +} derive(Eq, Show) + +struct GHeap { + // The heap uses an array, + // and the space with None content in the array is available as free memory. + mut object_count : Int + memory : Array[Node?] +} + +// Allocate heap space for nodes. +fn alloc(self : GHeap, node : Node) -> Addr { + let heap = self + fn next(n : Int) -> Int { + (n + 1) % heap.memory.length() + } + + fn free(i : Int) -> Bool { + match heap.memory[i] { + None => true + _ => false + } + } + + let mut i = heap.object_count + while not(free(i)) { + i = next(i) + } + heap.memory[i] = Some(node) + heap.object_count = heap.object_count + 1 + return Addr(i) +} + + +fn op_get(self : GHeap, key : Addr) -> Node { + let Addr(i) = key + match self.memory[i] { + Some(node) => node + None => abort("GHeap::get(): index \{i} was empty") + } +} + +fn op_set(self : GHeap, key : Addr, val : Node) -> Unit { + self.memory[key._] = Some(val) +} + + +struct GState { + output : @buffer.T + mut stack : List[Addr] + heap : GHeap + globals : @hashmap.T[String, Addr] + mut dump : List[(List[Instruction], List[Addr])] + mut code : List[Instruction] + mut stats : GStats +} + +type GStats Int + +fn stat_incr(self : GState) -> Unit { + self.stats = self.stats._ + 1 +} + +fn put_stack(self : GState, addr : Addr) -> Unit { + self.stack = Cons(addr, self.stack) +} + +fn put_dump(self : GState, codes : List[Instruction], stack : List[Addr]) -> Unit { + self.dump = Cons((codes, stack), self.dump) +} + +fn put_code(self : GState, instrs : List[Instruction]) -> Unit { + self.code = instrs + self.code +} + +fn pop1(self : GState) -> Addr { + match self.stack { + Cons(addr, reststack) => { + self.stack = reststack + addr + } + Nil => abort("pop1(): stack size smaller than 1") + } +} + +// e1 e2 ..... -> (e1, e2) ...... +fn pop2(self : GState) -> (Addr, Addr) { + match self.stack { + Cons(addr1, Cons(addr2, reststack)) => { + self.stack = reststack + (addr1, addr2) + } + _ => abort("pop2(): stack size smaller than 2") + } +} + +fn push_int(self : GState, num : Int) -> Unit { + let addr = self.heap.alloc(NNum(num)) + self.put_stack(addr) +} + +fn push_global(self : GState, name : String) -> Unit { + let sc = self.globals[name] + match sc { + None => abort("push_global(): cant find supercombinator \{name}") + Some(addr) => self.put_stack(addr) + } +} + + +fn push(self : GState, offset : Int) -> Unit { + // Push(n) a0 : . . . : an : s + // => an : a0 : . . . : an : s + let addr = self.stack.unsafe_nth(offset) + self.put_stack(addr) +} + +fn slide(self : GState, n : Int) -> Unit { + let addr = self.pop1() + self.stack = Cons(addr, self.stack.drop(n)) +} + +fn rearrange(self : GState, n : Int) -> Unit { + let appnodes = self.stack.take(n) + let args = appnodes.map(fn (addr) { + match self.heap[addr] { + NApp(_, arg) => arg + _ => panic() + } + }) + self.stack = args + appnodes.drop(n - 1) +} + + +fn mk_apply(self : GState) -> Unit { + let (a1, a2) = self.pop2() + let appaddr = self.heap.alloc(NApp(a1, a2)) + self.put_stack(appaddr) +} +fn update(self : GState, n : Int) -> Unit { + let addr = self.pop1() + let dst = self.stack.unsafe_nth(n) + self.heap[dst] = NInd(addr) +} + + +fn unwind(self : GState) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(_) => { + match self.dump { + Nil => self.put_stack(addr) + Cons((instrs, stack), rest_dump) => { + self.stack = stack + self.put_stack(addr) + self.dump = rest_dump + self.code = instrs + } + } + } + NApp(a1, _) => { + self.put_stack(addr) + self.put_stack(a1) + self.put_code(List::of([Unwind])) + } + // start unwind_g definition + NGlobal(_, n, c) => { + let k = self.stack.length() + if k < n { + match self.dump { + Nil => abort("Unwinding with too few arguments") + Cons((i, s), rest) => { + // a1 : ...... : ak + // || + // ak : s + self.stack = self.stack.drop(k - 1) + s + self.dump = rest + self.code = i + } + } + } else { + if n != 0 { + self.rearrange(n) + } else { + self.put_stack(addr) + } + self.put_code(c) + } + } + // end unwind_g definition + NInd(a) => { + self.put_stack(a) + self.put_code(List::of([Unwind])) + } + NConstr(_, _) => { + match self.dump { + Nil => abort("Unwinding with too few arguments") + Cons((i, s), rest) => { + self.dump = rest + self.stack = s + self.code = i + self.put_stack(addr) + } + } + } + } +} + +fn alloc_nodes(self : GState, n : Int) -> Unit { + let dummynode : Node = NInd(Addr(-1)) + for i = 0; i < n; i = i + 1 { + let addr = self.heap.alloc(dummynode) + self.put_stack(addr) + } +} + +fn eval(self : GState) -> Unit { + let addr = self.pop1() + self.put_dump(self.code, self.stack) + self.stack = List::of([addr]) + self.code = List::of([Unwind]) +} + +fn condition(self : GState, i1 : List[Instruction], i2 : List[Instruction]) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(0) => { + // false + self.code = i2 + self.code + } + NNum(1) => { + // true + self.code = i1 + self.code + } + otherwise => abort("cond : \{addr} = \{otherwise}") + } +} + +fn negate(self : GState) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(n) => { + let addr = self.heap.alloc(NNum(-n)) + self.put_stack(addr) + } + otherwise => { + abort("negate: wrong kind of node \{otherwise}, address \{addr}") + } + } +} + +fn lift_arith2(self : GState, op : (Int, Int) -> Int) -> Unit { + let (a1, a2) = self.pop2() + match (self.heap[a1], self.heap[a2]) { + (NNum(n1), NNum(n2)) => { + let newnode = Node::NNum(op(n1, n2)) + let addr = self.heap.alloc(newnode) + self.put_stack(addr) + } + (node1, node2) => abort("liftArith2: \{a1} = \{node1} \{a2} = \{node2}") + } +} + +fn lift_cmp2(self : GState, op : (Int, Int) -> Bool) -> Unit { + let (a1, a2) = self.pop2() + match (self.heap[a1], self.heap[a2]) { + (NNum(n1), NNum(n2)) => { + let flag = op(n1, n2) + let newnode = if flag { Node::NNum(1) } else { Node::NNum(0) } + let addr = self.heap.alloc(newnode) + self.put_stack(addr) + } + (node1, node2) => abort("liftCmp2: \{a1} = \{node1} \{a2} = \{node2}") + } +} + +// start split_pack definition +fn pack(self : GState, t : Int, n : Int) -> Unit { + let addrs = self.stack.take(n) + self.stack = self.stack.drop(n) + let addr = self.heap.alloc(NConstr(t, addrs)) + self.put_stack(addr) +} + +fn split(self : GState) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NConstr(_, addrs) => { + // n == addrs.length() + self.stack = addrs + self.stack + } + _ => panic() + } +} +// end split_pack definition + +// start casejump definition +fn casejump(self : GState, table : List[(Int, List[Instruction])]) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NConstr(t, _) => { + match table.lookup(t) { + None => abort("casejump") + Some(instrs) => { + self.code = instrs + self.code + self.put_stack(addr) + } + } + } + otherwise => abort("casejump(): addr = \{addr} node = \{otherwise}") + } +} +// end casejump definition + +// start gprint definition +fn gprint(self : GState) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(n) => { + self.output.write_string(n.to_string()) + self.output.write_char(' ') + } + NConstr(0, Nil) => self.output.write_string("Nil") + NConstr(1, Cons(addr1, Cons(addr2, Nil))) => { + self.code = List::of([Instruction::Eval, Print, Eval, Print]) + self.code + self.put_stack(addr2) + self.put_stack(addr1) + } + _ => panic() + } +} +// end gprint definition + + +fn build_initial_heap( + scdefs : List[(String, Int, List[Instruction])] +) -> (GHeap, @hashmap.T[String, Addr]) { + let heap = { object_count: 0, memory: Array::make(10000, None) } + let globals = @hashmap.new(capacity=50) + loop scdefs { + Nil => () + Cons((name, arity, instrs), rest) => { + let addr = heap.alloc(NGlobal(name, arity, instrs)) + globals[name] = addr + continue rest + } + } + return (heap, globals) +} + + + +fn step(self : GState) -> Bool { + match self.code { + Nil => return false + Cons(i, is) => { + self.code = is + self.stat_incr() + match i { + PushGlobal(f) => self.push_global(f) + PushInt(n) => self.push_int(n) + Push(n) => self.push(n) + MkApp => self.mk_apply() + Unwind => self.unwind() + Update(n) => self.update(n) + Pop(n) => self.stack = self.stack.drop(n) + Alloc(n) => self.alloc_nodes(n) + Eval => self.eval() + Slide(n) => self.slide(n) + Add => self.lift_arith2(fn (x, y) { x + y}) + Sub => self.lift_arith2(fn (x, y) { x - y}) + Mul => self.lift_arith2(fn (x, y) { x * y}) + Div => self.lift_arith2(fn (x, y) { x / y}) + Neg => self.negate() + Eq => self.lift_cmp2(fn (x, y) { x == y }) + Ne => self.lift_cmp2(fn (x, y) { x != y }) + Lt => self.lift_cmp2(fn (x, y) { x < y }) + Le => self.lift_cmp2(fn (x, y) { x <= y }) + Gt => self.lift_cmp2(fn (x, y) { x > y }) + Ge => self.lift_cmp2(fn (x, y) { x >= y }) + Cond(i1, i2) => self.condition(i1, i2) + Pack(tag, arity) => self.pack(tag, arity) + CaseJump(alts) => self.casejump(alts) + Split => self.split() + Print => self.gprint() + } + return true + } + } +} + + + +fn reify(self : GState) -> String { + if self.step() { + self.reify() + } else { + self.output.to_unchecked_string() + } +} + diff --git a/next/tutorial/example/gmachine/gmachine-3.md b/next/tutorial/example/gmachine/gmachine-3.md index e6cb7625..c218cd40 100644 --- a/next/tutorial/example/gmachine/gmachine-3.md +++ b/next/tutorial/example/gmachine/gmachine-3.md @@ -4,26 +4,12 @@ This article is the third in a series on implementing Haskell's lazy evaluation ## Tracking Context -Let's review how we implemented primitives in the [last tutorial](https://www.moonbitlang.com/docs/examples/gmachine-2). - -```rust -let compiledPrimitives : List[(String, Int, List[Instruction])] = List::[ - // Arithmetic - ("add", 2, List::[Push(1), Eval, Push(1), Eval, Add, Update(2), Pop(2), Unwind]), - ("sub", 2, List::[Push(1), Eval, Push(1), Eval, Sub, Update(2), Pop(2), Unwind]), - ("mul", 2, List::[Push(1), Eval, Push(1), Eval, Mul, Update(2), Pop(2), Unwind]), - ("div", 2, List::[Push(1), Eval, Push(1), Eval, Div, Update(2), Pop(2), Unwind]), - // Comparison - ("eq", 2, List::[Push(1), Eval, Push(1), Eval, Eq, Update(2), Pop(2), Unwind]), - ("neq", 2, List::[Push(1), Eval, Push(1), Eval, Ne, Update(2), Pop(2), Unwind]), - ("ge", 2, List::[Push(1), Eval, Push(1), Eval, Ge, Update(2), Pop(2), Unwind]), - ("gt", 2, List::[Push(1), Eval, Push(1), Eval, Gt, Update(2), Pop(2), Unwind]), - ("le", 2, List::[Push(1), Eval, Push(1), Eval, Le, Update(2), Pop(2), Unwind]), - ("lt", 2, List::[Push(1), Eval, Push(1), Eval, Lt, Update(2), Pop(2), Unwind]), - // Miscellaneous - ("negate", 1, List::[Push(0), Eval, Neg, Update(1), Pop(1), Unwind]), - ("if", 3, List::[Push(0), Eval, Cond(List::[Push(1)], List::[Push(2)]), Update(3), Pop(3), Unwind]) -] +Let's review how we implemented primitives in the [last tutorial](gmachine-2.md). + +```{literalinclude} /sources/gmachine/src/part2/compile.mbt +:language: moonbit +:start-after: start prim definition +:end-before: end prim definition ``` This implementation introduces many `Eval` instructions, but they are not always necessary. For example: @@ -46,74 +32,50 @@ We use the `compileE` function to implement compilation in a strict context, ens For the default branch, we simply add an `Eval` instruction after the result of `compileC`. -```rust -append(compileC(self, env), List::[Eval]) +```{literalinclude} /sources/gmachine/src/part3/compile.mbt +:language: moonbit +:start-after: start default definition +:end-before: end default definition ``` Constants are pushed directly. -```rust -Num(n) => List::[PushInt(n)] +```{literalinclude} /sources/gmachine/src/part3/compile.mbt +:language: moonbit +:start-after: start num definition +:end-before: end num definition ``` For `let/letrec` expressions, the specially designed `compileLet` and `compileLetrec` become useful. Compiling a `let/letrec` expression in a strict context only requires using `compileE` to compile its main expression. -```rust -Let(rec, defs, e) => { - if rec { - compileLetrec(compileE, defs, e, env) - } else { - compileLet(compileE, defs, e, env) - } -} +```{literalinclude} /sources/gmachine/src/part3/compile.mbt +:language: moonbit +:start-after: start let definition +:end-before: end let definition ``` The `if` and `negate` functions, with 3 and 1 arguments respectively, require special handling. -```rust -App(App(App(Var("if"), b), e1), e2) => { - let condition = compileE(b, env) - let branch1 = compileE(e1, env) - let branch2 = compileE(e2, env) - append(condition, List::[Cond(branch1, branch2)]) -} -App(Var("negate"), e) => { - append(compileE(e, env), List::[Neg]) -} +```{literalinclude} /sources/gmachine/src/part3/compile.mbt +:language: moonbit +:start-after: start if_and_neg definition +:end-before: end if_and_neg definition ``` Basic binary operations can be handled uniformly through a lookup table. First, construct a hash table called `builtinOpS` to query the corresponding instructions by the name of the primitive. -```rust -let builtinOpS : RHTable[String, Instruction] = { - let table : RHTable[String, Instruction] = RHTable::new(50) - table["add"] = Add - table["mul"] = Mul - table["sub"] = Sub - table["div"] = Div - table["eq"] = Eq - table["neq"] = Ne - table["ge"] = Ge - table["gt"] = Gt - table["le"] = Le - table["lt"] = Lt - table -} +```{literalinclude} /sources/gmachine/src/part3/compile.mbt +:language: moonbit +:start-after: start builtin definition +:end-before: end builtin definition ``` The rest of the handling is not much different. -```rust -App(App(Var(op), e0), e1) => { - match builtinOpS[op] { - None => append(compileC(self, env), List::[Eval]) // Not a primitive op, use the default branch - Some(instr) => { - let code1 = compileE(e1, env) - let code0 = compileE(e0, argOffset(1, env)) - append(code1, append(code0, List::[instr])) - } - } -} +```{literalinclude} /sources/gmachine/src/part3/compile.mbt +:language: moonbit +:start-after: start binop definition +:end-before: end binop definition ``` Are we done? It seems so, but there's another WHNF besides integers: partially applied functions. @@ -128,26 +90,10 @@ Here, `(add 1)` is a partial application. To ensure that the code generated by the new compilation strategy works correctly, we need to modify the implementation of the `Unwind` instruction for the `NGlobal` branch. When the number of arguments is insufficient and the dump has saved stacks, we should only retain the original redex and restore the stack. -```rust -NGlobal(_, n, c) => { - let k = length(self.stack) - if k < n { - match self.dump { - Nil => abort("Unwinding with too few arguments") - Cons((i, s), rest) => { - // a1 : ...... : ak - // || - // ak : s - // Retain the redex and restore the stack - self.stack = append(drop(self.stack, k - 1), s) - self.dump = rest - self.code = i - } - } - } else { - ...... - } -} +```{literalinclude} /sources/gmachine/src/part3/vm.mbt +:language: moonbit +:start-after: start unwind_g definition +:end-before: end unwind_g definition ``` This context-based strictness analysis technique is useful but cannot do anything with supercombinator calls. Here we briefly introduce a strictness analysis technique based on boolean operations, which can analyze which arguments of a supercombinator call should be compiled using strict mode. @@ -198,103 +144,48 @@ The corresponding graph node for a list is `NConstr(Int, List[Addr])`, which con We need to add two instructions, `Split` and `Pack`, to deconstruct and construct lists. -```rust -fn split(self : GState, n : Int) -> Unit { - let addr = self.pop1() - match self.heap[addr] { - NConstr(_, addrs) => { - // n == addrs.length() - self.stack = addrs + self.stack - } - } -} - -fn pack(self : GState, t : Int, n : Int) -> Unit { - let addrs = self.stack.take(n) - // Assume the number of arguments is sufficient - self.stack = self.stack.drop(n) - let addr = self.heap.alloc(NConstr(t, addrs)) - self.putStack(addr) -} +```{literalinclude} /sources/gmachine/src/part3/vm.mbt +:language: moonbit +:start-after: start split_pack definition +:end-before: end split_pack definition ``` Additionally, a `CaseJump` instruction is needed to implement the `case` expression. -```rust -fn casejump(self : GState, table : List[(Int, List[Instruction])]) -> Unit { - let addr = self.pop1() - match self.heap[addr] { - NConstr(t, addrs) => { - match lookupENV(table, t) { - None => abort("casejump") - Some(instrs) => { - self.code = instrs + self.code - self.putStack(addr) - } - } - } - otherwise => abort("casejump(): addr = \{addr} node = \{otherwise}") - } -} +```{literalinclude} /sources/gmachine/src/part3/vm.mbt +:language: moonbit +:start-after: start casejump definition +:end-before: end casejump definition ``` After adding the above instructions, we need to modify the `compileC` and `compileE` functions. Since the object matched by the `case` expression needs to be evaluated to WHNF, only the `compileE` function can compile it. -```rust -// compileE - Case(e, alts) => { - compileE(e, env) + List::[CaseJump(compileAlts(alts, env))] - } - Constructor(0, 0) => { - // Nil - List::[Pack(0, 0)] - } - App(App(Constructor(1, 2), x), xs) => { - // Cons(x, xs) - compileC(xs, env) + compileC(x, argOffset(1, env)) + List::[Pack(1, 2)] - } - -// compileC - App(App(Constructor(1, 2), x), xs) => { - // Cons(x, xs) - compileC(xs, env) + compileC(x, argOffset(1, env)) + List::[Pack(1, 2)] - } - Constructor(0, 0) => { - // Nil - List::[Pack(0, 0)] - } +```{literalinclude} /sources/gmachine/src/part3/compile.mbt +:language: moonbit +:start-after: start c_constr definition +:end-before: end c_constr definition +``` + +```{literalinclude} /sources/gmachine/src/part3/compile.mbt +:language: moonbit +:start-after: start e_constr_case definition +:end-before: end e_constr_case definition ``` At this point, a new problem arises. Previously, printing the evaluation result only needed to handle simple `NNum` nodes, but `NConstr` nodes have substructures. When the list itself is evaluated to WHNF, its substructures are mostly unevaluated `NApp` nodes. We need to add a `Print` instruction, which will recursively evaluate and write the result into the `output` component of `GState`. -```rust -struct GState { - output : Buffer - ...... -} - -fn gprint(self : GState) -> Unit { - let addr = self.pop1() - match self.heap[addr] { - NNum(n) => { - self.output.write_string(n.to_string()) - self.output.write_char(' ') - } - NConstr(0, Nil) => self.output.write_string("Nil") - NConstr(1, Cons(addr1, Cons(addr2, Nil))) => { - // Force evaluation of addr1 and addr2 is required, so we execute Eval instructions first - self.code = List::[Instruction::Eval, Print, Eval, Print] + self.code - self.putStack(addr2) - self.putStack(addr1) - } - } -} +```{literalinclude} /sources/gmachine/src/part3/vm.mbt +:language: moonbit +:start-after: start gprint definition +:end-before: end gprint definition ``` Finally, change the initial code of the G-Machine to: -```rust -let initialCode : List[Instruction] = List::[PushGlobal("main"), Eval, Print] +```{literalinclude} /sources/gmachine/src/part3/top.mbt +:language: moonbit +:start-after: start init definition +:end-before: end init definition ``` Now, we can write some classic functional programs using lazy lists, such as the infinite Fibonacci sequence: