From dec3195753c9fa186ee4deee3de9f951e2142f00 Mon Sep 17 00:00:00 2001 From: Kirill Mitkin Date: Fri, 19 Apr 2024 07:38:01 +0000 Subject: [PATCH 1/2] solve A01 task solve A02 task solve A03 task solve A04 task solve A05 task solve A06-int task solve A06 task solve A07 task solve A08 task solve A09 task solve A10 task --- .gitignore | 17 +++ regression/Makefile | 4 +- runtime/gc_runtime.s | 11 +- runtime/runtime.c | 164 ++++++++++++++++++--- src/Builtins.lama | 22 ++- src/Driver.lama | 28 ++-- src/Expr.lama | 159 +++++++++++++++++++- src/Makefile | 2 +- src/Parser.lama | 184 ++++++++++++++++------- src/SM.lama | 337 +++++++++++++++++++++++++++++++++++++++---- src/State.lama | 13 +- src/X86.lama | 268 ++++++++++++++++++++++++++-------- 12 files changed, 1013 insertions(+), 196 deletions(-) create mode 100644 .gitignore mode change 100644 => 100755 runtime/runtime.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..a8cb0ad80b --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +* +!/**/ +!*.* +*.aux +*.log +!/regression/orig/*.log +*.out +*.o +*.a +*.sm +*.i +*.s +*~ +*.aux +*.fdb_* +*.fls +!Makefile diff --git a/regression/Makefile b/regression/Makefile index fa994295d9..6914754d67 100644 --- a/regression/Makefile +++ b/regression/Makefile @@ -10,8 +10,8 @@ check: $(TESTS)# expr_tests $(TESTS): %: %.lama @echo $@ -# @ulimit -s -S 32768; cat $@.input | $(LAMAC) $@.lama -i > $@.log && diff $@.log orig/$@.log -# @ulimit -s -S 32768; cat $@.input | $(LAMAC) $@.lama -s > $@.log && diff $@.log orig/$@.log + # @ulimit -s -S 32768; cat $@.input | $(LAMAC) $@.lama -i > $@.log && diff $@.log orig/$@.log + # @ulimit -s -S 32768; cat $@.input | $(LAMAC) $@.lama -s > $@.log && diff $@.log orig/$@.log $(LAMAC) $@.lama && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log # expr_tests: diff --git a/runtime/gc_runtime.s b/runtime/gc_runtime.s index c3d8ccab32..115dbb68e8 100644 --- a/runtime/gc_runtime.s +++ b/runtime/gc_runtime.s @@ -28,11 +28,18 @@ L__gc_init: movl %esp, __gc_stack_bottom // then set @__gc_stack_top to %ebp // else return __pre_gc: - call nimpl + cmpl $0, __gc_stack_top + jnz __ret_lab + movl %ebp, __gc_stack_top + ret // ================================================== // if __gc_stack_top was set by one of the callers // then return // else set __gc_stack_top to 0 __post_gc: - call nimpl + cmpl __gc_stack_top, %ebp + jnz __ret_lab + movl $0, __gc_stack_top +__ret_lab: + ret diff --git a/runtime/runtime.c b/runtime/runtime.c old mode 100644 new mode 100755 index 5439e43265..604815529d --- a/runtime/runtime.c +++ b/runtime/runtime.c @@ -1,3 +1,5 @@ +#define _GNU_SOURCE 1 + #include #include #include @@ -161,6 +163,7 @@ extern void *Bsexp(int bn, ...) sexp *r; data *d; int n = UNBOX(bn); + __pre_gc(); r = (sexp *)alloc(sizeof(int) * (n + 1)); d = &(r->contents); @@ -181,6 +184,7 @@ extern void *Bsexp(int bn, ...) r->tag = UNBOX(va_arg(args, int)); va_end(args); + __post_gc(); return d->contents; } @@ -192,11 +196,13 @@ void *Barray(int n0, ...) int i, ai; data *r; + __pre_gc(); + r = (data *)alloc(sizeof(int) * (n + 1)); r->tag = ARRAY_TAG | (n << 3); - va_start(args, n); + va_start(args, n0); for (i = 0; i < n; i++) { @@ -205,6 +211,8 @@ void *Barray(int n0, ...) } va_end(args); + __post_gc(); + return r->contents; } @@ -569,7 +577,7 @@ extern void Bmatch_failure(void *v, char *fname, int line, int col) /* Globals @__gc_data_end and @__gc_data_start are used to idenfity the begin and the end */ /* of the static data area. They are defined while generating X86 code in src/X86.lama. */ /* 2) Program stack. */ -/* Globals @__gc_stack_bottom and @__gc_stack_top (see runctime/gc_runtime.s) have to be set */ +/* Globals @__gc_stack_bottom and @__gc_stack_top (see runtime/gc_runtime.s) have to be set */ /* as the begin and the end of program stack or its part where roots can be found. */ /* 3) Traditionally, roots can be also found in registers but our compiler always saves all */ /* registers on program stack before any external function call. */ @@ -580,14 +588,15 @@ extern void Bmatch_failure(void *v, char *fname, int line, int col) // The begin and the end of static area (are specified in src/X86.lama fucntion genasm) extern const size_t __gc_data_end, __gc_data_start; +extern const size_t* __gc_stack_top, *__gc_stack_bottom; -// @L__gc_init is defined in runtime/runtime.s +// @L__gc_init is defined in runtime/gc_runtime.s // it sets up stack bottom and calls init_pool // it is called from the main function (see src/X86.lama function compileX86) extern void L__gc_init(); // You also have to define two functions @__pre_gc and @__post_gc in runtime/gc_runtime.s. -// These auxiliary functions have to be defined in oder to correctly set @__gc_stack_top. +// These auxiliary functions have to be defined in order to correctly set @__gc_stack_top. // Note that some of our functions (from runtime.c) activation records can be on top of the // program stack. These activation records contain usual values and thus we do not have a // way to distinguish pointers from non-pointers. And some of these values may accidentally be @@ -612,21 +621,26 @@ typedef struct static pool from_space; // From-space (active ) semi-heap static pool to_space; // To-space (passive) semi-heap -static size_t *current; // Pointer to the free space begin in active space + +const int WORD = sizeof(int); // initial semi-space size -static size_t SPACE_SIZE = 8; -#define POOL_SIZE (2 * SPACE_SIZE) +static size_t SPACE_SIZE = 1 * WORD; // Less than PAGE_SIZE will be rounded to PAGE_SIZE in mmap + // Small SPACE_SIZE for at least one gc call in tests -// @init_to_space initializes to_space -// @flag is a flag: if @SPACE_SIZE has to be increased or not -static void init_to_space(int flag) { NIMPL } +void init_space(pool* space); // @free_pool frees memory pool p -static int free_pool(pool *p) { NIMPL } +int free_pool(pool *p) { + munmap((void*)p->begin, p->size); +} // swaps active and passive spaces -static void gc_swap_spaces(void){NIMPL} +static void gc_swap_spaces(void) { + pool old_from = from_space; + from_space = to_space; + to_space = old_from; +} // checks if @p is a valid pointer to the active (from-) space #define IS_VALID_HEAP_POINTER(p) \ @@ -646,21 +660,135 @@ static void gc_swap_spaces(void){NIMPL} // @extend_spaces extends size of both from- and to- spaces static void extend_spaces(void) { - NIMPL + SPACE_SIZE <<= 1; + from_space.begin = mremap(from_space.begin, from_space.size, SPACE_SIZE, MREMAP_MAYMOVE); + if (from_space.begin == MAP_FAILED) { + perror("extend spaces: mremap from_space failed\n"); + exit(1); + } + to_space.begin = mremap(to_space.begin, to_space.size, SPACE_SIZE, 0); // move invalidate all pointers + // need to write moving of all pointers or call to gc + if (to_space.begin == MAP_FAILED) { + perror("extend spaces: mremap to_space failed\n"); + exit(1); + } +} + +void clear_space(pool* space) { + space->current = space->begin; + space->end = space->begin + SPACE_SIZE / WORD; + space->size = SPACE_SIZE; +} + +void init_space(pool* space) { + space->begin = mmap(NULL, SPACE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + clear_space(space); } // @init_pool is a memory pools initialization function // (is called by L__gc_init from runtime/gc_runtime.s) -extern void init_pool(void) { NIMPL } +// two diffent mmaps for flexibility: +// it's allows us to remap free space if required without moving pointers +extern void init_pool(void) { + init_space(&from_space); + init_space(&to_space); +} + +size_t round_word(size_t num) { + return (num + WORD - 1) / WORD; +} + +void gc_test_and_copy_root(size_t** root); + +void *gc_copy(size_t *p) { + data* obj = TO_DATA(p); + if (IS_FORWARD_PTR((size_t*)obj->tag)) { + return (void*)obj->tag; + } + switch(TAG(obj->tag)) { + case ARRAY_TAG: { + size_t len = LEN(obj->tag); + memcpy((void*)to_space.current, (void*)obj, (len + 1) * WORD); + size_t* pos = to_space.current + 1; + obj->tag = (int)pos; + to_space.current += 1 + len; + size_t* first = pos; + for (size_t* it = first; it < first + len; ++it) { + gc_test_and_copy_root((size_t**)it); + } + return pos; + } + case SEXP_TAG: { + sexp* s = TO_SEXP(p); + size_t len = LEN(obj->tag); + memcpy((void*)to_space.current, (void*)s, (len + 2) * WORD); + size_t* pos = to_space.current + 2; + obj->tag = (int)pos; + to_space.current += 2 + len; + size_t* first = pos; + for (size_t* it = first; it < first + len; ++it) { + gc_test_and_copy_root((size_t**)it); + } + return pos; + } + case STRING_TAG: { + size_t len = LEN(obj->tag); + // [tag, c_str, '\0'] + memcpy((char*)to_space.current, (void*)obj, WORD + len + 1); + size_t* pos = to_space.current + 1; + obj->tag = (int)pos; + to_space.current += 1 + round_word(len); + return pos; + } + default: + failure("gc copy: tag: 0x%x\n",TAG(obj->tag)); + } +} + +void gc_test_and_copy_root(size_t **root) { + if (IS_VALID_HEAP_POINTER(*root)) { + *root = gc_copy(*root); + } +} // @gc performs stop-the-world copying garbage collection // and extends pools (i.e. calls @extend_spaces) if necessarily // @size is a size of the block that @alloc failed to allocate -// returns a pointer the new free block -static void *gc(size_t size) { NIMPL } +static void gc(size_t size) { + // fprintf(stderr, "gc called on %d\n", size); + // Static data + for (size_t* i = (size_t*)&__gc_data_start; i < &__gc_data_end; i++) { + gc_test_and_copy_root((size_t**)i); + } + // Stack + for (size_t* i = (size_t*)__gc_stack_top; i < __gc_stack_bottom; i++) { + gc_test_and_copy_root((size_t**)i); + } + // Extra roots + for (int i = 0; i < extra_roots.current_free; ++i) { + gc_test_and_copy_root((size_t**)extra_roots.roots[i]); + } + // Equal to avoid empty arrays and sexps (their content == space.end) + while (to_space.end - to_space.current <= size) { + extend_spaces(); + to_space.size = SPACE_SIZE; + to_space.end = to_space.begin + SPACE_SIZE / WORD; + } + clear_space(&from_space); + gc_swap_spaces(); +} // @alloc allocates @size memory words -// it enaibles garbage collection if out-of-memory, +// it enables garbage collection if out-of-memory, // i.e. calls @gc when @current + @size > @from_space.end // returns a pointer to the allocated block of size @size -extern void *alloc(size_t size) { NIMPL } +extern void *alloc(size_t size) { + size = round_word(size); + // Equal to avoid empty arrays and sexps (their content == space.end) + if (from_space.current + size >= from_space.end) { + gc(size); + } + size_t *free = from_space.current; + from_space.current += size; + return free; +} diff --git a/src/Builtins.lama b/src/Builtins.lama index ccb3623179..953159aae2 100644 --- a/src/Builtins.lama +++ b/src/Builtins.lama @@ -1,14 +1,24 @@ -- Builtins import World; +public fun isBuiltin (name) { + case name of + "stringval" -> true + | "length" -> true + | "read" -> true + | "write" -> true + | _ -> false + esac +} + public fun evalBuiltin (name, args, w) { case [name, args] of - ["stringval", {a}] -> [a.string, w] - | ["length" , {a@#array}] -> [a.length, w] - | ["length" , {a@#str}] -> [a.length, w] - | ["length" , {Sexp (_, vs)}] -> [vs.length, w] - | ["read" , {}] -> readWorld (w) - | ["write" , {x@#val}] -> [0, writeWorld (x, w)] + ["stringval", {a}] -> [a.string, w] + | ["length" , {a@#array}] -> [a.length, w] + | ["length" , {a@#str}] -> [a.length, w] + | ["length" , {Sexp (_, vals)}] -> [vals.length, w] + | ["read" , {}] -> readWorld (w) + | ["write" , {x@#val}] -> [0, writeWorld (x, w)] | _ -> failure ("no builtin ""%s"" or it can not be applied to %s\n", name, args.string) esac diff --git a/src/Driver.lama b/src/Driver.lama index 6ea799a250..337e3c3b16 100644 --- a/src/Driver.lama +++ b/src/Driver.lama @@ -26,12 +26,12 @@ fun parseArgs (args) { fun setDump (m) { case m of - SM -> smDump - | AST -> astDump - | TIME -> timeDump + SM -> smDump + | AST -> astDump + | TIME -> timeDump esac ::= true } - + fun setMode (m) { case deref (mode) of Comp -> mode ::= m @@ -45,7 +45,7 @@ fun parseArgs (args) { | _ -> failure ("omitting ""%s"", input file name already set to ""%s""\n", fn, deref (infile)) esac } - + fix (fun (rec) { fun (args) { case args of @@ -69,7 +69,7 @@ fun parseArgs (args) { fun () {deref (smDump)}, fun () {deref (astDump)}, fun () {deref (timeDump)} - ] + ] } -- Logging @@ -81,7 +81,7 @@ fun createLog (args) { x86Time = ref (None), evalTime = ref (None), evalSMTime = ref (None); - + fun logAST (p, t) { ast ::= p; parseTime ::= Some (t); @@ -132,7 +132,7 @@ fun createLog (args) { logEval, logSMEval, print - ] + ] } -- Accessor functions @@ -167,7 +167,7 @@ var args = parseArgs (arrayList (sysargs).tl), t = timer (), log = createLog (ar case parseString (syntax (parse -end), fread (args.getInFile)) of Succ (program) -> log.logAST (program, t ()); log.print; - t := timer (); + t := timer (); case args.getMode of Comp -> var sm = log.logSM (compileSM (program), t ()); t := timer (); @@ -188,16 +188,16 @@ case parseString (syntax (parse -end), fread (args.getInFile)) of Int -> var out = evalExpr (input, program); log.logEval (t ()); out - + | SM -> var sm = log.logSM (compileSM (program), t ()), out; t := timer (); out := evalSM (input, sm); log.logSMEval (t ()); out - esac) - esac; + esac) + esac; log.print - -| x@Fail (err, line, col) -> + +| x@Fail (err, line, col) -> failure ("%s at %d:%d\n", err.hd, line, col) esac diff --git a/src/Expr.lama b/src/Expr.lama index 193e8b9b53..d87ddf2222 100644 --- a/src/Expr.lama +++ b/src/Expr.lama @@ -57,7 +57,14 @@ public fun evalOp (op, l, r) { -- Array (expr list) | -- Elem (expr, expr) | -- ElemRef (expr, expr) | --- Builtin (string, expr list) +-- Builtin (string, expr list) | +-- Case (expr, (pat, expr)+) + +-- pat = Const (int) | +-- Wildcard | +-- Array (pat*) | +-- Sexp (string, pat*) | +-- Named (string, pat) -- Helper function: checks that given name designates a regular variable in -- a given state @@ -78,15 +85,15 @@ fun lookupFun (state, name) { } -- Helper function: adds a bunch of regular variables current scope -fun addNames (state, names) { - foldl (fun (s, name) {s.addName (name, Val (0))}, state, names) +fun addNamesConf ([state, world], names, values) { + [foldl (fun (s, [name, value]) {s.addName (name, value)}, state, zip(names, values)), world] } -- Helper function: adds a function in current scope fun addFunction (state, name, args, body) { state.addName (name, Fun (args, body)) } - + -- Evaluates a list of expressions, properly threading a configurations. -- Returns the final configuration and the list of values fun evalList (c, exprs) { @@ -101,10 +108,150 @@ fun evalList (c, exprs) { esac } -fun eval (c@[s, w], expr) { - failure ("evalExpr not implemented\n") +fun enterScopeConf (c@[s, w]) { + [enterScope(s), w] +} + +fun leaveScopeConf (c@[s, w]) { + [leaveScope(s), w] +} + +fun enterFunctionConf(c@[s, w]) { + [enterFunction(s), w] +} + +public fun unwrapSexp(v) { + case v of + Sexp(_, a) -> a + | a -> a + esac +} + +fun all(l, f) { + foldr(fun (a, el){ + a && f(el) + }, true, l) } +fun matchPatValueLists(pats, values) { + if size(pats) == values.length then + all(zip(pats, arrayList(values)), fun ([el, v]) {match(el, v)}) else false fi +} + +fun match(pat, value) { + -- fprintf(stderr, "Match %s %s\n", pat.string, value.string); + case pat of + Wildcard -> true + | Const(n) -> n == value + | Array(pats) -> matchPatValueLists(pats, value) + | Sexp(pName, pArgs) -> case value of + Sexp(vName, vArgs) -> if compare(vName, pName) == 0 then + matchPatValueLists(pArgs, vArgs) else false fi + | _ -> false + esac + | Named(_, pat) -> match(pat, value) + esac +} + +fun evalPats(c@[st, w], v, pats) { + case pats of + [p, e] : tail -> + if match(p, v) then eval(c, e) + else evalPats(c, v, tail) fi + | [] -> failure("Match failure") + esac +} + +fun eval (c@[st, w], e) { + -- fprintf(stderr, "%s\n", e.string); + case e of + Const (n) -> [c, n] + | Var (x) -> [c, lookup(st, x)] + | Ref (x) -> [c, Ref (x)] + | Binop (op, e1, e2) -> + case evalList(c, {e1, e2}) of + [c, {w, v}] -> [c, evalOp (op, w, v)] + esac + | Skip -> [c, Bottom] + | Assn (name, expr) -> + case evalList(c, {name, expr}) of + [[st, w], {Ref (x), value}] -> [[st <- [x, value], w], value] + | [[st, w], {ElemRef (arr, i), value}] -> [[st, w], arr[i] := value] + esac + | Read (name) -> + case readWorld (w) of + [z, w] -> [[st <- [name, z], w], Bottom] + esac + | Write (expr) -> + case eval(c, expr) of + [[st, w], value] -> [[st, writeWorld (value, w)], Bottom] + esac + | Seq (s1, s2) -> + case evalList(c, {s1, s2}) of + [c, {_, v2}] -> [c, v2] + esac + | If (e, s1, s2) -> + case eval(c, e) of + [c, 0] -> eval(c, s2) + | [c, n] -> eval(c, s1) + esac + | While (body, s) -> + case eval(c, body) of + [c, 0] -> [c, Bottom] + | [c, n] -> eval(c, Seq(s, e)) + esac + | DoWhile (s, e) -> + eval(eval(c, s)[0], While(e, s)) + | Ignore (e) -> [eval(c, e)[0], Bottom] + | Scope(defs, exprs) -> + case eval(foldl(fun ([acc_st, w], def) { + [case def of + Fun (name, args, body) -> addFunction(acc_st, name, args, body) + | Var (name) -> addName(acc_st, name, 0) + esac, w] + }, enterScopeConf(c), defs), exprs) of + [c, v] -> [leaveScopeConf(c), v] + esac + | Call (x, arg_vals) -> + case lookup(st, x) of + Fun(arg_names, body) -> + case evalList(c, arg_vals) of + [new_c, arg_vals] -> + case eval(addNamesConf(enterFunctionConf(new_c), arg_names, arg_vals), body) of + [[c, w], value] -> [[leaveFunction(new_c[0], getGlobal(c)), w], value] + esac + esac + esac + | Builtin(x, arg_vals) -> + case evalList(c, arg_vals) of + [c, arg_vals] -> + case evalBuiltin(x, arg_vals, w) of + [v, w] -> [[st, w], v] + esac + esac + | String(s) -> [c, s] + | Sexp(s, vals) -> + case evalList(c, vals) of + [c, vals] -> [c, Sexp(s, listArray(vals))] + esac + | Array(vals) -> + case evalList(c, vals) of + [c, vals] -> [c, listArray(vals)] + esac + | Elem(e, i) -> + case evalList(c, {e, i}) of + [c, {e, i}] -> [c, unwrapSexp(e)[i]] + esac + | ElemRef(e, i) -> + case evalList(c, {e, i}) of + [c, {e, i}] -> [c, ElemRef(unwrapSexp(e), i)] + esac + | Case(e, brs) -> + case eval(c, e) of + [c, v] -> evalPats(c, v, brs) + esac + esac +} -- Evaluates a program with a given input and returns an output public fun evalExpr (input, expr) { diff --git a/src/Makefile b/src/Makefile index c243d83e42..fa7b033dd3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -9,7 +9,7 @@ LAMAC=lamac all: Parser.o Lexer.o X86.o SM.o Manifest.o $(LAMAC) -g -I . -o lama-impl Driver.lama -Parser.o: Lexer.o +Parser.o: Lexer.o Builtins.o Builtins.o: World.o diff --git a/src/Parser.lama b/src/Parser.lama index df3da2b616..5f0f35fc4d 100644 --- a/src/Parser.lama +++ b/src/Parser.lama @@ -1,4 +1,4 @@ --- Parser +-- Parser import Ostap; import Lexer; @@ -8,6 +8,7 @@ import Matcher; import Ref; import Collection; import STM; +import Builtins; -- Signals an error; takes an error message and location info public fun error (msg, loc) { @@ -40,7 +41,7 @@ fun freshName (env) { -- Checks if a plain value "val" can be used in the context described by -- the attribute "atr". -fun assertValue (atr, vl, loc) { +fun assertWeakOrValue (atr, vl, loc) { case atr of Ref -> error ("reference expected", loc) | Void -> vl => fun (vl) {Ignore (vl)} @@ -50,7 +51,7 @@ fun assertValue (atr, vl, loc) { -- Checks if a void epxression can be used in the context described by -- the attribute "atr". -fun assertVoid (atr, vl, loc) { +fun assertWeakOrVoid (atr, vl, loc) { case atr of Void -> vl | Val -> error ("value expected", loc) @@ -65,11 +66,15 @@ fun inbr (l, p, r) { syntax (-l p -r) } +fun prs (p) { + syntax (p) +} + -- A helper function to be used with super-combinator "expr" fun binop (op) { [syntax (pos -s[op]), fun (l, loc, r) { fun (a) { - assertValue (a, l (Val) =>> fun (l) { + assertWeakOrValue (a, l (Val) =>> fun (l) { r (Val) => fun (r) { Binop (op, l, r) }}, loc) @@ -92,10 +97,10 @@ fun list (item) { -- initializers into assignments fun expandScope (defs, expr) { fun expandVarDefs (defs, expr) { - foldr (fun ([defs, expr], def) { + foldr (fun ([defs, expr], def) { case def of - [ident, None] -> [ident : defs, expr] - | [ident, Some (value)] -> [ident : defs, Seq (Ignore (Set (ident, value)), expr)] + [ident, None] -> [Var(ident) : defs, expr] + | [ident, Some (value)] -> [Var(ident) : defs, Seq (Ignore (Assn (Ref(ident), value)), expr)] esac }, [{}, expr], @@ -103,31 +108,31 @@ fun expandScope (defs, expr) { } fun expandValDefs (defs, expr) { - foldr (fun ([defs, expr], [ident, value]) { - [ident : defs, Seq (Ignore (Set (ident, value)), expr)] + foldr (fun ([defs, expr], [ident, value]) { + [Var(ident) : defs, Seq (Ignore (Assn (Ref(ident), value)), expr)] }, [{}, expr], defs) } - - case + + case foldr (fun ([defs, expr], def) { case def of f@Fun (_, _, _) -> [f : defs, expr] | Val (ds) -> case expandValDefs (ds, expr) of - [ds, expr] -> [Val (ds) : defs, expr] + [ds, expr] -> [ds +++ defs, expr] esac - | Var (ds) -> + | Var (ds) -> case expandVarDefs (ds, expr) of - [ds, expr] -> [Var (ds) : defs, expr] + [ds, expr] -> [ds +++ defs, expr] esac esac }, [{}, expr], defs) of [defs, expr] -> Scope (defs, expr) - esac + esac } -- Helper AST function: distributes a scope through an expression @@ -143,19 +148,19 @@ fun distributeScope (expr, exprConstructor) { -- pattern-branch pairs fun reifyPatternBindings (matched, brs) { map ( - fun ([pat, br]) { + fun ([pat, br]) { fun collectBindings (path, p) { fun collectList (path, ps) { case foldl (fun ([acc, i], p) { - [collectBindings (i : path, p) : acc, i+1] + [collectBindings (i : path, p) : acc, i+1] }, [{}, 0], ps) of [acc, _] -> flatten (acc) - esac + esac } - + case p of Sexp (_, ps) -> collectList (path, ps) | Array (ps) -> collectList (path, ps) @@ -182,28 +187,34 @@ fun reifyPatternBindings (matched, brs) { ) } +fun unwrapMaybe(el) { + case el of + Some(el) -> el + | None -> fun (a) {assertWeakOrVoid(a, returnST $ Skip, loc)} esac +} + var primary = memo $ eta syntax ( -- array constant loc=pos x=inbr[s("["), list0(syntax (e=exp {e(Val)})), s("]")] { - fun (a) {assertValue (a, chainST (x) => fun (x) {Array (x)}, loc)} + fun (a) {assertWeakOrValue (a, chainST (x) => fun (x) {Array (x)}, loc)} } | - + -- string constant loc=pos x=strlit { - fun (a) {assertValue (a, returnST $ String (x), loc)} + fun (a) {assertWeakOrValue (a, returnST $ String (x), loc)} } | - + -- character literal loc=pos x=chrlit { - fun (a) {assertValue (a, returnST $ Const (x), loc)} + fun (a) {assertWeakOrValue (a, returnST $ Const (x), loc)} } | - + -- decimal constant loc=pos x=decimal { - fun (a) {assertValue (a, returnST $ Const (stringInt (x)), loc)} + fun (a) {assertWeakOrValue (a, returnST $ Const (stringInt (x)), loc)} } | - + -- identifier loc=pos x=lident args=inbr[s("("), list0(syntax(e=exp {e(Val)})), s(")")]? { fun (a) { @@ -213,25 +224,97 @@ var | Void -> Ignore (Var (x)) | _ -> Var (x) esac - | Some (args) -> assertValue (a, chainST (args) => fun (args) {Call (x, args)}, loc) + | Some (args) -> assertWeakOrValue (a, chainST (args) => fun (args) { + if isBuiltin(x) then Builtin(x, args) else Call (x, args) fi + }, loc) esac} } | -- S-expression loc=pos x=uident args=inbr[s("("), list0(syntax(e=exp {e (Val)})), s(")")]? { fun (a) { - assertValue (a, case args of + assertWeakOrValue (a, case args of None -> returnST $ Sexp (x, {}) | Some (args) -> chainST (args) => fun (args) {Sexp (x, args)} esac, loc)} } | - - $(failure ("the rest of primary parsing in not implemented\n"))), - + -- Parenthesis + inbr[s("("), scopeExpr, s(")")] | + -- Skip + loc=pos kSkip {fun (a) {assertWeakOrVoid(a, returnST $ Skip, loc)}} | + -- If + loc=pos kIf cond=exp kThen body=scopeExpr el=elsePart? kFi {fun(a) { + chainST({cond(Val), body(a), unwrapMaybe(el)(a)}) => fun ({cond, body, el}) { If(cond, body, el) } + }} | + -- While + loc=pos kWhile c=exp kDo b=scopeExpr kOd {fun (a) {assertWeakOrVoid(a, + chainST({c(Val), b(Void)}) => fun ({c, b}) { While(c, b) } + , loc)}}| + -- DoWhile + loc=pos kDo b=scopeExpr kWhile c=exp kOd {fun (a) {assertWeakOrVoid(a, + chainST({b(Void), c(Val)}) => fun ({b, c}) { + distributeScope(b, fun(b) { + DoWhile(b, c) + }) + }, loc) }} | + -- For + loc=pos kFor s1=scopeExpr s[","] e=exp s[","] s2=exp kDo s3=scopeExpr kOd {fun(a) {assertWeakOrVoid(a, + chainST({s1(Void), e(Val), s3(Void), s2(Void)}) => fun({s1, e, s3, s2}) { + distributeScope(s1, fun (s1) { + Seq(s1, While(e, Seq(s3, s2))) + }) + }, loc)} } | + -- Case + loc=pos kCase e=exp kOf brs=caseBranches kEsac {fun(a) { + e(Val) =>> fun(e) { + brs(a) =>> fun(brs) { + fun (env) { + case freshName(env) of + [env, name] -> [env, expandScope({Val({[name, e]})}, Case(Var(name), reifyPatternBindings(name, brs)))] + esac + } + } + } + }} + ), + + caseBranches = memo $ eta syntax( + loc=pos brs=prs[listBy(caseBranch, s("|"))] {fun (a) { + chainST(map(fun(br) { + br(a) + }, brs)) + } + } + ), + + caseBranch = memo $ eta syntax( + loc=pos pat=pattern s["->"] e=scopeExpr {fun(a) { + e(a) => fun (e) { + [pat, e] + } + }} + ), + -- special parser, returns AST Expr instead of a -> State env Expr + pattern = memo $ eta syntax( + x=decimal {Const (stringInt (x))} | + s["_"] {Wildcard} | + pats=inbr[s("["), list0(pattern), s("]")] {Array(pats)} | + x=uident args=inbr[s("("), list0(pattern), s(")")]? { + Sexp(x, case args of None -> {} | Some(args) -> args esac) + } | + x=lident pat=(-s["@"] pattern)? {Named(x, case pat of Some(pat) -> pat | _ -> Wildcard esac)} + ), + + elsePart = memo $ eta syntax( + loc=pos kElif cond=exp kThen body=scopeExpr el=elsePart? {fun(a) { + chainST({cond(Val), body(a), unwrapMaybe(el)(a)}) => fun ({cond, body, el}) { + If (cond, body, el) + }}} | + kElse el=exp {fun (a) { el(a) }}), basic = memo $ eta ( expr ({[Right, {[s (":="), fun (l, loc, r) { - fun (a) {assertValue (a, l (Ref) =>> fun (l) { + fun (a) {assertWeakOrValue (a, l (Ref) =>> fun (l) { r (Val) => fun (r) { Assn (l, r) }}, @@ -246,49 +329,48 @@ var }, postfix) ), - + postfix = memo $ eta syntax ( loc=pos e=primary ps=(i=inbr[s("["), exp, s("]")] {Index (i (Val))})* { fun (a) { - foldl (fun (e, p) { - case p of + foldl (fun (e, p) { + case p of Index (i) -> fun (a) { case a of Ref -> e (Val) =>> fun (e) { i => fun (i) { ElemRef (e, i) - }} - | _ -> assertValue (a, e (Val) =>> fun (e) { + }} + | _ -> assertWeakOrValue (a, e (Val) =>> fun (e) { i => fun (i) { Elem (e, i) }}, loc) esac } - esac + esac }, e, ps) (a) }} ), - + scopeExpr = memo $ eta syntax (ds=definition* e=exp? { fun (a) { - fun (e) { + fun (monad) { case ds of - {} -> e + {} -> monad | _ -> chainST (ds) =>> fun (ds) { - e => fun (e) { - expandScope (ds, e) - }} + monad => fun (e) { expandScope (ds, e) } + } esac } - (case e of + (case e of Some (e) -> e (a) - | _ -> returnST $ Skip + | _ -> assertWeakOrVoid(a, returnST $ Skip, loc) esac) - }} + }} ), - + definition = memo $ eta syntax ( kVar ds=list[syntax (x=lident e=(-s["="] basic)? {case e of None -> returnST $ [x, None] @@ -299,12 +381,12 @@ var } | kVal ds=list[syntax (x=lident s["="] e=basic {e (Val) => fun (e) {[x, e]}})] s[";"] { chainST (ds) => fun (ds) {Val (ds)} - } | + } | kFun name=lident args=inbr[s("("), list0 (lident), s(")")] body=inbr[s("{"), scopeExpr, s("}")] { body (Weak) => fun (body) {Fun (name, args, body)} } ), - + exp = memo $ eta syntax ( basic | s1=basic s[";"] s2=exp { diff --git a/src/SM.lama b/src/SM.lama index 059af9047e..b0c1601534 100644 --- a/src/SM.lama +++ b/src/SM.lama @@ -21,20 +21,18 @@ public fun showSMInsn (i) { fun varity (f) { if f then "var" else "val" fi } - + case d of Arg (i) -> sprintf ("arg[%d]", i) | Loc (i) -> sprintf ("loc[%d]", i) | Glb (x) -> sprintf ("%s", x) esac } - + case i of BINOP (s) -> sprintf ("BINOP %s", s) | LD (x) -> sprintf ("LD %s", showLoc (x)) | LDA (x) -> sprintf ("LDA %s", showLoc (x)) - | ST (x) -> sprintf ("ST %s", showLoc (x)) - | STI -> "STI" | SEXP (s, n) -> sprintf ("SEXP ""%s"", %d", s, n) | CONST (n) -> sprintf ("CONST %d", n) | LABEL (s, f) -> sprintf ("LABEL %s, %d", s, f) @@ -72,7 +70,7 @@ fun initEvalEnv (insns) { esac } }) $ [emptyMap (compare), insns]; - + [fun (l) { case findMap (map, l) of Some (insns) -> insns esac }] @@ -88,7 +86,7 @@ fun fromLabel (env, lab) { fun eval (env, w, insns) { -- Global state maps names of global variables to values var globalState = ref (fun (x) {error (sprintf ("name ""%s"" is undefined", x), getLoc (x))}); - + -- Make a fresh local state: a pair of arrays for arguments and local variables; -- takes the numbers of arguments and local variables respectively fun makeState (a, l) { @@ -102,17 +100,18 @@ fun eval (env, w, insns) { | Loc (i) -> locs[i] | Loc (i) -> locs[i] | Glb (x) -> deref (globalState) (x) - esac + esac } -- Assigns a value to a location - fun assign ([args, locs], loc, vl) { + fun assign (state@[args, locs], loc, vl) { case loc of Arg (i) -> args[i] := vl | Loc (i) -> locs[i] := vl | Glb (x) -> var g = deref (globalState); globalState ::= fun (y) {if compare (x, y) == 0 then vl else g (y) fi} - esac + esac; + state } -- Takes n positions from the list, retursn a pair: the remaining list and the taken @@ -128,17 +127,87 @@ fun eval (env, w, insns) { inner (n, {}, list) } + fun createLocal(s, na, nl) { + case take(s, na) of + [tail, args] -> [tail, [listArray(args), initArray (nl, fun (_) {0})]] + esac + } + -- Core interpreter: takes a configuration and a program, returns a configuration - fun eval (c@[st, cst, s, w], insns) { - failure ("SM interpreter is not implemented\n") + fun eval (c@[stack, call_stack, state, world], prog) { + case prog of + {} -> c + | i:prog -> + -- fprintf(stderr, "%s\n", showSMInsn(i)); + -- fprintf(stderr, "state: %s\n", state.string); + -- fprintf(stderr, "stack: %s\n", stack.string); + case i of + BINOP (s) -> case stack of + y : x : tail -> eval([evalOp (s, x, y) : tail, call_stack, state, world], prog) + esac + | LD (loc) -> + eval([lookup (state, loc) : stack, call_stack, state, world], prog) + | CONST (n) -> eval([n : stack, call_stack, state, world], prog) + | LABEL (_, _) -> eval(c, prog) + | JMP (lab) -> eval(c, fromLabel(env, lab)) + | CJMP ("nz", lab) -> case stack of + 0 : tail -> eval([tail, call_stack, state, world], prog) + | n : tail -> eval([tail, call_stack, state, world], fromLabel(env, lab)) + esac + | CJMP ("z", lab) -> case stack of + 0 : tail -> eval([tail, call_stack, state, world], fromLabel(env, lab)) + | n : tail -> eval([tail, call_stack, state, world], prog) + esac + | LDA (loc) -> eval([Ref (loc) : stack, call_stack, state, world], prog) + | STA -> case stack of + v : Ref (x) : tail -> eval([v : tail, call_stack, assign(state, x, v), world], prog) + | v : i : a : tail -> eval([(unwrapSexp(a)[i] := v) : tail, call_stack, state, world], prog) + esac + | DROP -> eval([tl(stack), call_stack, state, world], prog) + | END -> case call_stack of + {} -> c + | [sigma_l, q] : s_c -> eval([stack, s_c, sigma_l, world], q) + esac + | BEGIN (_, na, nl) -> + case createLocal(stack, na, nl) of + [s_stroke, sigma_l] -> eval([s_stroke, call_stack, sigma_l, world], prog) + esac + | CALL (f, _) -> eval([stack, [state, prog] : call_stack, state, world], fromLabel(env, f)) + | GLOBAL (name) -> eval([stack, call_stack, assign(state, Glb (name), 0), world], prog) + | STRING (s) -> eval([s: stack, call_stack, state, world], prog) + | ARRAY (n) -> case take(stack, n) of + [rem, top] -> eval([listArray(top) : rem, call_stack, state, world], prog) + esac + | ELEM -> case stack of + i : a : tail -> eval([unwrapSexp(a)[i] : tail, call_stack, state, world], prog) + esac + | BUILTIN (f, n) -> case take(stack, n) of [rem, top] -> + case evalBuiltin(f, top, world) of [value, world] -> + eval([value : rem, call_stack, state, world], prog) + esac + esac + | SEXP(s, n) -> case take(stack, n) of + [rem, top] -> eval([Sexp(s, listArray(top)) : rem, call_stack, state, world], prog) + esac + | PATT(typ) -> case stack of + h : tail -> + eval([ + case [h, typ] of + [#array, Array(n)] -> h.length == n + | [Sexp(s, arr), Tag(t, n)] -> compare(s, t) == 0 && arr.length == n + | _ -> 0 + esac + : tail, call_stack, state, world], prog) + esac + esac + esac } - - -- printf ("%s\n", showSM (insns)); - eval ([{}, {}, makeState (0, 0), w], insns) [3].getOutput } + + -- Runs a stack machine for a given input and a given program, returns an output public fun evalSM (input, insns) { eval (initEvalEnv (insns), createWorld (input), insns) @@ -151,7 +220,7 @@ fun makeCompEnv (nLabels, scopeDepth, state, nLocals, nArgs, functions) { [sprintf ("L%d", nLabels), makeCompEnv (nLabels+1, scopeDepth, state, nLocals, nArgs, functions)] } - -- Adds a new function + -- Adds a new function fun rememberFun (fLabel, args, body) { makeCompEnv (nLabels, scopeDepth, @@ -196,7 +265,7 @@ fun makeCompEnv (nLabels, scopeDepth, state, nLocals, nArgs, functions) { fun addFun (name, fLabel, nargs) { makeCompEnv (nLabels, scopeDepth, addName (state, name, Fun (fLabel, nargs)), nLocals, nArgs, functions) } - + -- Enters a function fun beginFun (state) { makeCompEnv (nLabels, 2, enterFunction (state), 0, 0, functions) @@ -225,7 +294,7 @@ fun makeCompEnv (nLabels, scopeDepth, state, nLocals, nArgs, functions) { | _ -> error (sprintf ("the name ""%s"" does not designate a value", name), getLoc (name)) esac } - + -- Lookups a name of a function fun lookupFun (name) { case lookup (state, name) of @@ -244,14 +313,14 @@ fun makeCompEnv (nLabels, scopeDepth, state, nLocals, nArgs, functions) { if isGlobal () then [sprintf ("L%s", name), makeCompEnv (nLabels , scopeDepth, state, nLocals, nArgs, functions)] else [sprintf ("L%s_%d", name, nLabels), makeCompEnv (nLabels+1, scopeDepth, state, nLocals, nArgs, functions)] - fi + fi } -- Checks if the current scope is a global fun isGlobal () { scopeDepth == 2 } - + [genLabel, rememberFun, beginScope, @@ -396,18 +465,227 @@ fun addVals (env, names) { public fun compileSM (stmt) { fun label (lab, labUsed) { if labUsed - then singletonBuffer (LABEL (lab)) + then singletonBuffer (LABEL (lab, false)) else emptyBuffer () fi } - + + fun compileFunction(env, Fun (fname, args, body, state)) { + case genLabel (addArgs (beginFun (env, state), args)) of + [fLab, env] -> + case compile (fLab, env, body) of + [fLabUsed, env, bCode] -> [env, + singletonBuffer (LABEL (fname, false)) <+ + BEGIN (fname, size(args), getLocals(env)) <+> + bCode <+> + label (fLab, fLabUsed) <+ + END] + esac + esac + } + + fun compileFunctions(env, fs) { + case fs of + {} -> emptyBuffer() + | f : fs -> case compileFunction(env, f) of + [env, fCode] -> fCode <+> compileFunctions(env, fs) + esac + esac + } + + fun compileList(env, stmts) { + foldl(fun ([code, env], stmt) { + case genLabel(env) of + [argLab, env] -> + case compile(argLab, env, stmt) of + [argLabUsed, env, argCode] -> [ code <+> + argCode <+> + label(argLab, argLabUsed), env] + esac + esac + }, [emptyBuffer(), env], stmts) + } + + -- Generates SM code for a given statement + -- lab --- a label th go to after the statement is executed + -- env --- compilation environment + -- stmt --- a statement to generate SM code for + -- Returns a triple: + -- boolean flag indicating if "lab" was used + -- updated environment + -- code buffer fun compile (lab, env, stmt) { + -- fprintf(stderr, "%s\n", stmt.string); case stmt of Skip -> [false, env, emptyBuffer ()] - | Var (x) -> [false, env, singletonBuffer (LD (x))] - | Ref (x) -> [false, env, singletonBuffer (LDA (x))] + | Var (x) -> [false, env, singletonBuffer (LD (lookupVal(env, x)))] + | Val (x) -> [false, env, singletonBuffer (LD (lookupVal(env, x)))] + | Ref (x) -> [false, env, singletonBuffer (LDA (lookupVar(env, x)))] | Const (n) -> [false, env, singletonBuffer (CONST (n))] - | _ -> failure ("compileSM not implemented\n") + | Assn (x, e) -> + case compileList(env, {x, e}) of + [code, env] -> [false, env, code <+ STA] + esac + | Seq (s1, s2) -> + case compileList(env, {s1, s2}) of + [code, env] -> [false, env, code] + esac + | If (e, s1, s2) -> + case genLabels(env, 3) of + [condLab, eLab, endTrueLab, env] -> + case compile (endTrueLab, env, s1) of + [trueUsed, env, s1Code] -> + case compile (lab, env, s2) of + [_, env, s2Code] -> + case compile (condLab, env, e) of + [condLabUsed, env, condCode] -> [true, env, + condCode <+> + label(condLab, condLabUsed) <+ + CJMP("z", eLab) <+> + s1Code <+> + label(endTrueLab, trueUsed) <+ + JMP(lab) <+ + LABEL(eLab, false) <+> + s2Code] + esac + esac + esac + esac + | While (e, s) -> + case genLabels(env, 3) of + [condLab, beginLab, endLab, env] -> + case compile (endLab, env, s) of + [genEnd, env, sCode] -> + case compile(condLab, env, e) of + [condLabUsed, env, condCode] -> [true, env, + singletonBuffer(LABEL(beginLab, true)) <+> + condCode <+> + label(condLab, condLabUsed) <+ + CJMP("z", lab) <+> + sCode <+> + label(endLab, genEnd) <+ + JMP(beginLab)] + esac + esac + esac + | DoWhile(s, e) -> + case genLabels(env, 3) of + [condLab, beginLab, endLab, env] -> + case compile (endLab, env, s) of + [genEnd, env, sCode] -> + case compile (condLab, env, e) of + [condLabUsed, env, condCode] -> [false, env, + singletonBuffer(LABEL(beginLab, true)) <+> + sCode <+> + label(endLab, genEnd) <+> + condCode <+> + label(condLab, condLabUsed) <+ + CJMP("nz", beginLab)] + esac + esac + esac + | Binop(op, e1, e2) -> + case compileList(env, {e1, e2}) of + [code, env] -> [false, env, code <+ BINOP(op)] + esac + | Ignore (e) -> + case compileList(env, {e}) of + [code, env] -> [false, env, code <+ DROP] + esac + | Scope (defs, expr) -> + case foldl(fun ([code, env], def) { + case def of + Fun (name, args, body) -> + case genFunLabel(env, name) of + [label, env] -> [code, rememberFun(env, label, args, body)] + esac + | _ -> [code, env] + esac + }, foldl(fun ([code, env], def) { + case def of + Fun (name, args, body) -> + case genFunLabel(env, name) of + [label, env] -> [code, addFun(env, name, label, size(args))] + esac + | Var (name) -> [if isGlobal(env) then code <+ GLOBAL (name) else code fi, addVar(env, name)] + esac + }, [emptyBuffer(), beginScope(env)], defs), defs) of + [gCode, env] -> + case compile(lab, env, expr) of + [labUsed, env, eCode] -> [labUsed, endScope(env), gCode <+> eCode] + esac + esac + | Call (f, arg_vals) -> + case compileList(env, arg_vals) of + [code, env] -> + case lookupFun(env, f) of + Fun(fLabel, nargs) -> [false, env, code <+ CALL(fLabel, nargs)] + esac + esac + | String (s) -> [false, env, singletonBuffer(STRING(s))] + | Array (vals) -> + case compileList(env, vals) of + [code, env] -> [false, env, code <+ ARRAY(size(vals))] + esac + | Sexp (name, vals) -> + case compileList(env, vals) of + [code, env] -> [false, env, code <+ SEXP(name, size(vals))] + esac + | Elem (arr, i) -> + case compileList(env, {arr, i}) of + [code, env] -> [false, env, code <+ ELEM] + esac + | ElemRef(arr, i) -> + case compileList(env, {arr, i}) of + [code, env] -> [false, env, code] + esac + | Builtin(name, args) -> + case compileList(env, args) of + [code, env] -> [false, env, code <+ BUILTIN(name, size(args))] + esac + | Case(Var(x), brs) -> + fun compileMatch(scrCode, pat, outLab) { + case pat of + Const(n) -> listBuffer(scrCode) <+ + CONST(n) <+ + BINOP("-") <+ + CJMP("nz", outLab) + | Wildcard -> emptyBuffer() + | Named(_, pat) -> compileMatch(scrCode, pat, outLab) + | Array(pats) -> foldl(fun ([code, i], aPat) { + [code <+> compileMatch(scrCode +++ {CONST(i), ELEM}, aPat, outLab), i + 1] + }, [listBuffer(scrCode) <+ + PATT(Array(size(pats))) <+ + CJMP("z", outLab), 0], pats)[0] + | Sexp(t, pats) -> + var a = foldl(fun ([code, i], sPat) { + [code <+> compileMatch(scrCode +++ {CONST(i), ELEM}, sPat, outLab), i + 1] + }, [listBuffer(scrCode) <+ + PATT(Tag(t, size(pats))) <+ + CJMP("z", outLab), 0], pats)[0]; + -- fprintf(stderr, "compileMatch: %s\n", pat.string); + -- fprintf(stderr, "compileMatchCode: %s\n", getBuffer(a).string); + a + esac + } + fun compileBranch([code_, nextLab, env], [bPat, bExp]) { + case genLabels(env, 2) of + [bLab, bCodeLab, env] -> + case compile(bCodeLab, env, bExp) of + [bCodeLabUsed, env, branchCode] -> + [ singletonBuffer(LABEL(bLab, false)) <+> + compileMatch({LD (lookupVal(env, x))}, bPat, nextLab) <+> + branchCode <+> + label(bCodeLab, bCodeLabUsed) <+ + JMP(lab) <+> + code_, + bLab, env] + esac + esac + } + case foldr(compileBranch, [emptyBuffer(), "_m_failure", env], brs) of + [code, fLab, env] -> [true, env, code] + esac esac } @@ -416,7 +694,16 @@ public fun compileSM (stmt) { .addFun ("length", "$length", 1).genLabel of [endLab, env] -> case compile (endLab, env, stmt) of - [endLabUsed, _, code] -> getBuffer $ code <+> label (endLab, endLabUsed) + [endLabUsed, env, code] -> + case getFuns(env) of + [fs, env] -> getBuffer $ singletonBuffer(LABEL("main", false)) <+ + BEGIN("main", 0, getLocals(env)) <+> + code <+> + label (endLab, endLabUsed) <+ + END <+> + compileFunctions(env, fs) <+ + LABEL("_m_failure", false) + esac esac esac } diff --git a/src/State.lama b/src/State.lama index affb45020f..b36cc31ae2 100644 --- a/src/State.lama +++ b/src/State.lama @@ -10,7 +10,7 @@ import Parser; fun makeState (scopes) { -- Searches a scope stack and performed a speficified action -- on the found binding - fun lookup (name, action) { + fun lookup (name) { fun lookupInScopes (scopes) { case scopes of {} -> error (sprintf ("name ""%s"" is undefined", name), getLoc (name)) @@ -22,9 +22,9 @@ fun makeState (scopes) { esac } - lookupInScopes (scopes) + lookupInScopes (scopes) } - + -- Makes and assignment in scopes fun assign (name, vl) { fun assignInScopes (scopes) { @@ -35,7 +35,7 @@ fun makeState (scopes) { None -> s : assignInScopes (scopes) | Some (_) -> addMap (s, name, vl) : scopes esac - esac + esac } makeState (assignInScopes (scopes)) @@ -65,7 +65,7 @@ fun makeState (scopes) { -- Enters a function; this drops all local scopes and creates a new one fun enterFunction () { fun enterScopes (scopes) { - case scopes of + case scopes of {_, _} -> scopes | _ : scopes -> enterScopes (scopes) esac @@ -80,7 +80,7 @@ fun makeState (scopes) { case scopes of {_, _} -> global | s : scopes -> s : replaceGlobal (scopes) - esac + esac } makeState (replaceGlobal (scopes)) @@ -97,7 +97,6 @@ fun makeState (scopes) { findGlobal (scopes) } - [lookup, assign, enterScope, leaveScope, addName, enterFunction, leaveFunction, getGlobal] } diff --git a/src/X86.lama b/src/X86.lama index 2f6875255e..f293fd8fab 100644 --- a/src/X86.lama +++ b/src/X86.lama @@ -8,13 +8,13 @@ import Manifest; import Buffer; -- Assembler language interface --- The registers: +-- The registers: var regs = ["%ebx", "%ecx", "%esi", "%edi", "%eax", "%edx", "%ebp", "%esp"]; -- We can not freely operate with all register; only with 4 by now var nRegs = regs.length - 5; --- For convenience we define the following synonyms for the registers: +-- For convenience we define the following synonyms for the registers: var ebx = R (0), ecx = R (1), esi = R (2), @@ -27,21 +27,21 @@ var ebx = R (0), -- We need to know the word size to calculate offsets correctly var wordSize = 4; --- We need to distinguish the following operand types: --- R (int) -- hard register --- S (int) -- a position on the hardware stack --- M (string) -- a named memory location +-- We need to distinguish the following operand types: +-- R (int) -- hard register +-- S (int) -- a position on the hardware stack +-- M (string) -- a named memory location -- L (int) -- an immediate operand -- I (int, opnd) -- an indirect operand with offset -- Some x86 instruction (we do not need all of them): --- Mov (opnd, opnd) -- copies a value from the first to the second operand +-- Mov (opnd, opnd) -- copies a value from the first to the second operand -- Lea (opnd, opnd) -- loads an address of the first operand into the second --- Binop (string, opnd, opnd) -- makes a binary operation; note, the first operand +-- Binop (string, opnd, opnd) -- makes a binary operation; note, the first operand -- designates x86 operator, not the source language one --- IDiv (opnd) -- x86 integer division, see instruction set reference --- Cltd -- see instruction set reference --- Set (string, string) -- sets a value from flags; the first operand is the +-- IDiv (opnd) -- x86 integer division, see instruction set reference +-- Cltd -- see instruction set reference +-- Set (string, string) -- sets a value from flags; the first operand is the -- suffix, which determines the value being set, the -- the second --- (sub)register name -- Jmp (string) -- unconditional jump to a label @@ -49,25 +49,25 @@ var wordSize = 4; -- Label (string) -- a label -- Push (opnd) -- pushes the operand on the hardware stack -- Pop (opnd) -- pops from the hardware stack to the operand --- Call (string) -- calls a function by its name +-- Call (string) -- calls a function by its name -- Ret -- returns from a function -- Meta (string) -- metainformation (declarations, etc.) -- --- Dec (opnd) -- arithmetic correction: decrement --- Or1 (opnd) -- arithmetic correction: or 0x0001 --- Sal1 (opnd) -- arithmetic correction: shl 1 +-- Dec (opnd) -- arithmetic correction: decrement +-- Or1 (opnd) -- arithmetic correction: or 0x0001 +-- Sal1 (opnd) -- arithmetic correction: shl 1 -- Sar1 (opnd) -- arithmetic correction: shr 1 -- Machine instruction printer fun insnString (insn) { - + fun binopString (op) { case op of "+" -> "addl" | "-" -> "subl" | "*" -> "imull" | "&&" -> "andl" - | "!!" -> "orl" + | "!!" -> "orl" | "^" -> "xorl" | "cmp" -> "cmpl" esac @@ -82,7 +82,8 @@ fun insnString (insn) { fi | M (x) -> x - | L (i) -> sprintf ("$%d", i) + | L (i) -> sprintf ("$%d", makeBox(i)) + | IL (i) -> sprintf ("$%d", i) | I (0, x) -> sprintf ("(%s)", opndString (x)) | I (n, x) -> sprintf ("%d(%s)", n, opndString (x)) esac @@ -92,9 +93,11 @@ fun insnString (insn) { Cltd -> "\tcltd\n" | Set (suf, s) -> sprintf ("\tset%s\t%s\n", suf, s) | IDiv (s1) -> sprintf ("\tidivl\t%s\n", opndString (s1)) + | Test (s1, s2) -> sprintf ("\ttestl\t%s,\t%s\n", opndString(s1), opndString(s2)) | Binop (op, s1, s2) -> sprintf ("\t%s\t%s,\t%s\n", binopString (op), opndString (s1), opndString (s2)) | Lea (s1, s2) -> sprintf ("\tleal\t%s,\t%s\n", opndString (s1), opndString (s2)) | Mov (s1, s2) -> sprintf ("\tmovl\t%s,\t%s\n", opndString (s1), opndString (s2)) + | MovZX (s1, s2) -> sprintf ("\tmovzx\t%s,\t%s\n", s1, opndString(s2)) | Push (s) -> sprintf ("\tpushl\t%s\n", opndString (s)) | Pop (s) -> sprintf ("\tpopl\t%s\n", opndString (s)) | Ret -> "\tret\n" @@ -129,12 +132,12 @@ fun makeEnv (stack, stackSlots, globals, strings, stringIndex, barrier, stackMap fun envString () { sprintf ("Stack : %s\nStackSlots: %d\nGlobals : %s\n", stack.string, stackSlots, elements (globals).string) } - + -- Allocates a new position on the symbolic stack; -- returns a pair: a location for allocated item and -- an updated environment fun allocate () { - case + case case stack of {} -> [ebx, 0] | S (n) : _ -> [S (n+1), n+2] @@ -169,7 +172,7 @@ fun makeEnv (stack, stackSlots, globals, strings, stringIndex, barrier, stackMap fun peek () { stack.fst } - + -- Adds a global variable; returns an updated environment fun addGlobal (name) { makeEnv (stack, stackSlots, addSet (globals, globalName (name)), strings, stringIndex, barrier, stackMap, fLabel, nLocals) @@ -183,7 +186,7 @@ fun makeEnv (stack, stackSlots, globals, strings, stringIndex, barrier, stackMap | Loc (i) -> S (i) esac } - + -- Gets a list of global variables from the environment fun getGlobals () { globals.elements @@ -221,12 +224,12 @@ fun makeEnv (stack, stackSlots, globals, strings, stringIndex, barrier, stackMap fun dropBarrier () { makeEnv (stack, stackSlots, globals, strings, stringIndex, false, stackMap, fLabel, nLocals) } - + -- Checks if a stack is set for a label fun hasStack (l) { compare (findMap (stackMap, l), None) != 0 } - + -- Sets the label of current function fun enterFunction (fLabel, nL) { makeEnv (stack, stackSlots, globals, strings, stringIndex, false, stackMap, fLabel, nL) @@ -262,7 +265,7 @@ fun makeEnv (stack, stackSlots, globals, strings, stringIndex, barrier, stackMap | c -> escaped [j] := c; j := j+1 esac od; - + [makeEnv (stack, stackSlots, globals, addSet (strings, [name, substring (escaped, 0, j)]), stringIndex+1, false, stackMap, fLabel, nLocals), name] } @@ -276,7 +279,7 @@ fun makeEnv (stack, stackSlots, globals, strings, stringIndex, barrier, stackMap fun staticSize () { nLocals + stackSlots } - + [envString, allocate, push, @@ -437,7 +440,7 @@ fun epilogue (env) { var metaFil = Meta (sprintf ("\t.set\tLS%s_SIZE,\t%d\n", env.currentFunction, env.staticSize)); if compare (env.currentFunction, "main") == 0 - then [env, singletonBuffer (Mov (ebp, esp)) <+ Pop (ebp) <+ Binop ("^", eax, eax) <+ Ret <+ metaDef <+ metaFil] + then [env, singletonBuffer (Mov (ebp, esp)) <+ Pop (ebp) <+ Call("free_pool") <+ Binop ("^", eax, eax) <+ Ret <+ metaDef <+ metaFil] else case env.pop of [y, env] -> [env, singletonBuffer (Mov (ebp, esp)) <+ Pop (ebp) <+ Mov (y, eax) <+ Ret <+ metaDef <+ metaFil] esac @@ -449,7 +452,7 @@ fun stackOpnd (opnd) { case opnd of S (_) -> true | _ -> false - esac + esac } -- Checks if an operand resides in memory @@ -462,7 +465,6 @@ fun memOpnd (opnd) { } -- Generates a move between locations, using --- intermediate register if needed fun move (from, to) { if memOpnd (from) && memOpnd (to) then singletonBuffer (Mov (from, eax)) <+ Mov (eax, to) @@ -470,6 +472,13 @@ fun move (from, to) { fi } +fun moveToReg(l, reg) { + case {l, reg} of + {R(n), R(m)} -> if n == m then emptyBuffer() else singletonBuffer(Mov (l, reg)) fi + | _ -> singletonBuffer(Mov (l, reg)) + esac +} + -- Gets a suffix for Set instruction from -- source language comparison operator fun suffix (op) { @@ -494,21 +503,122 @@ fun toFixedNum (r) { singletonBuffer (Sal1 (r)) <+ Or1 (r) } +fun fromFixedNum(r) { + singletonBuffer (Sar1 (r)) +} + +fun kindOp (op) { + case op of + "<" -> Comparison + | "<=" -> Comparison + | "==" -> Comparison + | "!=" -> Comparison + | ">=" -> Comparison + | ">" -> Comparison + | "&&" -> And + | "!!" -> Or + | "/" -> Div + | "%" -> Mod + | "*" -> Arithmetic + | "+" -> Arithmetic + | "-" -> Arithmetic + | _ -> failure("Unsupported op %s", op) + esac +} + +-- l in R(0)..R(4) +fun compileBinop(op, l, r) { + emptyBuffer() <+> + fromFixedNum(l) <+> + fromFixedNum(r) <+> + case kindOp(op) of + Comparison -> emptyBuffer() <+ + Binop("^", edx, edx) <+ + Binop ("cmp", r, l) <+ + Set(suffix(op), "%dl") <+ + Mov(edx, l) + | And -> emptyBuffer() <+ + Test (l, l) <+ + Set ("ne", "%al") <+ + Mov(r, edx) <+ + Test (edx, edx) <+ + Set("ne", "%dl") <+ + MovZX("%dl", edx) <+ + Binop("&&", eax, edx) <+ + Mov(edx, l) + | Or -> emptyBuffer() <+ + Binop("^", edx, edx) <+ + Binop("!!", r, l) <+ + Set("ne", "%dl") <+ + Mov(edx, l) + | Div -> moveToReg(l, eax) <+ Cltd <+ IDiv(r) <+ Mov (eax, l) + | Mod -> moveToReg(l, eax) <+ Cltd <+ IDiv(r) <+ Mov (edx, l) + | Arithmetic -> singletonBuffer(Binop(op, r, l)) + esac <+> + toFixedNum(l) +} + +fun compileCall([env, code], name, stack_args, non_stack_after, non_stack_before) { + var liveRegs = liveRegisters(env, stack_args); + + fun pushAll([env, code], regs) { + [env, foldl(fun (code, reg) { + code <+ Push(reg) + }, code, regs)] + } + + fun pushParam([env, code], n) { + if n == 0 then [env, code] else + case pop(env) of + [el, env] -> pushParam([env, code <+ Push(el)], n - 1) + esac + fi + } + + fun genCall([env, code]) { + [env, code <+ Call(name)] + } + + fun popParam([env, code]) { + [env, code <+ Binop("+", IL(wordSize * (stack_args + size(non_stack_after) + size(non_stack_before))), esp)] + } + + fun restoreAll([env, code], regs) { + [env, foldr(fun (code, reg) { + code <+ Pop(reg) + }, code, regs)] + } + + fun moveRV([env, code]) { + case allocate(env) of + [s, env] -> [env, code <+> move(eax, s)] + esac + } + + [env, code] + .pushAll(liveRegs) + .pushAll(non_stack_before) + .pushParam(stack_args) + .pushAll(non_stack_after) + .genCall + .popParam + .restoreAll(liveRegs) + .moveRV +} -- Compiles stack machine code into a list of x86 instructions. Takes an environment -- and stack machine code, returns an updated environment and x86 code. -fun compile (args, env, code) { +fun compile (args, env, code) { fun compile (env, code) { foldl ( fun ([env, scode], i) { var code = scode <+ Meta ("# " ++ showSMInsn (i) ++ "\n"); - - -- printf ("Insn: %s\n", i.string); + -- fprintf(stderr, "%s\n", i.string); -- This if removes unreachable code; otherwise -- the stack invariants for the symbolic interpreter -- are violated - if env.isBarrier + if env.isBarrier then case i of LABEL (l, true) -> [env.dropBarrier, code <+ Label (l)] | LABEL (l, _) -> if hasStack (env, l) @@ -519,40 +629,70 @@ fun compile (args, env, code) { esac else case i of - READ -> - case env.allocate of - [s, env] -> [env, code <+ Call ("Lread") <+ Mov (eax, s)] - esac - | WRITE -> + LD (x) -> + case allocate(env) of + [s, env] -> [env, code <+> move (loc(env, x), s)] + esac + | CONST (n) -> + case allocate (env) of + [s, env] -> [env, code <+> move (L(n), s)] + esac + | BINOP (binop) -> + case pop2(env) of + [r, l, env] -> + [push (env, l), code <+> + if memOpnd(l) && memOpnd(r) + then singletonBuffer(Mov(l, eax)) <+> compileBinop(binop, eax, r) <+ Mov(eax, l) + else compileBinop(binop, l, r) + fi] + esac + | LABEL (lab, _) -> + [if isBarrier(env) then retrieveStack (env, lab) else env fi, code <+ Label (lab)] + | JMP (lab) -> + [setBarrier(setStack (env, lab)), code <+ Jmp (lab)] + | CJMP (s, lab) -> case env.pop of - [s, env] -> [env, code <+ Push (s) <+ Call ("Lwrite") <+ Pop (eax)] + [x, env] -> [setStack (env, lab), code <+ Binop ("cmp", L(0), x) <+ CJmp (s, lab)] + esac + | LDA (x) -> + -- Dirty trick: STA used for x := v and x[i] := v both, + -- so we duplicate x to match Bsta signature: Bsta(x, x, v) + case allocate(env) of [locX, env] -> + case allocate(env) of [locI, env] -> + [env, code <+ Lea(loc(env, x), eax) <+ Mov(eax, locX) <+ Mov(eax, locI)] + esac + esac + | DROP -> [pop(env)[1], code] + | GLOBAL(lab) -> [addGlobal(env, lab), code] + | BEGIN(lab, _, nl) -> [enterFunction(env, lab, nl), code <+> prologue(lab)] + | END -> + case epilogue(env) of + [env, endCode] -> [env, code <+> endCode] + esac + | CALL(fLabel, nargs) -> compileCall([env, code], fLabel, nargs, {}, {}) + | STRING(s) -> + case addString(env, s) of + [env, name] -> compileCall([env, code], "Bstring", 0, {M(sprintf("$%s", name))}, {}) + esac + | ARRAY(n) -> compileCall([env, code], "Barray", n, {L(n)}, {}) + | STA -> compileCall([env, code], "Bsta", 3, {}, {}) + | ELEM -> compileCall([env, code], "Belem", 2, {}, {}) + | BUILTIN(f, n) -> compileCall([env, code], sprintf("L%s", f), n, {}, {}) + | SEXP(s, n) -> + case allocate(env) of + [l, env] -> compileCall([env, code <+ Mov(L(s.tagHash), l)], "Bsexp", n + 1, {L(n + 1)}, {}) + esac + | PATT(typ) -> + case typ of + Array(n) -> compileCall([env, code], "Barray_patt", 1, {}, {L(n)}) + | Tag(t, n) -> compileCall([env, code], "Btag", 1, {}, {L(n), L(t.tagHash)}) esac - - -- Some guidelines for generating function calls: - -- - -- 1. generate instructions to save live registers on the X86 stack (use - -- env.liveRegisters (number of arguments); - -- 2. generate instructions to move actual parameters from the symbolic - -- stack to the hardware one; - -- 3. generate the call itself; - -- 4. discard the actual parameters from the stack; - -- 5. restore saved live registers. - -- - -- Some guidelines for generating functions: - -- - -- 1. generate proper prologue for BEGIN instruction (use "prologue" helper); use - -- env.enterFunction to create a proper environment; - -- 2. generate epilogue for END instruction. - - | _ -> failure ("codegeneration for instruction %s is not yet implemented\n", i.string) esac fi }, [env, emptyBuffer ()], code) } - -- printf ("%s\n", showSM (code)); - - compile (env, code) + compile (env, code) } -- A top-level codegeneration function. Takes a driver's environment and a stack machine program, @@ -567,8 +707,8 @@ public fun compileX86 (args, code) { esac ++ "/runtime.a"; fwrite (asmFile, - map (insnString, - getBuffer $ + map (insnString, + getBuffer $ singletonBuffer (Meta ("\t.global\tmain\n")) <+> singletonBuffer (Meta ("\t.global\t__gc_data_start\n")) <+> singletonBuffer (Meta ("\t.global\t__gc_data_end\n")) <+> @@ -578,9 +718,9 @@ public fun compileX86 (args, code) { listBuffer (map (intDef , getGlobals (env))) <+> singletonBuffer (Meta ("__gc_data_end:\n")) <+> listBuffer (map (stringDef, getStrings (env)))) <+> - codeSection (code) + codeSection (code) ).stringcat); - + system ({"gcc -g -m32 -o ", args.getBaseName, " ", asmFile, " ", runtime}.stringcat) esac } \ No newline at end of file From 187bb9e54361bcd194504feee4e85fbfe5fffe81 Mon Sep 17 00:00:00 2001 From: Kirill Mitkin Date: Wed, 8 May 2024 17:59:43 +0300 Subject: [PATCH 2/2] make copying iterative --- runtime/runtime.c | 91 +++++++++++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/runtime/runtime.c b/runtime/runtime.c index 604815529d..6405cf6f9d 100755 --- a/runtime/runtime.c +++ b/runtime/runtime.c @@ -621,14 +621,24 @@ typedef struct static pool from_space; // From-space (active ) semi-heap static pool to_space; // To-space (passive) semi-heap +static size_t* next_unchecked; -const int WORD = sizeof(int); +static const int WORD = sizeof(int); // initial semi-space size static size_t SPACE_SIZE = 1 * WORD; // Less than PAGE_SIZE will be rounded to PAGE_SIZE in mmap // Small SPACE_SIZE for at least one gc call in tests -void init_space(pool* space); +static void clear_space(pool* space) { + space->current = space->begin; + space->end = space->begin + SPACE_SIZE / WORD; + space->size = SPACE_SIZE; +} + +static void init_space(pool* space) { + space->begin = mmap(NULL, SPACE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + clear_space(space); +} // @free_pool frees memory pool p int free_pool(pool *p) { @@ -674,17 +684,6 @@ static void extend_spaces(void) } } -void clear_space(pool* space) { - space->current = space->begin; - space->end = space->begin + SPACE_SIZE / WORD; - space->size = SPACE_SIZE; -} - -void init_space(pool* space) { - space->begin = mmap(NULL, SPACE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); - clear_space(space); -} - // @init_pool is a memory pools initialization function // (is called by L__gc_init from runtime/gc_runtime.s) // two diffent mmaps for flexibility: @@ -694,13 +693,12 @@ extern void init_pool(void) { init_space(&to_space); } -size_t round_word(size_t num) { +static size_t round_word(size_t num) { return (num + WORD - 1) / WORD; } -void gc_test_and_copy_root(size_t** root); - -void *gc_copy(size_t *p) { +// p in from_space, return pointer in to_space +static void *gc_copy_unchecked(size_t *p) { data* obj = TO_DATA(p); if (IS_FORWARD_PTR((size_t*)obj->tag)) { return (void*)obj->tag; @@ -712,23 +710,18 @@ void *gc_copy(size_t *p) { size_t* pos = to_space.current + 1; obj->tag = (int)pos; to_space.current += 1 + len; - size_t* first = pos; - for (size_t* it = first; it < first + len; ++it) { - gc_test_and_copy_root((size_t**)it); - } return pos; } case SEXP_TAG: { sexp* s = TO_SEXP(p); size_t len = LEN(obj->tag); memcpy((void*)to_space.current, (void*)s, (len + 2) * WORD); + // Swap to distinguish SEXP on checked step + *(to_space.current + 1) = s->tag; + *(to_space.current) = obj->tag; size_t* pos = to_space.current + 2; obj->tag = (int)pos; to_space.current += 2 + len; - size_t* first = pos; - for (size_t* it = first; it < first + len; ++it) { - gc_test_and_copy_root((size_t**)it); - } return pos; } case STRING_TAG: { @@ -745,9 +738,41 @@ void *gc_copy(size_t *p) { } } -void gc_test_and_copy_root(size_t **root) { +static void gc_test_and_copy(size_t **root) { if (IS_VALID_HEAP_POINTER(*root)) { - *root = gc_copy(*root); + *root = gc_copy_unchecked(*root); + } +} + +// p in to_space, return pointer to next object +static size_t* gc_check(size_t *p) { + // fprintf(stderr, "gc_check on 0x%x\n", p); + data* obj = p; + size_t len = LEN(obj->tag); + switch(TAG(obj->tag)) { + case ARRAY_TAG: { + size_t* first = p + 1; + for (size_t* it = first; it < first + len; ++it) { + gc_test_and_copy(it); + } + return p + 1 + len; + } + case SEXP_TAG: { + // Swap back + size_t tag = *(p + 1); + *(p + 1) = obj->tag; + *p = tag; + size_t* first = p + 2; + for (size_t* it = first; it < first + len; ++it) { + gc_test_and_copy(it); + } + return p + 2 + len; + } + case STRING_TAG: { + return p + 1 + round_word(len); + } + default: + failure("gc copy: tag: 0x%x\n",TAG(obj->tag)); } } @@ -756,18 +781,24 @@ void gc_test_and_copy_root(size_t **root) { // @size is a size of the block that @alloc failed to allocate static void gc(size_t size) { // fprintf(stderr, "gc called on %d\n", size); + next_unchecked = to_space.begin; // Static data for (size_t* i = (size_t*)&__gc_data_start; i < &__gc_data_end; i++) { - gc_test_and_copy_root((size_t**)i); + gc_test_and_copy((size_t**)i); } // Stack for (size_t* i = (size_t*)__gc_stack_top; i < __gc_stack_bottom; i++) { - gc_test_and_copy_root((size_t**)i); + gc_test_and_copy((size_t**)i); } // Extra roots for (int i = 0; i < extra_roots.current_free; ++i) { - gc_test_and_copy_root((size_t**)extra_roots.roots[i]); + gc_test_and_copy((size_t**)extra_roots.roots[i]); } + + while (next_unchecked != to_space.current) { + next_unchecked = gc_check(next_unchecked); + } + // Equal to avoid empty arrays and sexps (their content == space.end) while (to_space.end - to_space.current <= size) { extend_spaces();