From e2b2b087e3ec930e24c3f14fc41a59957dfa788f Mon Sep 17 00:00:00 2001 From: Hongbo Zhang Date: Wed, 13 May 2020 17:28:06 +0800 Subject: [PATCH 1/3] post about lazy encoding --- website/blog/2020-05-13-lazy-encoding.md | 109 +++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 website/blog/2020-05-13-lazy-encoding.md diff --git a/website/blog/2020-05-13-lazy-encoding.md b/website/blog/2020-05-13-lazy-encoding.md new file mode 100644 index 000000000..ccdb6b0ce --- /dev/null +++ b/website/blog/2020-05-13-lazy-encoding.md @@ -0,0 +1,109 @@ +--- +title: Enhanced lazy encoding in BuckleScript +--- + + + +Recently we made some significant improvements with our new encoding for lazy values and we find it so exciting that we want to highlight the changes. The new encoding generates very idiomatic JS output like hand-written ones. + +# What's the difference? + +Take this code snippet for example: + +```reasonml +let lazy1 = lazy { + "Hello, lazy" -> Js.log; + 1 +}; // create a lazy value + +let lazy2 = lazy 3 ; // artifical lazy values for demo purpose + +Js.log2 (lazy1, lazy2); // logging the lazy values + +let (lazy la, lazy lb) = (lazy1, lazy2); // pattern match to force it evaluated + +Js.log2 (la, lb); // logging forced values +``` + +Running this code in node, the output is as below: +```bash +lazy_demo$node src/lazy_demo.bs.js +[ [Function], tag: 246 ] 3 # logging the output of two lazy blocks +Hello, lazy +1 3 +``` + +With the new encoding, the output is as below: +```bash +bucklescript$node jscomp/test/lazy_demo.js +{ RE_LAZY: 'todo', value: [Function: value] } # logging block one +{ RE_LAZY: 'done', value: 3 } # logging block two +Hello, lazy +1 3 +``` +As you can see, with the new encoding, no magic tags like 246 appear, and the lazy status is clearly marked via `RE_LAZY: 'todo'` or `RE_LAZY: 'done'`. 
+ +More than that, the generated code quality is also improved, in the old mode, the generated JS code is like this: + +```js +var lazy1 = Caml_obj.caml_lazy_make((function (param) { + console.log("Hello, lazy"); + return 1; + })); + +console.log(lazy1, 3); + +var la = CamlinternalLazy.force(lazy1); + +var lb = CamlinternalLazy.force(3); + +console.log(la, lb); + +var lazy2 = 3; +``` + +In the new mode, it is much simplified: +```js +var lazy1 = { + RE_LAZY: "todo", + value: (function () { // internal function is using uncurried function for performance + console.log("Hello, lazy"); + return 1; + }) +}; + +var lazy2 = { + RE_LAZY: "done", + value: 3 +}; + +console.log(lazy1, lazy2); + +var la = CamlinternalLazy.force(lazy1); + +var lb = CamlinternalLazy.force(lazy2); + +console.log(la, lb); +``` + +## What changes do we make? + +In native, the encoding of lazy values is rather complicated: + +- It is an array, which is not friendly for debugging in JS context. +- It has some special tags which is not meaningful, for example, magic number 246, in JS context. +- It tries to unbox lazy values with the help of native GC, however, such complexity does not pay off in JS since JSVM does not expose its GC semantics. + +So in the master, our encoding scheme is much simplified to take advantage of JS as much as possible: + +- The encoding is uniform, it is always an object of two key value pairs, one is `RE_LAZY` to mark its status, +the other is either a closure or an evaluated value. + +- The compiler optimization still kicks in at compile time: if it knows such lazy value is already evaluated or does not need to be evaluated, it will promote its status to be 'done'. However, unboxing is not happening unlike native. This makes sense since the most interesting unboxing scenarios happens in runtime instead of compile time where it is impossible in JSVM. + + +With the new encoding, the lazy is a much nicer sugar and we encourage you to use it whenever it is convenient! 
+ +# Caveats: + +Don't rely on the special name `RE_LAZY` for JS interop, we may change it to a symbol in the future. \ No newline at end of file From 62f678453126509540b13d58af9676b303d1ca84 Mon Sep 17 00:00:00 2001 From: J David Eisenberg Date: Wed, 13 May 2020 06:24:12 -0700 Subject: [PATCH 2/3] Fixes to grammar and punctuation. --- website/blog/2020-05-13-lazy-encoding.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/website/blog/2020-05-13-lazy-encoding.md b/website/blog/2020-05-13-lazy-encoding.md index ccdb6b0ce..fab056f31 100644 --- a/website/blog/2020-05-13-lazy-encoding.md +++ b/website/blog/2020-05-13-lazy-encoding.md @@ -4,11 +4,11 @@ title: Enhanced lazy encoding in BuckleScript -Recently we made some significant improvements with our new encoding for lazy values and we find it so exciting that we want to highlight the changes. The new encoding generates very idiomatic JS output like hand-written ones. +Recently we made some significant improvements with our new encoding for lazy values, and we find it so exciting that we want to highlight the changes. The new encoding generates very idiomatic JS output like hand-written code. # What's the difference? -Take this code snippet for example: +Take this code snippet, for example: ```reasonml let lazy1 = lazy { @@ -20,7 +20,7 @@ let lazy2 = lazy 3 ; // artifical lazy values for demo purpose Js.log2 (lazy1, lazy2); // logging the lazy values -let (lazy la, lazy lb) = (lazy1, lazy2); // pattern match to force it evaluated +let (lazy la, lazy lb) = (lazy1, lazy2); // pattern match to force evaluation Js.log2 (la, lb); // logging forced values ``` @@ -43,7 +43,7 @@ Hello, lazy ``` As you can see, with the new encoding, no magic tags like 246 appear, and the lazy status is clearly marked via `RE_LAZY: 'todo'` or `RE_LAZY: 'done'`. 
-More than that, the generated code quality is also improved, in the old mode, the generated JS code is like this: +More than that, the generated code quality is also improved. In the old mode, the generated JS code was like this: ```js var lazy1 = Caml_obj.caml_lazy_make((function (param) { @@ -86,24 +86,24 @@ var lb = CamlinternalLazy.force(lazy2); console.log(la, lb); ``` -## What changes do we make? +## What changes did we make? In native, the encoding of lazy values is rather complicated: - It is an array, which is not friendly for debugging in JS context. -- It has some special tags which is not meaningful, for example, magic number 246, in JS context. -- It tries to unbox lazy values with the help of native GC, however, such complexity does not pay off in JS since JSVM does not expose its GC semantics. +- It has some special tags which are not meaningful, for example, magic number 246, in JS context. +- It tries to unbox lazy values with the help of native GC. However, such complexity does not pay off in JS since the JSVM does not expose its GC semantics. So in the master, our encoding scheme is much simplified to take advantage of JS as much as possible: -- The encoding is uniform, it is always an object of two key value pairs, one is `RE_LAZY` to mark its status, +- The encoding is uniform; it is always an object of two key value pairs. One is `RE_LAZY` to mark its status, the other is either a closure or an evaluated value. -- The compiler optimization still kicks in at compile time: if it knows such lazy value is already evaluated or does not need to be evaluated, it will promote its status to be 'done'. However, unboxing is not happening unlike native. This makes sense since the most interesting unboxing scenarios happens in runtime instead of compile time where it is impossible in JSVM. 
+- The compiler optimization still kicks in at compile time: if it knows a lazy value is already evaluated or does not need to be evaluated, it will promote its status to be 'done'. However, unlike native, unboxing is not happening. This makes sense since the most interesting unboxing scenario happens in runtime instead of compile time where it is impossible in JSVM. -With the new encoding, the lazy is a much nicer sugar and we encourage you to use it whenever it is convenient! +With the new encoding, `lazy` has a much nicer sugar, and we encourage you to use it whenever it is convenient! # Caveats: -Don't rely on the special name `RE_LAZY` for JS interop, we may change it to a symbol in the future. \ No newline at end of file +Don't rely on the special name `RE_LAZY` for JS interop; we may change it to a symbol in the future. From d60f6c3147b09a59f702bb500f1eab6d35ede912 Mon Sep 17 00:00:00 2001 From: Hongbo Zhang Date: Thu, 14 May 2020 13:57:26 +0800 Subject: [PATCH 3/3] address comments --- website/blog/2020-05-13-lazy-encoding.md | 25 ++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/website/blog/2020-05-13-lazy-encoding.md b/website/blog/2020-05-13-lazy-encoding.md index fab056f31..916c677f1 100644 --- a/website/blog/2020-05-13-lazy-encoding.md +++ b/website/blog/2020-05-13-lazy-encoding.md @@ -6,6 +6,8 @@ title: Enhanced lazy encoding in BuckleScript Recently we made some significant improvements with our new encoding for lazy values, and we find it so exciting that we want to highlight the changes. The new encoding generates very idiomatic JS output like hand-written code. +For people who are not familiar with lazy evaluation, it is documented here: https://ocaml.org/releases/4.10/htmlman/expr.html#sss:expr-lazy. + # What's the difference? 
 Take this code snippet, for example: @@ -29,19 +31,18 @@ Running this code in node, the output is as below: ```bash lazy_demo$node src/lazy_demo.bs.js [ [Function], tag: 246 ] 3 # logging the output of two lazy blocks -Hello, lazy -1 3 +Hello, lazy # lazy1 and lazy2 are forced by the pattern match, hence the logging +1 3 # logging the evaluated lazy blocks ``` With the new encoding, the output is as below: ```bash -bucklescript$node jscomp/test/lazy_demo.js -{ RE_LAZY: 'todo', value: [Function: value] } # logging block one -{ RE_LAZY: 'done', value: 3 } # logging block two +{ RE_LAZY_DONE: false, value: [Function: value] } { RE_LAZY_DONE: true, value: 3 } # logging both lazy blocks with the new encoding Hello, lazy 1 3 ``` -As you can see, with the new encoding, no magic tags like 246 appear, and the lazy status is clearly marked via `RE_LAZY: 'todo'` or `RE_LAZY: 'done'`. + +As you can see, with the new encoding, no magic tags like 246 appear, and the lazy status is clearly marked via `RE_LAZY_DONE: (true | false)`. More than that, the generated code quality is also improved. In the old mode, the generated JS code was like this: @@ -62,18 +63,18 @@ console.log(la, lb); var lazy2 = 3; ``` -In the new mode, it is much simplified: +In the new mode, it is simplified: ```js var lazy1 = { - RE_LAZY: "todo", - value: (function () { // internal function is using uncurried function for performance + RE_LAZY_DONE: false, + value: (function () { // the closure is now an uncurried arity-0 function console.log("Hello, lazy"); return 1; }) }; var lazy2 = { - RE_LAZY: "done", + RE_LAZY_DONE: true, value: 3 }; @@ -96,7 +97,7 @@ In native, the encoding of lazy values is rather complicated: So in the master, our encoding scheme is much simplified to take advantage of JS as much as possible: -- The encoding is uniform; it is always an object of two key value pairs. One is `RE_LAZY` to mark its status, +- The encoding is uniform; it is always an object of two key value pairs. 
One is `RE_LAZY_DONE` to mark its status, the other is either a closure or an evaluated value. - The compiler optimization still kicks in at compile time: if it knows a lazy value is already evaluated or does not need to be evaluated, it will promote its status to be 'done'. However, unlike native, unboxing is not happening. This makes sense since the most interesting unboxing scenario happens in runtime instead of compile time where it is impossible in JSVM. @@ -106,4 +107,4 @@ With the new encoding, `lazy` has a much nicer sugar, and we encourage you to us # Caveats: -Don't rely on the special name `RE_LAZY` for JS interop; we may change it to a symbol in the future. +Don't rely on the special name `RE_LAZY_DONE` for JS interop; we may change it to a symbol in the future.