From ab7c2f5dd427b527c37ec544ecee685b2e4e4083 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Fri, 22 Nov 2024 06:02:12 +0800
Subject: [PATCH 01/25] feat: buffer api (placeholder)

---
 buffer/buffer.mbt  | 37 +++++++++++++++++++++++++++++++------
 buffer/buffer.mbti | 38 +++++++++++++++++++++++---------------
 builtin/bytes.mbt  | 29 +++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 21 deletions(-)

diff --git a/buffer/buffer.mbt b/buffer/buffer.mbt
index b22db3ace..89130c939 100644
--- a/buffer/buffer.mbt
+++ b/buffer/buffer.mbt
@@ -84,11 +84,12 @@ pub fn to_string(self : T) -> String {
 /// Return a new unchecked string contains the data in buffer.
 /// Note this function does not validate the encoding of the byte sequence, 
 /// it simply copy the bytes into a new String.
-pub fn to_unchecked_string(self : T) -> String {
-  Bytes::from_fixedarray(self.data).to_unchecked_string(
-    offset=0,
-    length=self.len,
-  )
+pub fn to_unchecked_string(
+  self : T,
+  offset~ : Int = 0,
+  length~ : Int = self.len
+) -> String {
+  Bytes::from_fixedarray(self.data).to_unchecked_string(offset~, length~)
 }
 
 ///|
@@ -154,6 +155,12 @@ pub fn write_char(self : T, value : Char) -> Unit {
   self.len += inc
 }
 
+pub fn write_utf8_char(self : T, value : Char) -> Unit {
+  self.grow_if_necessary(self.len + 4)
+  let inc = self.data.set_utf8_char(self.len, value)
+  self.len += inc
+}
+
 ///|
 /// Write a byte into buffer.
 pub fn write_byte(self : T, value : Byte) -> Unit {
@@ -162,6 +169,10 @@ pub fn write_byte(self : T, value : Byte) -> Unit {
   self.len += 1
 }
 
+pub fn blit(self : T, srcoff : Int, dst : T, dstoff : Int, len : Int) -> Unit {
+  Bytes::blit(self.to_bytes(), srcoff, dst.to_bytes(), dstoff, len)
+}
+
 ///|
 pub fn reset(self : T) -> Unit {
   self.data = self.initial_data
@@ -173,7 +184,21 @@ pub fn to_bytes(self : T) -> Bytes {
   Bytes::from_fixedarray(self.data, len=self.len)
 }
 
+///|
+pub fn to_array(self : T) -> Array[Byte] {
+  self.to_bytes().to_array()
+}
+
+///|
+pub fn op_set(self : T, index : Int, value : Byte) -> Unit {
+  self.data[index] = value
+}
+
+pub fn op_get(self : T, index : Int) -> Byte {
+  self.data[index]
+}
+
 ///|
 pub impl Show for T with output(self, logger) {
-  logger.write_string(self.to_unchecked_string())
+  logger.write_string(self.to_unchecked_string(offset=0, length=self.len))
 }
diff --git a/buffer/buffer.mbti b/buffer/buffer.mbti
index 41570ecb8..91bd69d78 100644
--- a/buffer/buffer.mbti
+++ b/buffer/buffer.mbti
@@ -5,21 +5,29 @@ package moonbitlang/core/buffer
 // Types and methods
 type T
 impl T {
-  is_empty(Self) -> Bool
-  length(Self) -> Int
-  new(size_hint~ : Int = ..) -> Self
-  reset(Self) -> Unit
-  to_bytes(Self) -> Bytes
-  to_string(Self) -> String //deprecated
-  to_unchecked_string(Self) -> String
-  write_byte(Self, Byte) -> Unit
-  write_bytes(Self, Bytes) -> Unit
-  write_char(Self, Char) -> Unit
-  write_object(Self, Show) -> Unit
-  write_string(Self, String) -> Unit
-  write_sub_string(Self, String, Int, Int) -> Unit //deprecated
-  write_substring(Self, String, Int, Int) -> Unit
-}
+        blit(Self, Int, Self, Int, Int) -> Unit
+        from_array(Array[Byte]) -> Self
+        from_bytes(Bytes) -> Self
+        is_empty(Self) -> Bool
+        length(Self) -> Int
+        new(size_hint~ : Int = ..) -> Self
+        new(~size_hint : Int = ..) -> Self
+        op_get(Self, Int) -> Byte
+        op_set(Self, Int, Byte) -> Unit
+        reset(Self) -> Unit
+        to_array(Self) -> Array[Byte]
+        to_bytes(Self) -> Bytes
+        to_string(Self) -> String //deprecated
+        to_unchecked_string(Self, ~offset : Int, ~length : Int) -> String
+        write_byte(Self, Byte) -> Unit
+        write_bytes(Self, Bytes) -> Unit
+        write_char(Self, Char) -> Unit
+        write_object(Self, Show) -> Unit
+        write_string(Self, String) -> Unit
+        write_sub_string(Self, String, Int, Int) -> Unit //deprecated
+        write_substring(Self, String, Int, Int) -> Unit
+        write_utf8_char(Self, Char) -> Unit}
+
 impl Show for T
 
 // Type aliases
diff --git a/builtin/bytes.mbt b/builtin/bytes.mbt
index 4a54f8cde..a77baefe4 100644
--- a/builtin/bytes.mbt
+++ b/builtin/bytes.mbt
@@ -168,6 +168,35 @@ pub fn set_utf8_char(self : Bytes, offset : Int, value : Char) -> Int {
   }
 }
 
+pub fn set_utf8_char(
+  self : FixedArray[Byte],
+  offset : Int,
+  value : Char
+) -> Int {
+  let code = value.to_uint()
+  if code < 0x80 {
+    self[offset] = ((code & 0x7F) | 0x00).to_byte()
+    1
+  } else if code < 0x0800 {
+    self[offset] = (((code >> 6) & 0x1F) | 0xC0).to_byte()
+    self[offset + 1] = ((code & 0x3F) | 0x80).to_byte()
+    2
+  } else if code < 0x010000 {
+    self[offset] = (((code >> 12) & 0x0F) | 0xE0).to_byte()
+    self[offset + 1] = (((code >> 6) & 0x3F) | 0x80).to_byte()
+    self[offset + 2] = ((code & 0x3F) | 0x80).to_byte()
+    3
+  } else if code < 0x110000 {
+    self[offset] = (((code >> 18) & 0x07) | 0xF0).to_byte()
+    self[offset + 1] = (((code >> 12) & 0x3F) | 0x80).to_byte()
+    self[offset + 2] = (((code >> 6) & 0x3F) | 0x80).to_byte()
+    self[offset + 3] = ((code & 0x3F) | 0x80).to_byte()
+    4
+  } else {
+    abort("Char out of range")
+  }
+}
+
 ///|
 /// Fill utf16 encoded char `value` into byte sequence `self`, starting at `offset`.
 /// It return the length of bytes has been written.

From 673e1bdc197f27986b55661ab2b0ad0de3e60095 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Fri, 22 Nov 2024 06:02:23 +0800
Subject: [PATCH 02/25] feat: int api (placeholder)

---
 int/int.mbt  | 8 ++++++++
 int/int.mbti | 1 +
 2 files changed, 9 insertions(+)

diff --git a/int/int.mbt b/int/int.mbt
index 54d320a8f..5aa82f510 100644
--- a/int/int.mbt
+++ b/int/int.mbt
@@ -41,3 +41,11 @@ pub fn abs(self : Int) -> Int {
     self
   }
 }
+
+pub fn minimum(self : Int, x : Int) -> Int {
+  if self > x {
+    x
+  } else {
+    self
+  }
+}
diff --git a/int/int.mbti b/int/int.mbti
index e7694923f..cb1c6f6d4 100644
--- a/int/int.mbti
+++ b/int/int.mbti
@@ -10,6 +10,7 @@ let min_value : Int
 
 impl Int {
   abs(Int) -> Int
+  minimum(Int, Int) -> Int
 }
 
 // Type aliases

From 6c633db5d4a38ef975753ef8bf7dafa0899446d6 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Fri, 22 Nov 2024 06:02:34 +0800
Subject: [PATCH 03/25] feat: string api (placeholder)

---
 buffer/buffer.mbti   | 43 ++++++++++++++++++++-----------------------
 builtin/builtin.mbti |  1 +
 string/string.mbt    |  5 +++++
 string/string.mbti   |  1 +
 4 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/buffer/buffer.mbti b/buffer/buffer.mbti
index 91bd69d78..c53097c8f 100644
--- a/buffer/buffer.mbti
+++ b/buffer/buffer.mbti
@@ -5,29 +5,26 @@ package moonbitlang/core/buffer
 // Types and methods
 type T
 impl T {
-        blit(Self, Int, Self, Int, Int) -> Unit
-        from_array(Array[Byte]) -> Self
-        from_bytes(Bytes) -> Self
-        is_empty(Self) -> Bool
-        length(Self) -> Int
-        new(size_hint~ : Int = ..) -> Self
-        new(~size_hint : Int = ..) -> Self
-        op_get(Self, Int) -> Byte
-        op_set(Self, Int, Byte) -> Unit
-        reset(Self) -> Unit
-        to_array(Self) -> Array[Byte]
-        to_bytes(Self) -> Bytes
-        to_string(Self) -> String //deprecated
-        to_unchecked_string(Self, ~offset : Int, ~length : Int) -> String
-        write_byte(Self, Byte) -> Unit
-        write_bytes(Self, Bytes) -> Unit
-        write_char(Self, Char) -> Unit
-        write_object(Self, Show) -> Unit
-        write_string(Self, String) -> Unit
-        write_sub_string(Self, String, Int, Int) -> Unit //deprecated
-        write_substring(Self, String, Int, Int) -> Unit
-        write_utf8_char(Self, Char) -> Unit}
-
+  blit(Self, Int, Self, Int, Int) -> Unit
+  is_empty(Self) -> Bool
+  length(Self) -> Int
+  new(size_hint~ : Int = ..) -> Self
+  op_get(Self, Int) -> Byte
+  op_set(Self, Int, Byte) -> Unit
+  reset(Self) -> Unit
+  to_array(Self) -> Array[Byte]
+  to_bytes(Self) -> Bytes
+  to_string(Self) -> String //deprecated
+  to_unchecked_string(Self, offset~ : Int = .., length~ : Int = ..) -> String
+  write_byte(Self, Byte) -> Unit
+  write_bytes(Self, Bytes) -> Unit
+  write_char(Self, Char) -> Unit
+  write_object(Self, Show) -> Unit
+  write_string(Self, String) -> Unit
+  write_sub_string(Self, String, Int, Int) -> Unit //deprecated
+  write_substring(Self, String, Int, Int) -> Unit
+  write_utf8_char(Self, Char) -> Unit
+}
 impl Show for T
 
 // Type aliases
diff --git a/builtin/builtin.mbti b/builtin/builtin.mbti
index 0935940a1..d1c4ed4fd 100644
--- a/builtin/builtin.mbti
+++ b/builtin/builtin.mbti
@@ -668,6 +668,7 @@ impl FixedArray {
   op_set[T](Self[T], Int, T) -> Unit
   set[T](Self[T], Int, T) -> Unit
   set_utf16_char(Self[Byte], Int, Char) -> Int
+  set_utf8_char(Self[Byte], Int, Char) -> Int
   to_json[X : ToJson](Self[X]) -> Json
   to_string[X : Show](Self[X]) -> String
   unsafe_blit[A](Self[A], Int, Self[A], Int, Int) -> Unit
diff --git a/string/string.mbt b/string/string.mbt
index 2328d864c..a320c9761 100644
--- a/string/string.mbt
+++ b/string/string.mbt
@@ -31,6 +31,11 @@ pub fn String::from_array(chars : Array[Char]) -> String {
   buf.to_string()
 }
 
+pub fn String::from_iter(iter : Iter[Char]) -> String {
+  let chars = iter.collect()
+  String::from_array(chars)
+}
+
 ///|
 /// Concatenate strings.
 /// 
diff --git a/string/string.mbti b/string/string.mbti
index f21058e69..e7b8b35f0 100644
--- a/string/string.mbti
+++ b/string/string.mbti
@@ -14,6 +14,7 @@ impl String {
   ends_with(String, String) -> Bool
   fold[A](String, init~ : A, (A, Char) -> A) -> A
   from_array(Array[Char]) -> String
+  from_iter(Iter[Char]) -> String
   index_of(String, String, from~ : Int = ..) -> Int
   is_blank(String) -> Bool
   is_empty(String) -> Bool

From a947d8293cb140868101a790433a08028574f2a5 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Fri, 22 Nov 2024 06:02:46 +0800
Subject: [PATCH 04/25] feat: encoding api (placeholder)

---
 encoding/decoding.mbt      | 325 +++++++++++++++++++++++++++++++++++++
 encoding/encoding.mbt      |  13 ++
 encoding/encoding.mbti     |  26 +++
 encoding/encoding_test.mbt | 105 ++++++++++++
 encoding/moon.pkg.json     |  15 ++
 encoding/types.mbt         |  96 +++++++++++
 6 files changed, 580 insertions(+)
 create mode 100644 encoding/decoding.mbt
 create mode 100644 encoding/encoding.mbt
 create mode 100644 encoding/encoding.mbti
 create mode 100644 encoding/encoding_test.mbt
 create mode 100644 encoding/moon.pkg.json
 create mode 100644 encoding/types.mbt

diff --git a/encoding/decoding.mbt b/encoding/decoding.mbt
new file mode 100644
index 000000000..ab5598ab6
--- /dev/null
+++ b/encoding/decoding.mbt
@@ -0,0 +1,325 @@
+// Copyright 2024 International Digital Economy Academy
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+const U_REP = '\u{FFFD}'
+
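+// UTF-8 sequence length indexed by lead byte; 0 marks a byte that cannot start a sequence. TODO: consider making this a const.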
+let utf_8_len = [
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
+  4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+]
+
+fn r_utf_8(buf : Buffer, offset : Int, length : Int) -> Decode {
+  fn uchar(c : Int) {
+    Uchar(Char::from_int(c))
+  }
+
+  match length {
+    1 => uchar(buf[offset].to_int())
+    2 => {
+      let b0 = buf[offset].to_int()
+      let b1 = buf[offset + 1].to_int()
+      if (b1 >> 6) != 0b10 {
+        malformed(buf, offset, length)
+      } else {
+        uchar(((b0 & 0x1F) << 6) | (b1 & 0x3F))
+      }
+    }
+    3 => {
+      let b0 = buf[offset].to_int()
+      let b1 = buf[offset + 1].to_int()
+      let b2 = buf[offset + 2].to_int()
+      let c = ((b0 & 0x0F) << 12) | (((b1 & 0x3F) << 6) | (b2 & 0x3F))
+      if (b2 >> 6) != 0b10 {
+        malformed(buf, offset, length)
+      } else {
+        match b0 {
+          0xE0 =>
+            if b1 < 0xA0 || 0xBF < b1 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+          0xED =>
+            if b1 < 0x80 || 0x9F < b1 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+          _ =>
+            if (b1 >> 6) != 0b10 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+        }
+      }
+    }
+    4 => {
+      let b0 = buf[offset].to_int()
+      let b1 = buf[offset + 1].to_int()
+      let b2 = buf[offset + 2].to_int()
+      let b3 = buf[offset + 3].to_int()
+      let c = ((b0 & 0x07) << 18) |
+        ((b1 & 0x3F) << 12) |
+        ((b2 & 0x3F) << 6) |
+        (b3 & 0x3F)
+      if (b3 >> 6) != 0b10 || (b2 >> 6) != 0b10 {
+        malformed(buf, offset, length)
+      } else {
+        match b0 {
+          0xF0 =>
+            if b1 < 0x90 || 0xBF < b1 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+          0xF4 =>
+            if b1 < 0x80 || 0x8F < b1 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+          _ =>
+            if (b1 >> 6) != 0b10 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+        }
+      }
+    }
+    _ => panic()
+  }
+}
+
+fn r_utf_16(buf : Buffer, offset0 : Int, offset1 : Int) -> UTF16Decode {
+  let b0 = buf[offset0].to_int()
+  let b1 = buf[offset1].to_int()
+  let u = (b0 << 8) | b1
+  if u < 0xD800 || u > 0xDFFF {
+    UTF16Uchar(Char::from_int(u))
+  } else if u > 0xDBFF {
+    UTF16Malformed(
+      buf.to_unchecked_string(offset=@int.minimum(offset0, offset1), length=2),
+    )
+  } else {
+    Hi(u)
+  }
+}
+
+fn r_utf_16_lo(hi : Int, buf : Buffer, offset0 : Int, offset1 : Int) -> Decode {
+  let b0 = buf[offset0].to_int()
+  let b1 = buf[offset1].to_int()
+  let lo = (b0 << 8) | b1
+  if lo < 0xDC00 || lo > 0xDFFF {
+    malformed_pair(
+      offset0 < offset1,
+      hi,
+      buf,
+      @int.minimum(offset0, offset1),
+      2,
+    )
+  } else { // merge the 10 + 10 payload bits first, then add the 0x10000 offset (OR-ing it in breaks U+20000 and above)
+    Uchar(Char::from_int((((hi & 0x3FF) << 10) | (lo & 0x3FF)) + 0x10000))
+  }
+}
+
+fn decode(self : Decoder) -> Decode {
+  (self.k)(self)
+}
+
+fn decoder(~encoding : Encoding, src : Buffer) -> Decoder {
+  let i = src
+  let i_pos = 0
+  let i_max = src.length() - 1
+  let t = Buffer::from_bytes(b"\x00\x00\x00\x00")
+  let t_len = 0
+  let t_need = 0
+  let k = match encoding {
+    UTF8 => decode_utf_8
+    UTF16 => decode_utf_16le
+    UTF16BE => decode_utf_16le // NOTE(review): UTF16BE (and UTF16) reuse the little-endian decoder — presumably a placeholder; confirm
+    UTF16LE => decode_utf_16le
+  }
+  { i, i_pos, i_max, t, t_len, t_need, k }
+}
+
+fn ret(self : Decoder, k : Cont, v : Decode) -> Decode {
+  self.k = k
+  v
+}
+
+priv enum UTF16Decode {
+  Hi(Int)
+  UTF16Malformed(String)
+  UTF16Uchar(Char)
+}
+
+fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
+  if decoder.t_len < decoder.t_need {
+    decoder.ret(
+      decode_utf_16le,
+      malformed_pair(false, hi, decoder.t, 0, decoder.t_len),
+    )
+  } else {
+    decoder.ret(decode_utf_16le, r_utf_16_lo(hi, decoder.t, 1, 0))
+  }
+}
+
+fn t_decode_utf_16le(self : Decoder) -> Decode {
+  if self.t_len < self.t_need {
+    self.ret(decode_utf_16le, malformed(self.t, 0, self.t_len))
+  } else {
+    self.decode_utf_16le_lo(r_utf_16(self.t, 1, 0))
+  }
+}
+
+fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
+  match v {
+    UTF16Uchar(u) => Uchar(u)
+    UTF16Malformed(s) => Malformed(s)
+    Hi(hi) => {
+      let rem = self.i_rem()
+      if rem < 2 {
+        self.t_need(2)
+        t_fill(@tuple.curry(t_decode_utf_16le_lo)(hi), self)
+      } else {
+        let j = self.i_pos
+        self.i_pos = self.i_pos + 2
+        r_utf_16_lo(hi, self.i, j + 1, j)
+      }
+    }
+  }
+}
+
+fn decode_utf_16le(self : Decoder) -> Decode {
+  let rem = self.i_rem()
+  match rem.compare(0) {
+    // rem < 0
+    -1 => Decode::End
+    // rem = 0
+    0 => self.refill(decode_utf_16le)
+    // rem > 0
+    1 =>
+      if rem < 2 {
+        self.t_need(2)
+        t_fill(t_decode_utf_16le, self)
+      } else {
+        let j = self.i_pos
+        self.i_pos = self.i_pos + 2
+        // mark
+        self.decode_utf_16le_lo(r_utf_16(self.i, j + 1, j))
+      }
+    _ => abort("unreachable")
+  }
+}
+
+fn t_decode_utf_8(self : Decoder) -> Decode {
+  if self.t_len < self.t_need {
+    malformed(self.t, 0, self.t_len)
+  } else {
+    r_utf_8(self.t, 0, self.t_len)
+  }
+}
+
+fn decode_utf_8(self : Decoder) -> Decode {
+  let rem = self.i_rem()
+  match rem.compare(0) {
+    // rem < 0
+    -1 => Decode::End
+    // rem = 0
+    0 => self.refill(decode_utf_8)
+    // rem > 0
+    1 => {
+      let idx = self.i[self.i_pos].to_int()
+      let need = utf_8_len[idx]
+      if rem < need {
+        self.t_need(need)
+        t_fill(t_decode_utf_8, self)
+      } else {
+        let j = self.i_pos
+        if need == 0 {
+          self.i_pos = self.i_pos + 1
+          self.ret(decode_utf_8, malformed(self.i, j, 1))
+        } else {
+          self.i_pos = self.i_pos + need
+          self.ret(decode_utf_8, r_utf_8(self.i, j, need))
+        }
+      }
+    }
+    _ => abort("unreachable")
+  }
+}
+
+fn i_rem(self : Decoder) -> Int {
+  self.i_max - self.i_pos + 1
+}
+
+fn eoi(self : Decoder) -> Unit {
+  self.i = Buffer::new()
+  self.i_pos = 0
+  self.i_max = @int.min_value
+}
+
+fn refill(self : Decoder, k : Cont) -> Decode {
+  // The only input is the Buffer given up front, so no more data is fetched: flag end of input, then resume `k`.
+  self.eoi()
+  k(self)
+}
+
+fn t_need(self : Decoder, need : Int) -> Unit {
+  self.t_len = 0
+  self.t_need = need
+}
+
+fn t_fill(k : Cont, decoder : Decoder) -> Decode {
+  fn blit(decoder : Decoder, l : Int) -> Unit {
+    decoder.i.blit(decoder.i_pos, decoder.t, decoder.t_len, l)
+    decoder.i_pos = decoder.i_pos + l // consume exactly the l bytes just copied (l may be 0 or more than 1)
+    decoder.t_len = decoder.t_len + l // the scratch buffer t now holds l more bytes
+  }
+
+  let rem = decoder.i_rem()
+  if rem < 0 { // eoi
+    k(decoder)
+  } else {
+    let need = decoder.t_need - decoder.t_len
+    if rem < need {
+      blit(decoder, rem)
+      decoder.refill(@tuple.curry(t_fill)(k))
+    } else {
+      blit(decoder, need)
+      k(decoder)
+    }
+  }
+}
+
+pub fn decode_lossy(~encoding : Encoding = UTF8, src : Buffer) -> Stream {
+  let decoder = decoder(~encoding, src)
+  { decoder, lossy: true }
+}
+
+pub fn decode_strict(~encoding : Encoding = UTF8, src : Buffer) -> Stream {
+  let decoder = decoder(~encoding, src)
+  { decoder, lossy: false }
+}
diff --git a/encoding/encoding.mbt b/encoding/encoding.mbt
new file mode 100644
index 000000000..f307e1338
--- /dev/null
+++ b/encoding/encoding.mbt
@@ -0,0 +1,13 @@
+// Copyright 2024 International Digital Economy Academy
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
diff --git a/encoding/encoding.mbti b/encoding/encoding.mbti
new file mode 100644
index 000000000..f62a27936
--- /dev/null
+++ b/encoding/encoding.mbti
@@ -0,0 +1,26 @@
+package moonbitlang/core/encoding
+
+alias @moonbitlang/core/buffer as @buffer
+
+// Values
+fn decode_lossy(~encoding : Encoding = .., @buffer.T) -> Stream
+
+fn decode_strict(~encoding : Encoding = .., @buffer.T) -> Stream
+
+// Types and methods
+pub enum Encoding {
+  UTF8
+  UTF16
+  UTF16BE
+  UTF16LE
+}
+
+type Stream
+impl Stream {
+  iter(Self) -> Iter[Char]
+}
+
+// Type aliases
+
+// Traits
+
diff --git a/encoding/encoding_test.mbt b/encoding/encoding_test.mbt
new file mode 100644
index 000000000..64a13a3a9
--- /dev/null
+++ b/encoding/encoding_test.mbt
@@ -0,0 +1,105 @@
+// Copyright 2024 International Digital Economy Academy
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+test "decoding String (UTF16LE) to String" {
+  let src = "你好👀"
+  let buf = @buffer.T::from_bytes(src.to_bytes())
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x60\x4f\x7d\x59\x3d\xd8\x40\xdc"
+    ,
+  )
+  let stream = @encoding.decode_lossy(encoding=UTF16LE, buf)
+  inspect!(String::from_iter(stream.iter()), content=src)
+}
+
+test "decoding UTF16LE to String" {
+  let src = "你好👀"
+  let buf = @buffer.T::new(size_hint=10)
+  buf.write_bytes(b"\x60\x4f")
+  buf.write_bytes(b"\x7d\x59")
+  buf.write_bytes(b"\x3d\xd8\x40\xdc")
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x60\x4f\x7d\x59\x3d\xd8\x40\xdc"
+    ,
+  )
+  let stream = @encoding.decode_lossy(encoding=UTF16LE, buf)
+  inspect!(String::from_iter(stream.iter()), content=src)
+}
+
+test "decoding UTF8 to String" {
+  let buf = @buffer.T::new(size_hint=10)
+  buf.write_bytes(b"\xe4\xbd\xa0")
+  buf.write_bytes(b"\xe5\xa5\xbd")
+  buf.write_bytes(b"\xf0\x9f\x91\x80")
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xe4\xbd\xa0\xe5\xa5\xbd\xf0\x9f\x91\x80"
+    ,
+  )
+  let stream = @encoding.decode_lossy(encoding=UTF8, buf)
+  inspect!(String::from_iter(stream.iter()), content="你好👀")
+}
+
+test "decoding encoded String (UTF16LE) to String" {
+  let src = "👋再见"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    buf.write_char(s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x3d\xd8\x4b\xdc\x8d\x51\xc1\x89"
+    ,
+  )
+  let stream = @encoding.decode_lossy(encoding=UTF16LE, buf)
+  inspect!(String::from_iter(stream.iter()), content=src)
+}
+
+test "decoding encoded UTF8 to String" {
+  let src = "👋再见"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    buf.write_utf8_char(s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xf0\x9f\x91\x8b\xe5\x86\x8d\xe8\xa7\x81"
+    ,
+  )
+  let stream = @encoding.decode_lossy(buf) // defaults to UTF8
+  inspect!(String::from_iter(stream.iter()), content=src)
+}
+
+test "decoding encoded UTF8" {
+  let src = "👋再见"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    buf.write_utf8_char(s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xf0\x9f\x91\x8b\xe5\x86\x8d\xe8\xa7\x81"
+    ,
+  )
+  let stream = @encoding.decode_lossy(buf) // defaults to UTF8
+  inspect!(stream.iter().collect(), content="['👋', '再', '见']")
+}
diff --git a/encoding/moon.pkg.json b/encoding/moon.pkg.json
new file mode 100644
index 000000000..eb6312aee
--- /dev/null
+++ b/encoding/moon.pkg.json
@@ -0,0 +1,15 @@
+{
+  "import": [
+    "moonbitlang/core/builtin",
+    "moonbitlang/core/buffer",
+    "moonbitlang/core/coverage",
+    "moonbitlang/core/string",
+    "moonbitlang/core/bytes",
+    "moonbitlang/core/tuple",
+    "moonbitlang/core/array",
+    "moonbitlang/core/char",
+    "moonbitlang/core/int"
+  ],
+  "test-import": [
+  ]
+}
diff --git a/encoding/types.mbt b/encoding/types.mbt
new file mode 100644
index 000000000..072b4d720
--- /dev/null
+++ b/encoding/types.mbt
@@ -0,0 +1,96 @@
+// Copyright 2024 International Digital Economy Academy
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+typealias Buffer = @buffer.T
+
+typealias Cont = (Decoder) -> Decode
+
+pub enum Encoding {
+  UTF8
+  UTF16
+  UTF16BE
+  UTF16LE
+}
+
+// Decoder
+
+priv struct Decoder {
+  mut i : Buffer
+  mut i_pos : Int
+  mut i_max : Int
+  t : Buffer
+  mut t_len : Int
+  mut t_need : Int
+  mut k : Cont
+}
+
+priv enum Decode {
+  End
+  Malformed(String)
+  Uchar(Char)
+}
+
+fn malformed(buf : Buffer, offset : Int, length : Int) -> Decode {
+  Malformed(buf.to_unchecked_string(~offset, ~length))
+}
+
+fn malformed_pair(
+  be : Bool,
+  hi : Int,
+  buf : Buffer,
+  offset : Int,
+  length : Int
+) -> Decode {
+  let bs1 = buf.to_unchecked_string(~offset, ~length).to_bytes()
+  let bs0 = Buffer::new(size_hint=2)
+  let (j0, j1) = if be { (0, 1) } else { (1, 0) }
+  bs0[j0] = (hi >> 8).to_byte()
+  bs0[j1] = hi.land(0xFF).to_byte()
+  let arr = bs0.to_array()
+  arr.append(bs1.to_array())
+  let bs = Buffer::from_array(arr)
+  Malformed(bs.to_unchecked_string(offset=0, length=bs.length()))
+}
+
+// Stream
+
+struct Stream {
+  decoder : Decoder
+  lossy : Bool
+}
+
+pub fn iter(self : Stream) -> Iter[Char] {
+  Iter::new(
+    fn(yield) {
+      loop self.decoder.decode() {
+        Uchar(u) => {
+          if yield(u) == IterEnd {
+            break IterEnd
+          }
+          continue self.decoder.decode()
+        }
+        Malformed(s) => {
+          if not(self.lossy) {
+            abort(s)
+          }
+          if yield(U_REP) == IterEnd {
+            break IterEnd
+          }
+          continue self.decoder.decode()
+        }
+        End => break IterEnd
+      }
+    },
+  )
+}

From 3be567b1b4d72ce245704fe6518d1f5c01f269ce Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Fri, 22 Nov 2024 12:43:02 +0800
Subject: [PATCH 05/25] feat: buffer `T::from{bytes, array}` api (placeholder)

---
 buffer/buffer.mbt | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/buffer/buffer.mbt b/buffer/buffer.mbt
index 89130c939..98db77203 100644
--- a/buffer/buffer.mbt
+++ b/buffer/buffer.mbt
@@ -100,6 +100,20 @@ pub fn T::new(size_hint~ : Int = 0) -> T {
   { data, len: 0, initial_data: data }
 }
 
+pub fn T::from_bytes(bytes : Bytes) -> T {
+  let buf = T::new(size_hint=bytes.length())
+  buf.write_bytes(bytes)
+  buf
+}
+
+pub fn T::from_array(arr : Array[Byte]) -> T {
+  let buf = T::new(size_hint=arr.length())
+  for byte in arr {
+    buf.write_byte(byte)
+  }
+  buf
+}
+
 ///|
 /// Write a string into buffer.
 pub fn write_string(self : T, value : String) -> Unit {

From 7abfb67567104d5ee99fb1d2918adf6bc217ec13 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Fri, 22 Nov 2024 12:43:17 +0800
Subject: [PATCH 06/25] feat: encoding api (placeholder)

---
 encoding/decoding.mbt | 25 +++++++++++++++----------
 encoding/types.mbt    | 26 ++++++++++++--------------
 2 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/encoding/decoding.mbt b/encoding/decoding.mbt
index ab5598ab6..7d1589d89 100644
--- a/encoding/decoding.mbt
+++ b/encoding/decoding.mbt
@@ -28,7 +28,7 @@ let utf_8_len = [
   4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 ]
 
-fn r_utf_8(buf : Buffer, offset : Int, length : Int) -> Decode {
+fn r_utf_8(buf : @buffer.T, offset : Int, length : Int) -> Decode {
   fn uchar(c : Int) {
     Uchar(Char::from_int(c))
   }
@@ -112,7 +112,7 @@ fn r_utf_8(buf : Buffer, offset : Int, length : Int) -> Decode {
   }
 }
 
-fn r_utf_16(buf : Buffer, offset0 : Int, offset1 : Int) -> UTF16Decode {
+fn r_utf_16(buf : @buffer.T, offset0 : Int, offset1 : Int) -> UTF16Decode {
   let b0 = buf[offset0].to_int()
   let b1 = buf[offset1].to_int()
   let u = (b0 << 8) | b1
@@ -127,7 +127,12 @@ fn r_utf_16(buf : Buffer, offset0 : Int, offset1 : Int) -> UTF16Decode {
   }
 }
 
-fn r_utf_16_lo(hi : Int, buf : Buffer, offset0 : Int, offset1 : Int) -> Decode {
+fn r_utf_16_lo(
+  hi : Int,
+  buf : @buffer.T,
+  offset0 : Int,
+  offset1 : Int
+) -> Decode {
   let b0 = buf[offset0].to_int()
   let b1 = buf[offset1].to_int()
   let lo = (b0 << 8) | b1
@@ -148,11 +153,11 @@ fn decode(self : Decoder) -> Decode {
   (self.k)(self)
 }
 
-fn decoder(~encoding : Encoding, src : Buffer) -> Decoder {
+fn decoder(encoding~ : Encoding, src : @buffer.T) -> Decoder {
   let i = src
   let i_pos = 0
   let i_max = src.length() - 1
-  let t = Buffer::from_bytes(b"\x00\x00\x00\x00")
+  let t = @buffer.from_bytes(b"\x00\x00\x00\x00")
   let t_len = 0
   let t_need = 0
   let k = match encoding {
@@ -276,7 +281,7 @@ fn i_rem(self : Decoder) -> Int {
 }
 
 fn eoi(self : Decoder) -> Unit {
-  self.i = Buffer::new()
+  self.i = @buffer.new()
   self.i_pos = 0
   self.i_max = @int.min_value
 }
@@ -314,12 +319,12 @@ fn t_fill(k : Cont, decoder : Decoder) -> Decode {
   }
 }
 
-pub fn decode_lossy(~encoding : Encoding = UTF8, src : Buffer) -> Stream {
-  let decoder = decoder(~encoding, src)
+pub fn decode_lossy(encoding~ : Encoding = UTF8, src : @buffer.T) -> Stream {
+  let decoder = decoder(encoding~, src)
   { decoder, lossy: true }
 }
 
-pub fn decode_strict(~encoding : Encoding = UTF8, src : Buffer) -> Stream {
-  let decoder = decoder(~encoding, src)
+pub fn decode_strict(encoding~ : Encoding = UTF8, src : @buffer.T) -> Stream {
+  let decoder = decoder(encoding~, src)
   { decoder, lossy: false }
 }
diff --git a/encoding/types.mbt b/encoding/types.mbt
index 072b4d720..ba476103c 100644
--- a/encoding/types.mbt
+++ b/encoding/types.mbt
@@ -12,11 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-typealias Buffer = @buffer.T
-
 typealias Cont = (Decoder) -> Decode
 
-pub enum Encoding {
+pub(all) enum Encoding {
   UTF8
   UTF16
   UTF16BE
@@ -26,10 +24,10 @@ pub enum Encoding {
 // Decoder
 
 priv struct Decoder {
-  mut i : Buffer
+  mut i : @buffer.T
   mut i_pos : Int
   mut i_max : Int
-  t : Buffer
+  t : @buffer.T
   mut t_len : Int
   mut t_need : Int
   mut k : Cont
@@ -41,25 +39,25 @@ priv enum Decode {
   Uchar(Char)
 }
 
-fn malformed(buf : Buffer, offset : Int, length : Int) -> Decode {
-  Malformed(buf.to_unchecked_string(~offset, ~length))
+fn malformed(buf : @buffer.T, offset : Int, length : Int) -> Decode {
+  Malformed(buf.to_unchecked_string(offset~, length~))
 }
 
 fn malformed_pair(
   be : Bool,
   hi : Int,
-  buf : Buffer,
+  buf : @buffer.T,
   offset : Int,
   length : Int
 ) -> Decode {
-  let bs1 = buf.to_unchecked_string(~offset, ~length).to_bytes()
-  let bs0 = Buffer::new(size_hint=2)
+  let bs1 = buf.to_unchecked_string(offset~, length~).to_bytes()
+  let bs0 = @buffer.new(size_hint=2)
   let (j0, j1) = if be { (0, 1) } else { (1, 0) }
   bs0[j0] = (hi >> 8).to_byte()
   bs0[j1] = hi.land(0xFF).to_byte()
   let arr = bs0.to_array()
   arr.append(bs1.to_array())
-  let bs = Buffer::from_array(arr)
+  let bs = @buffer.from_array(arr)
   Malformed(bs.to_unchecked_string(offset=0, length=bs.length()))
 }
 
@@ -72,10 +70,10 @@ struct Stream {
 
 pub fn iter(self : Stream) -> Iter[Char] {
   Iter::new(
-    fn(yield) {
+    fn(yield_) {
       loop self.decoder.decode() {
         Uchar(u) => {
-          if yield(u) == IterEnd {
+          if yield_(u) == IterEnd {
             break IterEnd
           }
           continue self.decoder.decode()
@@ -84,7 +82,7 @@ pub fn iter(self : Stream) -> Iter[Char] {
           if not(self.lossy) {
             abort(s)
           }
-          if yield(U_REP) == IterEnd {
+          if yield_(U_REP) == IterEnd {
             break IterEnd
           }
           continue self.decoder.decode()

From 7f9fbd58b701aa98aebf3d6c1b94598a56607289 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Fri, 22 Nov 2024 12:43:33 +0800
Subject: [PATCH 07/25] moon info

---
 buffer/buffer.mbti     | 2 ++
 encoding/encoding.mbti | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/buffer/buffer.mbti b/buffer/buffer.mbti
index c53097c8f..019414de2 100644
--- a/buffer/buffer.mbti
+++ b/buffer/buffer.mbti
@@ -6,6 +6,8 @@ package moonbitlang/core/buffer
 type T
 impl T {
   blit(Self, Int, Self, Int, Int) -> Unit
+  from_array(Array[Byte]) -> Self
+  from_bytes(Bytes) -> Self
   is_empty(Self) -> Bool
   length(Self) -> Int
   new(size_hint~ : Int = ..) -> Self
diff --git a/encoding/encoding.mbti b/encoding/encoding.mbti
index f62a27936..38000d3c7 100644
--- a/encoding/encoding.mbti
+++ b/encoding/encoding.mbti
@@ -3,12 +3,12 @@ package moonbitlang/core/encoding
 alias @moonbitlang/core/buffer as @buffer
 
 // Values
-fn decode_lossy(~encoding : Encoding = .., @buffer.T) -> Stream
+fn decode_lossy(encoding~ : Encoding = .., @buffer.T) -> Stream
 
-fn decode_strict(~encoding : Encoding = .., @buffer.T) -> Stream
+fn decode_strict(encoding~ : Encoding = .., @buffer.T) -> Stream
 
 // Types and methods
-pub enum Encoding {
+pub(all) enum Encoding {
   UTF8
   UTF16
   UTF16BE

From 0fc9cd40d1eb646de23b7e6ea1ecc4954db39d8a Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Fri, 22 Nov 2024 12:49:11 +0800
Subject: [PATCH 08/25] moon fmt with block-style

---
 buffer/buffer.mbt     |  5 +++++
 builtin/bytes.mbt     |  1 +
 encoding/decoding.mbt | 22 ++++++++++++++++++++++
 encoding/types.mbt    |  8 ++++++++
 int/int.mbt           |  1 +
 string/string.mbt     |  1 +
 6 files changed, 38 insertions(+)

diff --git a/buffer/buffer.mbt b/buffer/buffer.mbt
index 98db77203..ad8672bba 100644
--- a/buffer/buffer.mbt
+++ b/buffer/buffer.mbt
@@ -100,12 +100,14 @@ pub fn T::new(size_hint~ : Int = 0) -> T {
   { data, len: 0, initial_data: data }
 }
 
+///|
 pub fn T::from_bytes(bytes : Bytes) -> T {
   let buf = T::new(size_hint=bytes.length())
   buf.write_bytes(bytes)
   buf
 }
 
+///|
 pub fn T::from_array(arr : Array[Byte]) -> T {
   let buf = T::new(size_hint=arr.length())
   for byte in arr {
@@ -169,6 +171,7 @@ pub fn write_char(self : T, value : Char) -> Unit {
   self.len += inc
 }
 
+///|
 pub fn write_utf8_char(self : T, value : Char) -> Unit {
   self.grow_if_necessary(self.len + 4)
   let inc = self.data.set_utf8_char(self.len, value)
@@ -183,6 +186,7 @@ pub fn write_byte(self : T, value : Byte) -> Unit {
   self.len += 1
 }
 
+///|
 pub fn blit(self : T, srcoff : Int, dst : T, dstoff : Int, len : Int) -> Unit {
   Bytes::blit(self.to_bytes(), srcoff, dst.to_bytes(), dstoff, len)
 }
@@ -208,6 +212,7 @@ pub fn op_set(self : T, index : Int, value : Byte) -> Unit {
   self.data[index] = value
 }
 
+///|
 pub fn op_get(self : T, index : Int) -> Byte {
   self.data[index]
 }
diff --git a/builtin/bytes.mbt b/builtin/bytes.mbt
index a77baefe4..2708d0c60 100644
--- a/builtin/bytes.mbt
+++ b/builtin/bytes.mbt
@@ -168,6 +168,7 @@ pub fn set_utf8_char(self : Bytes, offset : Int, value : Char) -> Int {
   }
 }
 
+///|
 pub fn set_utf8_char(
   self : FixedArray[Byte],
   offset : Int,
diff --git a/encoding/decoding.mbt b/encoding/decoding.mbt
index 7d1589d89..e19e8fd4f 100644
--- a/encoding/decoding.mbt
+++ b/encoding/decoding.mbt
@@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+///|
 const U_REP = '\u{FFFD}'
 
+///|
 // consider const
 let utf_8_len = [
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -28,6 +30,7 @@ let utf_8_len = [
   4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 ]
 
+///|
 fn r_utf_8(buf : @buffer.T, offset : Int, length : Int) -> Decode {
   fn uchar(c : Int) {
     Uchar(Char::from_int(c))
@@ -112,6 +115,7 @@ fn r_utf_8(buf : @buffer.T, offset : Int, length : Int) -> Decode {
   }
 }
 
+///|
 fn r_utf_16(buf : @buffer.T, offset0 : Int, offset1 : Int) -> UTF16Decode {
   let b0 = buf[offset0].to_int()
   let b1 = buf[offset1].to_int()
@@ -127,6 +131,7 @@ fn r_utf_16(buf : @buffer.T, offset0 : Int, offset1 : Int) -> UTF16Decode {
   }
 }
 
+///|
 fn r_utf_16_lo(
   hi : Int,
   buf : @buffer.T,
@@ -149,10 +154,12 @@ fn r_utf_16_lo(
   }
 }
 
+///|
 fn decode(self : Decoder) -> Decode {
   (self.k)(self)
 }
 
+///|
 fn decoder(encoding~ : Encoding, src : @buffer.T) -> Decoder {
   let i = src
   let i_pos = 0
@@ -169,17 +176,20 @@ fn decoder(encoding~ : Encoding, src : @buffer.T) -> Decoder {
   { i, i_pos, i_max, t, t_len, t_need, k }
 }
 
+///|
 fn ret(self : Decoder, k : Cont, v : Decode) -> Decode {
   self.k = k
   v
 }
 
+///|
 priv enum UTF16Decode {
   Hi(Int)
   UTF16Malformed(String)
   UTF16Uchar(Char)
 }
 
+///|
 fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
   if decoder.t_len < decoder.t_need {
     decoder.ret(
@@ -191,6 +201,7 @@ fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
   }
 }
 
+///|
 fn t_decode_utf_16le(self : Decoder) -> Decode {
   if self.t_len < self.t_need {
     self.ret(decode_utf_16le, malformed(self.t, 0, self.t_len))
@@ -199,6 +210,7 @@ fn t_decode_utf_16le(self : Decoder) -> Decode {
   }
 }
 
+///|
 fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
   match v {
     UTF16Uchar(u) => Uchar(u)
@@ -217,6 +229,7 @@ fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
   }
 }
 
+///|
 fn decode_utf_16le(self : Decoder) -> Decode {
   let rem = self.i_rem()
   match rem.compare(0) {
@@ -239,6 +252,7 @@ fn decode_utf_16le(self : Decoder) -> Decode {
   }
 }
 
+///|
 fn t_decode_utf_8(self : Decoder) -> Decode {
   if self.t_len < self.t_need {
     malformed(self.t, 0, self.t_len)
@@ -247,6 +261,7 @@ fn t_decode_utf_8(self : Decoder) -> Decode {
   }
 }
 
+///|
 fn decode_utf_8(self : Decoder) -> Decode {
   let rem = self.i_rem()
   match rem.compare(0) {
@@ -276,27 +291,32 @@ fn decode_utf_8(self : Decoder) -> Decode {
   }
 }
 
+///|
 fn i_rem(self : Decoder) -> Int {
   self.i_max - self.i_pos + 1
 }
 
+///|
 fn eoi(self : Decoder) -> Unit {
   self.i = @buffer.new()
   self.i_pos = 0
   self.i_max = @int.min_value
 }
 
+///|
 fn refill(self : Decoder, k : Cont) -> Decode {
   // only Buffer
   self.eoi()
   k(self)
 }
 
+///|
 fn t_need(self : Decoder, need : Int) -> Unit {
   self.t_len = 0
   self.t_need = need
 }
 
+///|
 fn t_fill(k : Cont, decoder : Decoder) -> Decode {
   fn blit(decoder : Decoder, l : Int) -> Unit {
     decoder.i.blit(decoder.i_pos, decoder.t, decoder.t_len, l)
@@ -319,11 +339,13 @@ fn t_fill(k : Cont, decoder : Decoder) -> Decode {
   }
 }
 
+///|
 pub fn decode_lossy(encoding~ : Encoding = UTF8, src : @buffer.T) -> Stream {
   let decoder = decoder(encoding~, src)
   { decoder, lossy: true }
 }
 
+///|
 pub fn decode_strict(encoding~ : Encoding = UTF8, src : @buffer.T) -> Stream {
   let decoder = decoder(encoding~, src)
   { decoder, lossy: false }
diff --git a/encoding/types.mbt b/encoding/types.mbt
index ba476103c..fdd397d3d 100644
--- a/encoding/types.mbt
+++ b/encoding/types.mbt
@@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+///|
 typealias Cont = (Decoder) -> Decode
 
+///|
 pub(all) enum Encoding {
   UTF8
   UTF16
@@ -23,6 +25,7 @@ pub(all) enum Encoding {
 
 // Decoder
 
+///|
 priv struct Decoder {
   mut i : @buffer.T
   mut i_pos : Int
@@ -33,16 +36,19 @@ priv struct Decoder {
   mut k : Cont
 }
 
+///|
 priv enum Decode {
   End
   Malformed(String)
   Uchar(Char)
 }
 
+///|
 fn malformed(buf : @buffer.T, offset : Int, length : Int) -> Decode {
   Malformed(buf.to_unchecked_string(offset~, length~))
 }
 
+///|
 fn malformed_pair(
   be : Bool,
   hi : Int,
@@ -63,11 +69,13 @@ fn malformed_pair(
 
 // Stream
 
+///|
 struct Stream {
   decoder : Decoder
   lossy : Bool
 }
 
+///|
 pub fn iter(self : Stream) -> Iter[Char] {
   Iter::new(
     fn(yield_) {
diff --git a/int/int.mbt b/int/int.mbt
index 5aa82f510..753d0bb6b 100644
--- a/int/int.mbt
+++ b/int/int.mbt
@@ -42,6 +42,7 @@ pub fn abs(self : Int) -> Int {
   }
 }
 
+///|
 pub fn minimum(self : Int, x : Int) -> Int {
   if self > x {
     x
diff --git a/string/string.mbt b/string/string.mbt
index a320c9761..d436ab83a 100644
--- a/string/string.mbt
+++ b/string/string.mbt
@@ -31,6 +31,7 @@ pub fn String::from_array(chars : Array[Char]) -> String {
   buf.to_string()
 }
 
+///|
 pub fn String::from_iter(iter : Iter[Char]) -> String {
   let chars = iter.collect()
   String::from_array(chars)

From aad5d6b1d68e86dc28571e196578fcf9baf8eecc Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Fri, 22 Nov 2024 13:20:36 +0800
Subject: [PATCH 09/25] some tests update

---
 encoding/encoding_test.mbt | 88 +++++++++++++++++++++++++++++++++++---
 1 file changed, 82 insertions(+), 6 deletions(-)

diff --git a/encoding/encoding_test.mbt b/encoding/encoding_test.mbt
index 64a13a3a9..f8fab85ad 100644
--- a/encoding/encoding_test.mbt
+++ b/encoding/encoding_test.mbt
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-test "decoding String (UTF16LE) to String" {
+test "lossy decoding String (UTF16LE encoded) to String" {
   let src = "你好👀"
   let buf = @buffer.T::from_bytes(src.to_bytes())
   inspect!(
@@ -25,7 +25,7 @@ test "decoding String (UTF16LE) to String" {
   inspect!(String::from_iter(stream.iter()), content=src)
 }
 
-test "decoding UTF16LE to String" {
+test "lossy decoding UTF16LE encoded data to String" {
   let src = "你好👀"
   let buf = @buffer.T::new(size_hint=10)
   buf.write_bytes(b"\x60\x4f")
@@ -41,7 +41,7 @@ test "decoding UTF16LE to String" {
   inspect!(String::from_iter(stream.iter()), content=src)
 }
 
-test "decoding UTF8 to String" {
+test "lossy decoding UTF8 encoded data to String" {
   let buf = @buffer.T::new(size_hint=10)
   buf.write_bytes(b"\xe4\xbd\xa0")
   buf.write_bytes(b"\xe5\xa5\xbd")
@@ -56,7 +56,7 @@ test "decoding UTF8 to String" {
   inspect!(String::from_iter(stream.iter()), content="你好👀")
 }
 
-test "decoding encoded String (UTF16LE) to String" {
+test "lossy decoding String (UTF16LE encoded) to String" {
   let src = "👋再见"
   let buf = @buffer.T::new(size_hint=10)
   for s in src {
@@ -72,7 +72,7 @@ test "decoding encoded String (UTF16LE) to String" {
   inspect!(String::from_iter(stream.iter()), content=src)
 }
 
-test "decoding encoded UTF8 to String" {
+test "lossy decoding UTF8 encoded data to String" {
   let src = "👋再见"
   let buf = @buffer.T::new(size_hint=10)
   for s in src {
@@ -88,7 +88,7 @@ test "decoding encoded UTF8 to String" {
   inspect!(String::from_iter(stream.iter()), content=src)
 }
 
-test "decoding encoded UTF8" {
+test "lossy decoding UTF8 encoded data" {
   let src = "👋再见"
   let buf = @buffer.T::new(size_hint=10)
   for s in src {
@@ -103,3 +103,79 @@ test "decoding encoded UTF8" {
   let stream = @encoding.decode_lossy(buf) // defaults to UTF8
   inspect!(stream.iter().collect(), content="['👋', '再', '见']")
 }
+
+test "lossy decoding UTF8 encoded data with UTF16LE" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    buf.write_char(s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xd1\x8d\x65\x6b\x3c\xd8\xc3\xdf\x38\x6e\xf3\x6c\x3c\xd8\xca\xdf"
+    ,
+  )
+  let stream = @encoding.decode_lossy(buf) // defaults to UTF8
+  inspect!(
+    stream.iter().collect(),
+    content="['э', 'e', 'k', '<', '�', '�', 'n', '�', '�']",
+  )
+}
+
+test "lossy decoding UTF16LE encoded data with UTF8" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    buf.write_utf8_char(s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
+    ,
+  )
+  let stream = @encoding.decode_lossy(encoding=UTF16LE, buf) // defaults to UTF8
+  inspect!(
+    stream.iter().collect(),
+    content="['럨', '', 'ꖭ', '鿰', '莏', '룦', '', '뎳', '鿰', '誏']",
+  )
+}
+
+// test "scrictly decoding UTF8 encoded data with UTF16LE" {
+//   let src = "跑步🏃游泳🏊"
+//   let buf = @buffer.T::new(size_hint=10)
+//   for s in src {
+//     buf.write_char(s)
+//   }
+//   inspect!(
+//     buf.to_bytes(),
+//     content=
+//       #|b"\xd1\x8d\x65\x6b\x3c\xd8\xc3\xdf\x38\x6e\xf3\x6c\x3c\xd8\xca\xdf"
+//     ,
+//   )
+//   let stream = @encoding.decode_strict(buf) // defaults to UTF8
+//   inspect!(
+//     stream.iter().collect(),
+//     content="['э', 'e', 'k', '<', '�', '�', 'n', '�', '�']",
+//   )
+// }
+//
+// test "scrictly decoding UTF16LE encoded data with UTF8" {
+//   let src = "跑步🏃游泳🏊"
+//   let buf = @buffer.T::new(size_hint=10)
+//   for s in src {
+//     buf.write_utf8_char(s)
+//   }
+//   inspect!(
+//     buf.to_bytes(),
+//     content=
+//       #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
+//     ,
+//   )
+//   let stream = @encoding.decode_strict(encoding=UTF16LE, buf)
+//   inspect!(
+//     stream.iter().collect(),
+//     content="['럨', '', 'ꖭ', '鿰', '莏', '룦', '', '뎳', '鿰', '誏']",
+//   )
+// }

From 763135c0599672f6776315f876fbe09a403e8fa6 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Nov 2024 15:21:54 +0800
Subject: [PATCH 10/25] refactor: guard buffer op_{get, set}

---
 buffer/buffer.mbt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/buffer/buffer.mbt b/buffer/buffer.mbt
index ad8672bba..83e4e6fcf 100644
--- a/buffer/buffer.mbt
+++ b/buffer/buffer.mbt
@@ -209,11 +209,15 @@ pub fn to_array(self : T) -> Array[Byte] {
 
 ///|
 pub fn op_set(self : T, index : Int, value : Byte) -> Unit {
+  let len = self.length()
+  guard index >= 0 && index < len
   self.data[index] = value
 }
 
 ///|
 pub fn op_get(self : T, index : Int) -> Byte {
+  let len = self.length()
+  guard index >= 0 && index < len
   self.data[index]
 }
 

From bda6d066de2045dee7bc263e82b131fbbe5f90bf Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Nov 2024 15:22:20 +0800
Subject: [PATCH 11/25] chore

---
 encoding/encoding_test.mbt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/encoding/encoding_test.mbt b/encoding/encoding_test.mbt
index f8fab85ad..a71ee7200 100644
--- a/encoding/encoding_test.mbt
+++ b/encoding/encoding_test.mbt
@@ -135,7 +135,7 @@ test "lossy decoding UTF16LE encoded data with UTF8" {
       #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
     ,
   )
-  let stream = @encoding.decode_lossy(encoding=UTF16LE, buf) // defaults to UTF8
+  let stream = @encoding.decode_lossy(encoding=UTF16LE, buf)
   inspect!(
     stream.iter().collect(),
     content="['럨', '', 'ꖭ', '鿰', '莏', '룦', '', '뎳', '鿰', '誏']",

From a09296affd263c85be0ccea9e2049cdf611c1354 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Nov 2024 15:28:36 +0800
Subject: [PATCH 12/25] feat: use optional params instead of labelled dep in
 buffer method

---
 buffer/buffer.mbt | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/buffer/buffer.mbt b/buffer/buffer.mbt
index 83e4e6fcf..a8404cf81 100644
--- a/buffer/buffer.mbt
+++ b/buffer/buffer.mbt
@@ -84,11 +84,15 @@ pub fn to_string(self : T) -> String {
 /// Return a new unchecked string contains the data in buffer.
 /// Note this function does not validate the encoding of the byte sequence, 
 /// it simply copy the bytes into a new String.
-pub fn to_unchecked_string(
-  self : T,
-  offset~ : Int = 0,
-  length~ : Int = self.len
-) -> String {
+pub fn to_unchecked_string(self : T, offset? : Int, length? : Int) -> String {
+  let offset = match offset {
+    None => 0
+    Some(x) => x
+  }
+  let length = match length {
+    None => self.len
+    Some(x) => x
+  }
   Bytes::from_fixedarray(self.data).to_unchecked_string(offset~, length~)
 }
 

From 1df6c3d2e24379d3a16311f0977b0f4553ca15b2 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Nov 2024 16:15:03 +0800
Subject: [PATCH 13/25] moon info

---
 buffer/buffer.mbti | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/buffer/buffer.mbti b/buffer/buffer.mbti
index 019414de2..28a79725b 100644
--- a/buffer/buffer.mbti
+++ b/buffer/buffer.mbti
@@ -17,7 +17,7 @@ impl T {
   to_array(Self) -> Array[Byte]
   to_bytes(Self) -> Bytes
   to_string(Self) -> String //deprecated
-  to_unchecked_string(Self, offset~ : Int = .., length~ : Int = ..) -> String
+  to_unchecked_string(Self, offset? : Int, length? : Int) -> String
   write_byte(Self, Byte) -> Unit
   write_bytes(Self, Bytes) -> Unit
   write_char(Self, Char) -> Unit

From 471354dcba28852b8dae96f1b01d2ee7e461ddc9 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Nov 2024 16:48:59 +0800
Subject: [PATCH 14/25] feat: builtin try_collect

---
 builtin/builtin.mbti |  1 +
 builtin/iter.mbt     | 13 +++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/builtin/builtin.mbti b/builtin/builtin.mbti
index d1c4ed4fd..1df5121a5 100644
--- a/builtin/builtin.mbti
+++ b/builtin/builtin.mbti
@@ -246,6 +246,7 @@ impl Iter {
   tap[T](Self[T], (T) -> Unit) -> Self[T] //deprecated
   to_array[T](Self[T]) -> Array[T]
   to_string[T : Show](Self[T]) -> String
+  try_collect[T, E : Error](Self[Result[T, E]]) -> Array[T]!E
 }
 impl[T : Show] Show for Iter[T]
 
diff --git a/builtin/iter.mbt b/builtin/iter.mbt
index 28e03ba78..75a2f6263 100644
--- a/builtin/iter.mbt
+++ b/builtin/iter.mbt
@@ -786,6 +786,19 @@ pub fn collect[T](self : Iter[T]) -> Array[T] {
   result
 }
 
+///|
+/// Collects the elements of the iterator into an array.
+pub fn try_collect[T, E : Error](self : Iter[Result[T, E]]) -> Array[T]!E {
+  let result = []
+  for a in self {
+    match a {
+      Ok(x) => result.push(x)
+      Err(e) => raise e
+    }
+  }
+  result
+}
+
 ///|
 /// Iter itself is an iterator.
 /// so that it works with array spread operator. e.g, `[..iter]`

From fa81954c0c59b37297409339de9d703c923b5626 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Nov 2024 16:49:20 +0800
Subject: [PATCH 15/25] feat: StrictChars

---
 encoding/decoding.mbt      | 11 +++---
 encoding/encoding.mbti     | 16 ++++++---
 encoding/encoding_test.mbt | 71 ++++++++++++++++++--------------------
 encoding/types.mbt         | 57 +++++++++++++++++++++++-------
 4 files changed, 97 insertions(+), 58 deletions(-)

diff --git a/encoding/decoding.mbt b/encoding/decoding.mbt
index e19e8fd4f..e6b8dd5ef 100644
--- a/encoding/decoding.mbt
+++ b/encoding/decoding.mbt
@@ -340,13 +340,16 @@ fn t_fill(k : Cont, decoder : Decoder) -> Decode {
 }
 
 ///|
-pub fn decode_lossy(encoding~ : Encoding = UTF8, src : @buffer.T) -> Stream {
+pub fn decode_lossy(encoding~ : Encoding = UTF8, src : @buffer.T) -> LossyChars {
   let decoder = decoder(encoding~, src)
-  { decoder, lossy: true }
+  decoder
 }
 
 ///|
-pub fn decode_strict(encoding~ : Encoding = UTF8, src : @buffer.T) -> Stream {
+pub fn decode_strict(
+  encoding~ : Encoding = UTF8,
+  src : @buffer.T
+) -> StrictChars {
   let decoder = decoder(encoding~, src)
-  { decoder, lossy: false }
+  decoder
 }
diff --git a/encoding/encoding.mbti b/encoding/encoding.mbti
index 38000d3c7..efbe7c65e 100644
--- a/encoding/encoding.mbti
+++ b/encoding/encoding.mbti
@@ -3,11 +3,14 @@ package moonbitlang/core/encoding
 alias @moonbitlang/core/buffer as @buffer
 
 // Values
-fn decode_lossy(encoding~ : Encoding = .., @buffer.T) -> Stream
+fn decode_lossy(encoding~ : Encoding = .., @buffer.T) -> LossyChars
 
-fn decode_strict(encoding~ : Encoding = .., @buffer.T) -> Stream
+fn decode_strict(encoding~ : Encoding = .., @buffer.T) -> StrictChars
 
 // Types and methods
+type DecodeError
+impl Show for DecodeError
+
 pub(all) enum Encoding {
   UTF8
   UTF16
@@ -15,11 +18,16 @@ pub(all) enum Encoding {
   UTF16LE
 }
 
-type Stream
-impl Stream {
+type LossyChars
+impl LossyChars {
   iter(Self) -> Iter[Char]
 }
 
+type StrictChars
+impl StrictChars {
+  iter(Self) -> Iter[Result[Char, DecodeError]]
+}
+
 // Type aliases
 
 // Traits
diff --git a/encoding/encoding_test.mbt b/encoding/encoding_test.mbt
index a71ee7200..2000d60c3 100644
--- a/encoding/encoding_test.mbt
+++ b/encoding/encoding_test.mbt
@@ -142,40 +142,37 @@ test "lossy decoding UTF16LE encoded data with UTF8" {
   )
 }
 
-// test "scrictly decoding UTF8 encoded data with UTF16LE" {
-//   let src = "跑步🏃游泳🏊"
-//   let buf = @buffer.T::new(size_hint=10)
-//   for s in src {
-//     buf.write_char(s)
-//   }
-//   inspect!(
-//     buf.to_bytes(),
-//     content=
-//       #|b"\xd1\x8d\x65\x6b\x3c\xd8\xc3\xdf\x38\x6e\xf3\x6c\x3c\xd8\xca\xdf"
-//     ,
-//   )
-//   let stream = @encoding.decode_strict(buf) // defaults to UTF8
-//   inspect!(
-//     stream.iter().collect(),
-//     content="['э', 'e', 'k', '<', '�', '�', 'n', '�', '�']",
-//   )
-// }
-//
-// test "scrictly decoding UTF16LE encoded data with UTF8" {
-//   let src = "跑步🏃游泳🏊"
-//   let buf = @buffer.T::new(size_hint=10)
-//   for s in src {
-//     buf.write_utf8_char(s)
-//   }
-//   inspect!(
-//     buf.to_bytes(),
-//     content=
-//       #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
-//     ,
-//   )
-//   let stream = @encoding.decode_strict(encoding=UTF16LE, buf)
-//   inspect!(
-//     stream.iter().collect(),
-//     content="['럨', '', 'ꖭ', '鿰', '莏', '룦', '', '뎳', '鿰', '誏']",
-//   )
-// }
+test "scrictly decoding UTF8 encoded data with UTF16LE" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    buf.write_char(s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xd1\x8d\x65\x6b\x3c\xd8\xc3\xdf\x38\x6e\xf3\x6c\x3c\xd8\xca\xdf"
+    ,
+  )
+  let stream = @encoding.decode_strict(buf) // defaults to UTF8
+  inspect!(stream.iter().try_collect?(), content="Err(쏘)")
+}
+
+test "scrictly decoding UTF16LE encoded data with UTF8" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    buf.write_utf8_char(s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
+    ,
+  )
+  let stream = @encoding.decode_strict(encoding=UTF16LE, buf)
+  inspect!(
+    stream.iter().try_collect?(),
+    content="Ok(['럨', '', 'ꖭ', '鿰', '莏', '룦', '', '뎳', '鿰', '誏'])",
+  )
+}
diff --git a/encoding/types.mbt b/encoding/types.mbt
index fdd397d3d..de8fa9a30 100644
--- a/encoding/types.mbt
+++ b/encoding/types.mbt
@@ -67,33 +67,64 @@ fn malformed_pair(
   Malformed(bs.to_unchecked_string(offset=0, length=bs.length()))
 }
 
-// Stream
+// Chars
 
 ///|
-struct Stream {
-  decoder : Decoder
-  lossy : Bool
-}
+type LossyChars Decoder
 
 ///|
-pub fn iter(self : Stream) -> Iter[Char] {
+pub fn iter(self : LossyChars) -> Iter[Char] {
   Iter::new(
     fn(yield_) {
-      loop self.decoder.decode() {
+      loop self._.decode() {
         Uchar(u) => {
           if yield_(u) == IterEnd {
             break IterEnd
           }
-          continue self.decoder.decode()
+          continue self._.decode()
         }
-        Malformed(s) => {
-          if not(self.lossy) {
-            abort(s)
-          }
+        Malformed(_) => {
           if yield_(U_REP) == IterEnd {
             break IterEnd
           }
-          continue self.decoder.decode()
+          continue self._.decode()
+        }
+        End => break IterEnd
+      }
+    },
+  )
+}
+
+///|
+type StrictChars Decoder
+
+///|
+type! DecodeError String
+
+///|
+pub impl Show for DecodeError with output(self, logger) {
+  match self {
+    DecodeError(err) => logger.write_string(err)
+  }
+}
+
+///|
+pub fn iter(self : StrictChars) -> Iter[Result[Char, DecodeError]] {
+  Iter::new(
+    fn(yield_) {
+      loop self._.decode() {
+        Uchar(u) => {
+          if yield_(Ok(u)) == IterEnd {
+            break IterEnd
+          }
+          continue self._.decode()
+        }
+        Malformed(s) => {
+          let err = DecodeError(s)
+          if yield_(Err(err)) == IterEnd {
+            break IterEnd
+          }
+          continue self._.decode()
         }
         End => break IterEnd
       }

From 775533a01c586b46cc61a03403ca094e46ddb0a0 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Nov 2024 17:25:07 +0800
Subject: [PATCH 16/25] refactor

---
 encoding/decoding.mbt | 373 +++++++++++++++++++++---------------------
 1 file changed, 189 insertions(+), 184 deletions(-)

diff --git a/encoding/decoding.mbt b/encoding/decoding.mbt
index e6b8dd5ef..6e0d1c74c 100644
--- a/encoding/decoding.mbt
+++ b/encoding/decoding.mbt
@@ -16,7 +16,6 @@
 const U_REP = '\u{FFFD}'
 
 ///|
-// consider const
 let utf_8_len = [
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -30,6 +29,140 @@ let utf_8_len = [
   4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 ]
 
+///|
+pub fn decode_lossy(encoding~ : Encoding = UTF8, src : @buffer.T) -> LossyChars {
+  let decoder = decoder(encoding~, src)
+  decoder
+}
+
+///|
+pub fn decode_strict(
+  encoding~ : Encoding = UTF8,
+  src : @buffer.T
+) -> StrictChars {
+  let decoder = decoder(encoding~, src)
+  decoder
+}
+
+// Implements
+
+///|
+fn decoder(encoding~ : Encoding, src : @buffer.T) -> Decoder {
+  let i = src
+  let i_pos = 0
+  let i_max = src.length() - 1
+  let t = @buffer.from_bytes(b"\x00\x00\x00\x00")
+  let t_len = 0
+  let t_need = 0
+  let k = match encoding {
+    UTF8 => decode_utf_8
+    UTF16 => decode_utf_16be // TODO: BE
+    UTF16BE => decode_utf_16be
+    UTF16LE => decode_utf_16le
+  }
+  { i, i_pos, i_max, t, t_len, t_need, k }
+}
+
+///|
+fn decode(self : Decoder) -> Decode {
+  (self.k)(self)
+}
+
+///|
+fn ret(self : Decoder, k : Cont, v : Decode) -> Decode {
+  self.k = k
+  v
+}
+
+///|
+fn i_rem(self : Decoder) -> Int {
+  self.i_max - self.i_pos + 1
+}
+
+///|
+fn eoi(self : Decoder) -> Unit {
+  self.i = @buffer.new()
+  self.i_pos = 0
+  self.i_max = @int.min_value
+}
+
+///|
+fn refill(self : Decoder, k : Cont) -> Decode {
+  // only Buffer
+  self.eoi()
+  k(self)
+}
+
+///|
+fn t_need(self : Decoder, need : Int) -> Unit {
+  self.t_len = 0
+  self.t_need = need
+}
+
+///|
+fn t_fill(k : Cont, decoder : Decoder) -> Decode {
+  fn blit(decoder : Decoder, l : Int) -> Unit {
+    decoder.i.blit(decoder.i_pos, decoder.t, decoder.t_len, l)
+    decoder.i_pos = decoder.i_pos + 1
+    decoder.t_len = decoder.t_len + 1
+  }
+
+  let rem = decoder.i_rem()
+  if rem < 0 { // eoi
+    k(decoder)
+  } else {
+    let need = decoder.t_need - decoder.t_len
+    if rem < need {
+      blit(decoder, rem)
+      decoder.refill(@tuple.curry(t_fill)(k))
+    } else {
+      blit(decoder, need)
+      k(decoder)
+    }
+  }
+}
+
+// UTF8
+
+///|
+fn decode_utf_8(self : Decoder) -> Decode {
+  let rem = self.i_rem()
+  match rem.compare(0) {
+    // rem < 0
+    -1 => Decode::End
+    // rem = 0
+    0 => self.refill(decode_utf_8)
+    // rem > 0
+    1 => {
+      let idx = self.i[self.i_pos].to_int()
+      let need = utf_8_len[idx]
+      if rem < need {
+        self.t_need(need)
+        t_fill(t_decode_utf_8, self)
+      } else {
+        let j = self.i_pos
+        if need == 0 {
+          self.i_pos = self.i_pos + 1
+          self.ret(decode_utf_8, malformed(self.i, j, 1))
+        } else {
+          self.i_pos = self.i_pos + need
+          self.ret(decode_utf_8, r_utf_8(self.i, j, need))
+        }
+      }
+    }
+    _ => abort("unreachable")
+  }
+}
+
+///|
+fn t_decode_utf_8(self : Decoder) -> Decode {
+  if self.t_len < self.t_need {
+    malformed(self.t, 0, self.t_len)
+  } else {
+    r_utf_8(self.t, 0, self.t_len)
+  }
+}
+
 ///|
 fn r_utf_8(buf : @buffer.T, offset : Int, length : Int) -> Decode {
   fn uchar(c : Int) {
@@ -115,72 +248,7 @@ fn r_utf_8(buf : @buffer.T, offset : Int, length : Int) -> Decode {
   }
 }
 
-///|
-fn r_utf_16(buf : @buffer.T, offset0 : Int, offset1 : Int) -> UTF16Decode {
-  let b0 = buf[offset0].to_int()
-  let b1 = buf[offset1].to_int()
-  let u = (b0 << 8) | b1
-  if u < 0xD800 || u > 0xDFFF {
-    UTF16Uchar(Char::from_int(u))
-  } else if u > 0xDBFF {
-    UTF16Malformed(
-      buf.to_unchecked_string(offset=@int.minimum(offset0, offset1), length=2),
-    )
-  } else {
-    Hi(u)
-  }
-}
-
-///|
-fn r_utf_16_lo(
-  hi : Int,
-  buf : @buffer.T,
-  offset0 : Int,
-  offset1 : Int
-) -> Decode {
-  let b0 = buf[offset0].to_int()
-  let b1 = buf[offset1].to_int()
-  let lo = (b0 << 8) | b1
-  if lo < 0xDC00 || lo > 0xDFFF {
-    malformed_pair(
-      offset0 < offset1,
-      hi,
-      buf,
-      @int.minimum(offset0, offset1),
-      2,
-    )
-  } else {
-    Uchar(Char::from_int(((hi & 0x3FF) << 10) | ((lo & 0x3FF) + 0x10000)))
-  }
-}
-
-///|
-fn decode(self : Decoder) -> Decode {
-  (self.k)(self)
-}
-
-///|
-fn decoder(encoding~ : Encoding, src : @buffer.T) -> Decoder {
-  let i = src
-  let i_pos = 0
-  let i_max = src.length() - 1
-  let t = @buffer.from_bytes(b"\x00\x00\x00\x00")
-  let t_len = 0
-  let t_need = 0
-  let k = match encoding {
-    UTF8 => decode_utf_8
-    UTF16 => decode_utf_16le
-    UTF16BE => decode_utf_16le
-    UTF16LE => decode_utf_16le
-  }
-  { i, i_pos, i_max, t, t_len, t_need, k }
-}
-
-///|
-fn ret(self : Decoder, k : Cont, v : Decode) -> Decode {
-  self.k = k
-  v
-}
+// UTF16LE
 
 ///|
 priv enum UTF16Decode {
@@ -189,46 +257,6 @@ priv enum UTF16Decode {
   UTF16Uchar(Char)
 }
 
-///|
-fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
-  if decoder.t_len < decoder.t_need {
-    decoder.ret(
-      decode_utf_16le,
-      malformed_pair(false, hi, decoder.t, 0, decoder.t_len),
-    )
-  } else {
-    decoder.ret(decode_utf_16le, r_utf_16_lo(hi, decoder.t, 1, 0))
-  }
-}
-
-///|
-fn t_decode_utf_16le(self : Decoder) -> Decode {
-  if self.t_len < self.t_need {
-    self.ret(decode_utf_16le, malformed(self.t, 0, self.t_len))
-  } else {
-    self.decode_utf_16le_lo(r_utf_16(self.t, 1, 0))
-  }
-}
-
-///|
-fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
-  match v {
-    UTF16Uchar(u) => Uchar(u)
-    UTF16Malformed(s) => Malformed(s)
-    Hi(hi) => {
-      let rem = self.i_rem()
-      if rem < 2 {
-        self.t_need(2)
-        t_fill(@tuple.curry(t_decode_utf_16le_lo)(hi), self)
-      } else {
-        let j = self.i_pos
-        self.i_pos = self.i_pos + 2
-        r_utf_16_lo(hi, self.i, j + 1, j)
-      }
-    }
-  }
-}
-
 ///|
 fn decode_utf_16le(self : Decoder) -> Decode {
   let rem = self.i_rem()
@@ -253,103 +281,80 @@ fn decode_utf_16le(self : Decoder) -> Decode {
 }
 
 ///|
-fn t_decode_utf_8(self : Decoder) -> Decode {
+fn t_decode_utf_16le(self : Decoder) -> Decode {
   if self.t_len < self.t_need {
-    malformed(self.t, 0, self.t_len)
+    self.ret(decode_utf_16le, malformed(self.t, 0, self.t_len))
   } else {
-    r_utf_8(self.t, 0, self.t_len)
+    self.decode_utf_16le_lo(r_utf_16(self.t, 1, 0))
   }
 }
 
 ///|
-fn decode_utf_8(self : Decoder) -> Decode {
-  let rem = self.i_rem()
-  match rem.compare(0) {
-    // rem < 0
-    -1 => Decode::End
-    // rem = 0
-    0 => self.refill(decode_utf_8)
-    // rem > 0
-    1 => {
-      let idx = self.i[self.i_pos].to_int()
-      let need = utf_8_len[idx]
-      if rem < need {
-        self.t_need(need)
-        t_fill(t_decode_utf_8, self)
+fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
+  match v {
+    UTF16Uchar(u) => Uchar(u)
+    UTF16Malformed(s) => Malformed(s)
+    Hi(hi) => {
+      let rem = self.i_rem()
+      if rem < 2 {
+        self.t_need(2)
+        t_fill(@tuple.curry(t_decode_utf_16le_lo)(hi), self)
       } else {
         let j = self.i_pos
-        if need == 0 {
-          self.i_pos = self.i_pos + 1
-          self.ret(decode_utf_8, malformed(self.i, j, 1))
-        } else {
-          self.i_pos = self.i_pos + need
-          self.ret(decode_utf_8, r_utf_8(self.i, j, need))
-        }
+        self.i_pos = self.i_pos + 2
+        r_utf_16_lo(hi, self.i, j + 1, j)
       }
     }
-    _ => abort("unreachable")
   }
 }
 
 ///|
-fn i_rem(self : Decoder) -> Int {
-  self.i_max - self.i_pos + 1
-}
-
-///|
-fn eoi(self : Decoder) -> Unit {
-  self.i = @buffer.new()
-  self.i_pos = 0
-  self.i_max = @int.min_value
-}
-
-///|
-fn refill(self : Decoder, k : Cont) -> Decode {
-  // only Buffer
-  self.eoi()
-  k(self)
-}
-
-///|
-fn t_need(self : Decoder, need : Int) -> Unit {
-  self.t_len = 0
-  self.t_need = need
-}
-
-///|
-fn t_fill(k : Cont, decoder : Decoder) -> Decode {
-  fn blit(decoder : Decoder, l : Int) -> Unit {
-    decoder.i.blit(decoder.i_pos, decoder.t, decoder.t_len, l)
-    decoder.i_pos = decoder.i_pos + 1
-    decoder.t_len = decoder.t_len + 1
-  }
-
-  let rem = decoder.i_rem()
-  if rem < 0 { // eoi
-    k(decoder)
+fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
+  if decoder.t_len < decoder.t_need {
+    decoder.ret(
+      decode_utf_16le,
+      malformed_pair(false, hi, decoder.t, 0, decoder.t_len),
+    )
   } else {
-    let need = decoder.t_need - decoder.t_len
-    if rem < need {
-      blit(decoder, rem)
-      decoder.refill(@tuple.curry(t_fill)(k))
-    } else {
-      blit(decoder, need)
-      k(decoder)
-    }
+    decoder.ret(decode_utf_16le, r_utf_16_lo(hi, decoder.t, 1, 0))
   }
 }
 
 ///|
-pub fn decode_lossy(encoding~ : Encoding = UTF8, src : @buffer.T) -> LossyChars {
-  let decoder = decoder(encoding~, src)
-  decoder
+fn r_utf_16_lo(
+  hi : Int,
+  buf : @buffer.T,
+  offset0 : Int,
+  offset1 : Int
+) -> Decode {
+  let b0 = buf[offset0].to_int() // high-order byte of the second code unit
+  let b1 = buf[offset1].to_int() // low-order byte of the second code unit
+  let lo = (b0 << 8) | b1
+  if lo < 0xDC00 || lo > 0xDFFF { // `hi` is not followed by a low surrogate
+    malformed_pair(
+      offset0 < offset1, // true when the high-order byte is stored first
+      hi,
+      buf,
+      @int.minimum(offset0, offset1),
+      2,
+    )
+  } else {
+    Uchar(Char::from_int((((hi & 0x3FF) << 10) | (lo & 0x3FF)) + 0x10000)) // 0x10000 is added to the joined 20-bit value, not OR-ed into it
+  }
 }
 
 ///|
-pub fn decode_strict(
-  encoding~ : Encoding = UTF8,
-  src : @buffer.T
-) -> StrictChars {
-  let decoder = decoder(encoding~, src)
-  decoder
+fn r_utf_16(buf : @buffer.T, offset0 : Int, offset1 : Int) -> UTF16Decode {
+  let b0 = buf[offset0].to_int() // high-order byte of the 16-bit code unit
+  let b1 = buf[offset1].to_int() // low-order byte of the 16-bit code unit
+  let u = (b0 << 8) | b1
+  if u < 0xD800 || u > 0xDFFF { // outside the surrogate range: a complete character
+    UTF16Uchar(Char::from_int(u))
+  } else if u > 0xDBFF { // 0xDC00..0xDFFF: a low surrogate where a first unit was expected
+    UTF16Malformed(
+      buf.to_unchecked_string(offset=@int.minimum(offset0, offset1), length=2),
+    )
+  } else {
+    Hi(u)
+  }
 }

From 6ff5817366ac8b36079acf18350b6287c91c4277 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Nov 2024 17:30:31 +0800
Subject: [PATCH 17/25] feat: decode_ no labelled dependency

---
 encoding/decoding.mbt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/encoding/decoding.mbt b/encoding/decoding.mbt
index 6e0d1c74c..88461cc8a 100644
--- a/encoding/decoding.mbt
+++ b/encoding/decoding.mbt
@@ -30,24 +30,24 @@ let utf_8_len = [
 ]
 
 ///|
-pub fn decode_lossy(encoding~ : Encoding = UTF8, src : @buffer.T) -> LossyChars {
-  let decoder = decoder(encoding~, src)
+pub fn decode_lossy(encoding : Encoding, src : @buffer.T) -> LossyChars {
+  let decoder = decoder(encoding, src)
   decoder
 }
 
 ///|
 pub fn decode_strict(
-  encoding~ : Encoding = UTF8,
+  encoding : Encoding,
   src : @buffer.T
 ) -> StrictChars {
-  let decoder = decoder(encoding~, src)
+  let decoder = decoder(encoding, src)
   decoder
 }
 
 // Implements
 
 ///|
-fn decoder(encoding~ : Encoding, src : @buffer.T) -> Decoder {
+fn decoder(encoding : Encoding, src : @buffer.T) -> Decoder {
   let i = src
   let i_pos = 0
   let i_max = src.length() - 1

From 7688e38d136d863479ae8221868f4a90fa644530 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Nov 2024 23:28:50 +0800
Subject: [PATCH 18/25] feat: support decode UTF16BE

---
 encoding/decoding.mbt      | 60 +++++++++++++++++++++++++++-
 encoding/encoding_test.mbt | 81 +++++++++++++++++++++++++++++++-------
 2 files changed, 126 insertions(+), 15 deletions(-)

diff --git a/encoding/decoding.mbt b/encoding/decoding.mbt
index 88461cc8a..ae47594b3 100644
--- a/encoding/decoding.mbt
+++ b/encoding/decoding.mbt
@@ -56,7 +56,7 @@ fn decoder(encoding : Encoding, src : @buffer.T) -> Decoder {
   let t_need = 0
   let k = match encoding {
     UTF8 => decode_utf_8
-    UTF16 => decode_utf_16be // TODO: BE
+    UTF16 => decode_utf_16be
     UTF16BE => decode_utf_16be
     UTF16LE => decode_utf_16le
   }
@@ -358,3 +358,61 @@ fn r_utf_16(buf : @buffer.T, offset0 : Int, offset1 : Int) -> UTF16Decode {
     Hi(u)
   }
 }
+
+// UTF16BE
+
+fn decode_utf_16be(self : Decoder) -> Decode {
+  let rem = self.i_rem()
+  match rem.compare(0) {
+    // rem < 0
+    -1 => Decode::End
+    // rem = 0
+    0 => self.refill(decode_utf_16be)
+    // rem > 0
+    1 => {
+      if rem < 2 {
+        self.t_need(2)
+        t_fill(t_decode_utf_16be, self)
+      } else {
+        let j = self.i_pos
+        self.i_pos = self.i_pos + 2
+        self.decode_utf_16be_lo(r_utf_16(self.i, j, (j + 1)))
+      }
+    }
+    _ => abort("unreachable")
+  }
+}
+
+fn t_decode_utf_16be(self : Decoder) -> Decode {
+  if self.t_len < self.t_need {
+    self.ret(decode_utf_16be, malformed(self.t, 0, self.t_len))
+  } else {
+    self.decode_utf_16be_lo(r_utf_16(self.t, 0, 1))
+  }
+}
+
+fn decode_utf_16be_lo(self : Decoder, decode : UTF16Decode) -> Decode {
+  match decode {
+    UTF16Uchar(x)=> self.ret(decode_utf_16be, Uchar(x))
+    UTF16Malformed(x)=> self.ret(decode_utf_16be, Malformed(x))
+    Hi(hi) => {
+      let rem = self.i_rem()
+      if rem < 2 {
+        self.t_need(2)
+        t_fill(@tuple.curry(t_decode_utf_16be_lo)(hi), self)
+      } else {
+        let j = self.i_pos
+        self.i_pos = self.i_pos + 2
+        self.ret(decode_utf_16be, r_utf_16_lo(hi, self.i, j, (j + 1)))
+      }
+    }
+  }
+}
+
+fn t_decode_utf_16be_lo(hi : Int, self : Decoder) -> Decode {
+  if self.t_len < self.t_need {
+    self.ret(decode_utf_16be, malformed_pair(true, hi, self.t, 0, self.t_len))
+  } else {
+    self.ret(decode_utf_16be, r_utf_16_lo(hi, self.t, 0, 1))
+  }
+}
diff --git a/encoding/encoding_test.mbt b/encoding/encoding_test.mbt
index 2000d60c3..5eab775f1 100644
--- a/encoding/encoding_test.mbt
+++ b/encoding/encoding_test.mbt
@@ -21,12 +21,11 @@ test "lossy decoding String (UTF16LE encoded) to String" {
       #|b"\x60\x4f\x7d\x59\x3d\xd8\x40\xdc"
     ,
   )
-  let stream = @encoding.decode_lossy(encoding=UTF16LE, buf)
+  let stream = @encoding.decode_lossy(UTF16LE, buf)
   inspect!(String::from_iter(stream.iter()), content=src)
 }
 
 test "lossy decoding UTF16LE encoded data to String" {
-  let src = "你好👀"
   let buf = @buffer.T::new(size_hint=10)
   buf.write_bytes(b"\x60\x4f")
   buf.write_bytes(b"\x7d\x59")
@@ -37,8 +36,43 @@ test "lossy decoding UTF16LE encoded data to String" {
       #|b"\x60\x4f\x7d\x59\x3d\xd8\x40\xdc"
     ,
   )
-  let stream = @encoding.decode_lossy(encoding=UTF16LE, buf)
-  inspect!(String::from_iter(stream.iter()), content=src)
+  let stream = @encoding.decode_lossy(UTF16LE, buf)
+  inspect!(String::from_iter(stream.iter()), content="你好👀")
+}
+
+test "lossy decoding UTF16BE encoded data to String" {
+  let buf = @buffer.T::new(size_hint=10)
+  buf.write_bytes(b"\xd8\x3d\xdc\x08")
+  buf.write_bytes(b"\xd8\x3d\xdc\x31")
+  buf.write_bytes(b"\xd8\x3d\xdc\x07")
+  buf.write_bytes(b"\xd8\x3d\xdc\x30")
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xd8\x3d\xdc\x08\xd8\x3d\xdc\x31\xd8\x3d\xdc\x07\xd8\x3d\xdc\x30"
+    ,
+  )
+  let stream = @encoding.decode_lossy(UTF16BE, buf)
+  inspect!(String::from_iter(stream.iter()), content="🐈🐱🐇🐰")
+}
+
+test "lossy decoding UTF16 (alias of UTF16BE) encoded data to String" {
+  let buf = @buffer.T::new(size_hint=24)
+  buf.write_bytes(b"\x18\x65")
+  buf.write_bytes(b"\x18\x20")
+  buf.write_bytes(b"\x18\x73")
+  buf.write_bytes(b"\x18\x64")
+  buf.write_bytes(b"\x18\x73")
+  buf.write_bytes(b"\x18\x36")
+  buf.write_bytes(b"\x18\x20")
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x18\x65\x18\x20\x18\x73\x18\x64\x18\x73\x18\x36\x18\x20"
+    ,
+  )
+  let stream = @encoding.decode_lossy(UTF16, buf)
+  inspect!(String::from_iter(stream.iter()), content="ᡥᠠᡳᡤᡳᠶᠠ")
 }
 
 test "lossy decoding UTF8 encoded data to String" {
@@ -52,7 +86,7 @@ test "lossy decoding UTF8 encoded data to String" {
       #|b"\xe4\xbd\xa0\xe5\xa5\xbd\xf0\x9f\x91\x80"
     ,
   )
-  let stream = @encoding.decode_lossy(encoding=UTF8, buf)
+  let stream = @encoding.decode_lossy(UTF8, buf)
   inspect!(String::from_iter(stream.iter()), content="你好👀")
 }
 
@@ -68,7 +102,7 @@ test "lossy decoding String (UTF16LE encoded) to String" {
       #|b"\x3d\xd8\x4b\xdc\x8d\x51\xc1\x89"
     ,
   )
-  let stream = @encoding.decode_lossy(encoding=UTF16LE, buf)
+  let stream = @encoding.decode_lossy(UTF16LE, buf)
   inspect!(String::from_iter(stream.iter()), content=src)
 }
 
@@ -84,7 +118,7 @@ test "lossy decoding UTF8 encoded data to String" {
       #|b"\xf0\x9f\x91\x8b\xe5\x86\x8d\xe8\xa7\x81"
     ,
   )
-  let stream = @encoding.decode_lossy(buf) // defaults to UTF8
+  let stream = @encoding.decode_lossy(UTF8, buf)
   inspect!(String::from_iter(stream.iter()), content=src)
 }
 
@@ -100,7 +134,7 @@ test "lossy decoding UTF8 encoded data" {
       #|b"\xf0\x9f\x91\x8b\xe5\x86\x8d\xe8\xa7\x81"
     ,
   )
-  let stream = @encoding.decode_lossy(buf) // defaults to UTF8
+  let stream = @encoding.decode_lossy(UTF8, buf)
   inspect!(stream.iter().collect(), content="['👋', '再', '见']")
 }
 
@@ -116,7 +150,7 @@ test "lossy decoding UTF8 encoded data with UTF16LE" {
       #|b"\xd1\x8d\x65\x6b\x3c\xd8\xc3\xdf\x38\x6e\xf3\x6c\x3c\xd8\xca\xdf"
     ,
   )
-  let stream = @encoding.decode_lossy(buf) // defaults to UTF8
+  let stream = @encoding.decode_lossy(UTF8, buf)
   inspect!(
     stream.iter().collect(),
     content="['э', 'e', 'k', '<', '�', '�', 'n', '�', '�']",
@@ -135,14 +169,14 @@ test "lossy decoding UTF16LE encoded data with UTF8" {
       #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
     ,
   )
-  let stream = @encoding.decode_lossy(encoding=UTF16LE, buf)
+  let stream = @encoding.decode_lossy(UTF16LE, buf)
   inspect!(
     stream.iter().collect(),
     content="['럨', '', 'ꖭ', '鿰', '莏', '룦', '', '뎳', '鿰', '誏']",
   )
 }
 
-test "scrictly decoding UTF8 encoded data with UTF16LE" {
+test "strictly decoding UTF8 encoded data with UTF16LE" {
   let src = "跑步🏃游泳🏊"
   let buf = @buffer.T::new(size_hint=10)
   for s in src {
@@ -154,11 +188,11 @@ test "scrictly decoding UTF8 encoded data with UTF16LE" {
       #|b"\xd1\x8d\x65\x6b\x3c\xd8\xc3\xdf\x38\x6e\xf3\x6c\x3c\xd8\xca\xdf"
     ,
   )
-  let stream = @encoding.decode_strict(buf) // defaults to UTF8
+  let stream = @encoding.decode_strict(UTF8, buf)
   inspect!(stream.iter().try_collect?(), content="Err(쏘)")
 }
 
-test "scrictly decoding UTF16LE encoded data with UTF8" {
+test "strictly decoding UTF16LE encoded data with UTF8" {
   let src = "跑步🏃游泳🏊"
   let buf = @buffer.T::new(size_hint=10)
   for s in src {
@@ -170,9 +204,28 @@ test "scrictly decoding UTF16LE encoded data with UTF8" {
       #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
     ,
   )
-  let stream = @encoding.decode_strict(encoding=UTF16LE, buf)
+  let stream = @encoding.decode_strict(UTF16LE, buf)
   inspect!(
     stream.iter().try_collect?(),
     content="Ok(['럨', '', 'ꖭ', '鿰', '莏', '룦', '', '뎳', '鿰', '誏'])",
   )
 }
+
+test "strictly decoding UTF16BE encoded data with UTF8" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    buf.write_utf8_char(s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
+    ,
+  )
+  let stream = @encoding.decode_strict(UTF16BE, buf)
+  inspect!(
+    stream.iter().try_collect?(),
+    content="Ok(['', '釦', '궥', '', '较', '', '룦', '뎳', '', '辊'])",
+  )
+}

From a6e8c1810fcf31b9c1e881adb2dfcace75e6fb80 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Tue, 26 Nov 2024 08:31:17 +0800
Subject: [PATCH 19/25] refactor: moon fmt

---
 encoding/decoding.mbt | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/encoding/decoding.mbt b/encoding/decoding.mbt
index ae47594b3..b25be1cac 100644
--- a/encoding/decoding.mbt
+++ b/encoding/decoding.mbt
@@ -36,10 +36,7 @@ pub fn decode_lossy(encoding : Encoding, src : @buffer.T) -> LossyChars {
 }
 
 ///|
-pub fn decode_strict(
-  encoding : Encoding,
-  src : @buffer.T
-) -> StrictChars {
+pub fn decode_strict(encoding : Encoding, src : @buffer.T) -> StrictChars {
   let decoder = decoder(encoding, src)
   decoder
 }
@@ -361,6 +358,7 @@ fn r_utf_16(buf : @buffer.T, offset0 : Int, offset1 : Int) -> UTF16Decode {
 
 // UTF16BE
 
+///|
 fn decode_utf_16be(self : Decoder) -> Decode {
   let rem = self.i_rem()
   match rem.compare(0) {
@@ -369,20 +367,20 @@ fn decode_utf_16be(self : Decoder) -> Decode {
     // rem = 0
     0 => self.refill(decode_utf_16be)
     // rem > 0
-    1 => {
+    1 =>
       if rem < 2 {
         self.t_need(2)
         t_fill(t_decode_utf_16be, self)
       } else {
         let j = self.i_pos
         self.i_pos = self.i_pos + 2
-        self.decode_utf_16be_lo(r_utf_16(self.i, j, (j + 1)))
+        self.decode_utf_16be_lo(r_utf_16(self.i, j, j + 1))
       }
-    }
     _ => abort("unreachable")
   }
 }
 
+///|
 fn t_decode_utf_16be(self : Decoder) -> Decode {
   if self.t_len < self.t_need {
     self.ret(decode_utf_16be, malformed(self.t, 0, self.t_len))
@@ -391,10 +389,11 @@ fn t_decode_utf_16be(self : Decoder) -> Decode {
   }
 }
 
+///|
 fn decode_utf_16be_lo(self : Decoder, decode : UTF16Decode) -> Decode {
   match decode {
-    UTF16Uchar(x)=> self.ret(decode_utf_16be, Uchar(x))
-    UTF16Malformed(x)=> self.ret(decode_utf_16be, Malformed(x))
+    UTF16Uchar(x) => self.ret(decode_utf_16be, Uchar(x))
+    UTF16Malformed(x) => self.ret(decode_utf_16be, Malformed(x))
     Hi(hi) => {
       let rem = self.i_rem()
       if rem < 2 {
@@ -403,12 +402,13 @@ fn decode_utf_16be_lo(self : Decoder, decode : UTF16Decode) -> Decode {
       } else {
         let j = self.i_pos
         self.i_pos = self.i_pos + 2
-        self.ret(decode_utf_16be, r_utf_16_lo(hi, self.i, j, (j + 1)))
+        self.ret(decode_utf_16be, r_utf_16_lo(hi, self.i, j, j + 1))
       }
     }
   }
 }
 
+///|
 fn t_decode_utf_16be_lo(hi : Int, self : Decoder) -> Decode {
   if self.t_len < self.t_need {
     self.ret(decode_utf_16be, malformed_pair(true, hi, self.t, 0, self.t_len))

From 3b6e4264c43218fb230796653de11b88bbce7ec8 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Tue, 26 Nov 2024 08:32:55 +0800
Subject: [PATCH 20/25] refactor: rename

---
 encoding/{encoding_test.mbt => decoding_test.mbt} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename encoding/{encoding_test.mbt => decoding_test.mbt} (100%)

diff --git a/encoding/encoding_test.mbt b/encoding/decoding_test.mbt
similarity index 100%
rename from encoding/encoding_test.mbt
rename to encoding/decoding_test.mbt

From aed3e5b795fc3674af83bb04ed85d034e46eec2c Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Tue, 26 Nov 2024 08:33:34 +0800
Subject: [PATCH 21/25] feat: encode init

---
 encoding/encoding.mbt      | 18 ++++++++++++++++++
 encoding/encoding_test.mbt | 24 ++++++++++++++++++++++++
 encoding/moon.pkg.json     |  1 +
 3 files changed, 43 insertions(+)
 create mode 100644 encoding/encoding_test.mbt

diff --git a/encoding/encoding.mbt b/encoding/encoding.mbt
index f307e1338..0da1b87c9 100644
--- a/encoding/encoding.mbt
+++ b/encoding/encoding.mbt
@@ -11,3 +11,21 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
+///|
+pub fn encode(encoding : Encoding, src : String) -> Bytes! {
+  let buf = @buffer.T::from_bytes(src.to_bytes())
+  // MoonBit String encoded UTF16LE
+  let chars = decode_strict(UTF16LE, buf)
+  let new_buf = @buffer.T::new(size_hint=buf.length())
+  let write = match encoding {
+    UTF8 => @buffer.write_utf8_char
+    UTF16 => @buffer.write_char // TODO: no
+    UTF16BE => @buffer.write_char
+    UTF16LE => @buffer.write_char
+  }
+  for char in chars {
+    write(new_buf, char.unwrap_or_error!())
+  }
+  new_buf.to_bytes()
+}
diff --git a/encoding/encoding_test.mbt b/encoding/encoding_test.mbt
new file mode 100644
index 000000000..e763ae245
--- /dev/null
+++ b/encoding/encoding_test.mbt
@@ -0,0 +1,24 @@
+// Copyright 2024 International Digital Economy Academy
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+test "pp" {
+  let src = "你好👀"
+  let bytes = @encoding.encode!(UTF8, src)
+  inspect!(
+    bytes,
+    content=
+      #|b"\xe4\xbd\xa0\xe5\xa5\xbd\xf0\x9f\x91\x80"
+    ,
+  )
+}
diff --git a/encoding/moon.pkg.json b/encoding/moon.pkg.json
index eb6312aee..b2e471183 100644
--- a/encoding/moon.pkg.json
+++ b/encoding/moon.pkg.json
@@ -8,6 +8,7 @@
     "moonbitlang/core/tuple",
     "moonbitlang/core/array",
     "moonbitlang/core/char",
+    "moonbitlang/core/result",
     "moonbitlang/core/int"
   ],
   "test-import": [

From 816077edc796305a48f7e4d1177f0f0fcc07daf4 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Tue, 26 Nov 2024 08:34:02 +0800
Subject: [PATCH 22/25] refactor: sort import

---
 encoding/moon.pkg.json | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/encoding/moon.pkg.json b/encoding/moon.pkg.json
index b2e471183..58dd8726e 100644
--- a/encoding/moon.pkg.json
+++ b/encoding/moon.pkg.json
@@ -1,15 +1,15 @@
 {
   "import": [
-    "moonbitlang/core/builtin",
+    "moonbitlang/core/array",
     "moonbitlang/core/buffer",
-    "moonbitlang/core/coverage",
-    "moonbitlang/core/string",
+    "moonbitlang/core/builtin",
     "moonbitlang/core/bytes",
-    "moonbitlang/core/tuple",
-    "moonbitlang/core/array",
     "moonbitlang/core/char",
+    "moonbitlang/core/coverage",
+    "moonbitlang/core/int",
     "moonbitlang/core/result",
-    "moonbitlang/core/int"
+    "moonbitlang/core/string",
+    "moonbitlang/core/tuple"
   ],
   "test-import": [
   ]

From 8ae2bb9915691461ed06a1faaab81814e372f8f4 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Tue, 26 Nov 2024 10:12:01 +0800
Subject: [PATCH 23/25] feat: set_utf16{le, be}_char

---
 builtin/builtin.mbti |  6 +++--
 builtin/bytes.mbt    | 62 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/builtin/builtin.mbti b/builtin/builtin.mbti
index 1df5121a5..740afb865 100644
--- a/builtin/builtin.mbti
+++ b/builtin/builtin.mbti
@@ -668,7 +668,9 @@ impl FixedArray {
   op_get[T](Self[T], Int) -> T
   op_set[T](Self[T], Int, T) -> Unit
   set[T](Self[T], Int, T) -> Unit
-  set_utf16_char(Self[Byte], Int, Char) -> Int
+  set_utf16_char(Self[Byte], Int, Char) -> Int //deprecated
+  set_utf16be_char(Self[Byte], Int, Char) -> Int
+  set_utf16le_char(Self[Byte], Int, Char) -> Int
   set_utf8_char(Self[Byte], Int, Char) -> Int
   to_json[X : ToJson](Self[X]) -> Json
   to_string[X : Show](Self[X]) -> String
@@ -687,7 +689,7 @@ impl Bytes {
   op_equal(Bytes, Bytes) -> Bool
   op_get(Bytes, Int) -> Byte
   op_set(Bytes, Int, Byte) -> Unit
-  set_utf16_char(Bytes, Int, Char) -> Int
+  set_utf16_char(Bytes, Int, Char) -> Int //deprecated
   set_utf8_char(Bytes, Int, Char) -> Int //deprecated
   sub_string(Bytes, Int, Int) -> String //deprecated
   to_string(Bytes) -> String //deprecated
diff --git a/builtin/bytes.mbt b/builtin/bytes.mbt
index 2708d0c60..97501aa19 100644
--- a/builtin/bytes.mbt
+++ b/builtin/bytes.mbt
@@ -140,7 +140,7 @@ pub fn copy(self : Bytes) -> Bytes {
 }
 
 ///|
-/// Fill utf8 encoded char `value` into byte sequence `self`, starting at `offset`.
+/// Fill UTF8 encoded char `value` into byte sequence `self`, starting at `offset`.
 /// It return the length of bytes has been written.
 /// @alert deprecated "The type Bytes is about to be changed to be immutable. Use `FixedArray[Byte]` or `Buffer` instead."
 pub fn set_utf8_char(self : Bytes, offset : Int, value : Char) -> Int {
@@ -199,9 +199,10 @@ pub fn set_utf8_char(
 }
 
 ///|
-/// Fill utf16 encoded char `value` into byte sequence `self`, starting at `offset`.
+/// Fill UTF16 encoded char `value` into byte sequence `self`, starting at `offset`.
 /// It return the length of bytes has been written.
 /// @alert unsafe "Panic if the [value] is out of range"
+/// @alert deprecated "The type Bytes is about to be changed to be immutable. Use `FixedArray[Byte]` or `Buffer` instead."
 pub fn set_utf16_char(self : Bytes, offset : Int, value : Char) -> Int {
   let code = value.to_uint()
   if code < 0x10000 {
@@ -226,6 +227,7 @@ pub fn set_utf16_char(self : Bytes, offset : Int, value : Char) -> Int {
 /// Fill utf16 encoded char `value` into byte sequence `self`, starting at `offset`.
 /// It return the length of bytes has been written.
 /// @alert unsafe "Panic if the [value] is out of range"
+/// @alert deprecated "Use `set_utf16le_char` instead"
 pub fn set_utf16_char(
   self : FixedArray[Byte],
   offset : Int,
@@ -250,6 +252,62 @@ pub fn set_utf16_char(
   }
 }
 
+///|
+/// Encode `value` as UTF16LE and store it in `self` beginning at `offset`.
+/// The result is the number of bytes stored: 2, or 4 for a surrogate pair.
+/// @alert unsafe "Panic if the [value] is out of range"
+pub fn set_utf16le_char(
+  self : FixedArray[Byte],
+  offset : Int,
+  value : Char
+) -> Int {
+  let scalar = value.to_uint()
+  if scalar < 0x10000 {
+    self[offset] = (scalar & 0xFF).to_byte()
+    self[offset + 1] = (scalar >> 8).to_byte()
+    2
+  } else if scalar < 0x110000 {
+    let rest = scalar - 0x10000
+    let lead = (rest >> 10) | 0xD800
+    let trail = (rest & 0x3FF) | 0xDC00
+    self[offset] = (lead & 0xFF).to_byte()
+    self[offset + 1] = (lead >> 8).to_byte()
+    self[offset + 2] = (trail & 0xFF).to_byte()
+    self[offset + 3] = (trail >> 8).to_byte()
+    4
+  } else {
+    abort("Char out of range")
+  }
+}
+
+///|
+/// Encode `value` as UTF16BE and store it in `self` beginning at `offset`.
+/// The result is the number of bytes stored: 2, or 4 for a surrogate pair.
+/// @alert unsafe "Panic if the [value] is out of range"
+pub fn set_utf16be_char(
+  self : FixedArray[Byte],
+  offset : Int,
+  value : Char
+) -> Int {
+  let code = value.to_uint()
+  if code < 0x10000 {
+    self[offset] = (code >> 8).to_byte() // high-order byte goes first in big-endian order
+    self[offset + 1] = (code & 0xFF).to_byte()
+    2
+  } else if code < 0x110000 {
+    let rest = code - 0x10000
+    let lead = (rest >> 10) | 0xD800 // leading (high) surrogate
+    let trail = (rest & 0x3FF) | 0xDC00 // trailing (low) surrogate
+    self[offset] = (lead >> 8).to_byte()
+    self[offset + 1] = (lead & 0xFF).to_byte()
+    self[offset + 2] = (trail >> 8).to_byte()
+    self[offset + 3] = (trail & 0xFF).to_byte()
+    4
+  } else {
+    abort("Char out of range")
+  }
+}
+
 ///|
 pub fn op_equal(self : Bytes, other : Bytes) -> Bool {
   if self.length() != other.length() {

From bc6d71454fab98857bdca1170aa9accf9fae3beb Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Tue, 26 Nov 2024 10:12:27 +0800
Subject: [PATCH 24/25] feat: write_utf16be_char

---
 buffer/buffer.mbt  | 13 +++++++++++--
 buffer/buffer.mbti |  1 +
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/buffer/buffer.mbt b/buffer/buffer.mbt
index a8404cf81..9ec49f187 100644
--- a/buffer/buffer.mbt
+++ b/buffer/buffer.mbt
@@ -168,14 +168,23 @@ pub fn write_sub_string(
 }
 
 ///|
-/// Write a char into buffer.
+/// Write a char into buffer as UTF16LE.
 pub fn write_char(self : T, value : Char) -> Unit {
   self.grow_if_necessary(self.len + 4)
-  let inc = self.data.set_utf16_char(self.len, value)
+  let inc = self.data.set_utf16le_char(self.len, value)
   self.len += inc
 }
 
 ///|
+/// Append `value` to the buffer in UTF16BE form.
+pub fn write_utf16be_char(self : T, value : Char) -> Unit {
+  self.grow_if_necessary(self.len + 4)
+  let written = self.data.set_utf16be_char(self.len, value)
+  self.len = self.len + written
+}
+
+///|
+/// Write a char into buffer as UTF8.
 pub fn write_utf8_char(self : T, value : Char) -> Unit {
   self.grow_if_necessary(self.len + 4)
   let inc = self.data.set_utf8_char(self.len, value)
diff --git a/buffer/buffer.mbti b/buffer/buffer.mbti
index 28a79725b..d2bf8c98a 100644
--- a/buffer/buffer.mbti
+++ b/buffer/buffer.mbti
@@ -25,6 +25,7 @@ impl T {
   write_string(Self, String) -> Unit
   write_sub_string(Self, String, Int, Int) -> Unit //deprecated
   write_substring(Self, String, Int, Int) -> Unit
+  write_utf16be_char(Self, Char) -> Unit
   write_utf8_char(Self, Char) -> Unit
 }
 impl Show for T

From e8e8659e291f1e27aeac9503e37502345503f750 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Tue, 26 Nov 2024 10:12:35 +0800
Subject: [PATCH 25/25] feat: encoding

---
 encoding/decoding.mbt      |  4 ++--
 encoding/decoding_test.mbt | 20 ++++++++++----------
 encoding/encoding.mbt      | 16 ++++++++++------
 encoding/encoding.mbti     |  8 +++++---
 encoding/encoding_test.mbt | 37 +++++++++++++++++++++++++++++++++++--
 encoding/types.mbt         |  4 ++--
 6 files changed, 64 insertions(+), 25 deletions(-)

diff --git a/encoding/decoding.mbt b/encoding/decoding.mbt
index b25be1cac..19f577406 100644
--- a/encoding/decoding.mbt
+++ b/encoding/decoding.mbt
@@ -53,9 +53,9 @@ fn decoder(encoding : Encoding, src : @buffer.T) -> Decoder {
   let t_need = 0
   let k = match encoding {
     UTF8 => decode_utf_8
-    UTF16 => decode_utf_16be
-    UTF16BE => decode_utf_16be
+    UTF16 => decode_utf_16le
     UTF16LE => decode_utf_16le
+    UTF16BE => decode_utf_16be
   }
   { i, i_pos, i_max, t, t_len, t_need, k }
 }
diff --git a/encoding/decoding_test.mbt b/encoding/decoding_test.mbt
index 5eab775f1..324cfb985 100644
--- a/encoding/decoding_test.mbt
+++ b/encoding/decoding_test.mbt
@@ -56,19 +56,19 @@ test "lossy decoding UTF16BE encoded data to String" {
   inspect!(String::from_iter(stream.iter()), content="🐈🐱🐇🐰")
 }
 
-test "lossy decoding UTF16 (alias of UTF16BE) encoded data to String" {
-  let buf = @buffer.T::new(size_hint=24)
-  buf.write_bytes(b"\x18\x65")
-  buf.write_bytes(b"\x18\x20")
-  buf.write_bytes(b"\x18\x73")
-  buf.write_bytes(b"\x18\x64")
-  buf.write_bytes(b"\x18\x73")
-  buf.write_bytes(b"\x18\x36")
-  buf.write_bytes(b"\x18\x20")
+test "lossy decoding UTF16 (alias of UTF16LE) encoded data to String" {
+  let buf = @buffer.T::new(size_hint=20)
+  buf.write_bytes(b"\x65\x18")
+  buf.write_bytes(b"\x20\x18")
+  buf.write_bytes(b"\x73\x18")
+  buf.write_bytes(b"\x64\x18")
+  buf.write_bytes(b"\x73\x18")
+  buf.write_bytes(b"\x36\x18")
+  buf.write_bytes(b"\x20\x18")
   inspect!(
     buf.to_bytes(),
     content=
-      #|b"\x18\x65\x18\x20\x18\x73\x18\x64\x18\x73\x18\x36\x18\x20"
+      #|b"\x65\x18\x20\x18\x73\x18\x64\x18\x73\x18\x36\x18\x20\x18"
     ,
   )
   let stream = @encoding.decode_lossy(UTF16, buf)
diff --git a/encoding/encoding.mbt b/encoding/encoding.mbt
index 0da1b87c9..a6afe84e4 100644
--- a/encoding/encoding.mbt
+++ b/encoding/encoding.mbt
@@ -13,19 +13,23 @@
 // limitations under the License.
 
 ///|
-pub fn encode(encoding : Encoding, src : String) -> Bytes! {
+pub fn encode(encoding : Encoding, src : String) -> Bytes {
+  // NOTE: special case: MoonBit String are already valid UTF16(LE) bytes
+  match encoding {
+    UTF16 | UTF16LE => return src.to_bytes()
+    _ => ()
+  }
   let buf = @buffer.T::from_bytes(src.to_bytes())
-  // MoonBit String encoded UTF16LE
   let chars = decode_strict(UTF16LE, buf)
   let new_buf = @buffer.T::new(size_hint=buf.length())
   let write = match encoding {
     UTF8 => @buffer.write_utf8_char
-    UTF16 => @buffer.write_char // TODO: no
-    UTF16BE => @buffer.write_char
-    UTF16LE => @buffer.write_char
+    UTF16BE => @buffer.write_utf16be_char
+    _ => abort("unreachable")
   }
   for char in chars {
-    write(new_buf, char.unwrap_or_error!())
+    // SAFETY: Assume String are always valid UTF16LE
+    write(new_buf, char.unwrap())
   }
   new_buf.to_bytes()
 }
diff --git a/encoding/encoding.mbti b/encoding/encoding.mbti
index efbe7c65e..a28205bef 100644
--- a/encoding/encoding.mbti
+++ b/encoding/encoding.mbti
@@ -3,9 +3,11 @@ package moonbitlang/core/encoding
 alias @moonbitlang/core/buffer as @buffer
 
 // Values
-fn decode_lossy(encoding~ : Encoding = .., @buffer.T) -> LossyChars
+fn decode_lossy(Encoding, @buffer.T) -> LossyChars
 
-fn decode_strict(encoding~ : Encoding = .., @buffer.T) -> StrictChars
+fn decode_strict(Encoding, @buffer.T) -> StrictChars
+
+fn encode(Encoding, String) -> Bytes
 
 // Types and methods
 type DecodeError
@@ -14,8 +16,8 @@ impl Show for DecodeError
 pub(all) enum Encoding {
   UTF8
   UTF16
-  UTF16BE
   UTF16LE
+  UTF16BE
 }
 
 type LossyChars
diff --git a/encoding/encoding_test.mbt b/encoding/encoding_test.mbt
index e763ae245..2ff086c9b 100644
--- a/encoding/encoding_test.mbt
+++ b/encoding/encoding_test.mbt
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-test "pp" {
+test "encoding String to UTF8" {
   let src = "你好👀"
-  let bytes = @encoding.encode!(UTF8, src)
+  let bytes = @encoding.encode(UTF8, src)
   inspect!(
     bytes,
     content=
@@ -22,3 +22,36 @@ test "pp" {
     ,
   )
 }
+
+test "encoding String to UTF16 (alias of UTF16LE)" {
+  let src = "LISP programmers know the value of everything"
+  let bytes = @encoding.encode(UTF16, src) // encode handles UTF16 in the same match arm as UTF16LE
+  inspect!(
+    bytes,
+    content=
+      #|b"\x4c\x00\x49\x00\x53\x00\x50\x00\x20\x00\x70\x00\x72\x00\x6f\x00\x67\x00\x72\x00\x61\x00\x6d\x00\x6d\x00\x65\x00\x72\x00\x73\x00\x20\x00\x6b\x00\x6e\x00\x6f\x00\x77\x00\x20\x00\x74\x00\x68\x00\x65\x00\x20\x00\x76\x00\x61\x00\x6c\x00\x75\x00\x65\x00\x20\x00\x6f\x00\x66\x00\x20\x00\x65\x00\x76\x00\x65\x00\x72\x00\x79\x00\x74\x00\x68\x00\x69\x00\x6e\x00\x67\x00"
+    ,
+  )
+}
+
+test "encoding String to UTF16LE" {
+  let src = "and the cost of nothing"
+  let bytes = @encoding.encode(UTF16LE, src) // for UTF16LE, encode returns src.to_bytes() directly
+  inspect!(
+    bytes,
+    content=
+      #|b"\x61\x00\x6e\x00\x64\x00\x20\x00\x74\x00\x68\x00\x65\x00\x20\x00\x63\x00\x6f\x00\x73\x00\x74\x00\x20\x00\x6f\x00\x66\x00\x20\x00\x6e\x00\x6f\x00\x74\x00\x68\x00\x69\x00\x6e\x00\x67\x00"
+    ,
+  )
+}
+
+test "encoding String to UTF16BE" {
+  let src = "λf.(λx.f(x x))(λx.f(x x))" // λ is U+03BB, so each one must encode as the bytes 03 BB
+  let bytes = @encoding.encode(UTF16BE, src)
+  inspect!(
+    bytes,
+    content=
+      #|b"\x03\xbb\x00\x66\x00\x2e\x00\x28\x03\xbb\x00\x78\x00\x2e\x00\x66\x00\x28\x00\x78\x00\x20\x00\x78\x00\x29\x00\x29\x00\x28\x03\xbb\x00\x78\x00\x2e\x00\x66\x00\x28\x00\x78\x00\x20\x00\x78\x00\x29\x00\x29"
+    ,
+  )
+}
diff --git a/encoding/types.mbt b/encoding/types.mbt
index de8fa9a30..84164f2a2 100644
--- a/encoding/types.mbt
+++ b/encoding/types.mbt
@@ -18,9 +18,9 @@ typealias Cont = (Decoder) -> Decode
 ///|
 pub(all) enum Encoding {
   UTF8
-  UTF16
-  UTF16BE
+  UTF16 // alias of UTF16LE
   UTF16LE
+  UTF16BE
 }
 
 // Decoder