From 6fa8655e8b85c0f8358050c504d411180dc31855 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Wed, 27 Nov 2024 22:43:15 +0800
Subject: [PATCH] feat: new package encoding

---
 encoding/decoding.mbt      | 458 +++++++++++++++++++++++++++++++++++++
 encoding/decoding_test.mbt | 336 +++++++++++++++++++++++++++
 encoding/encoding.mbt      |  87 +++++++
 encoding/encoding.mbti     |  44 ++++
 encoding/encoding_test.mbt |  57 +++++
 encoding/moon.pkg.json     |   4 +
 encoding/types.mbt         | 133 +++++++++++
 7 files changed, 1119 insertions(+)
 create mode 100644 encoding/decoding.mbt
 create mode 100644 encoding/decoding_test.mbt
 create mode 100644 encoding/encoding.mbt
 create mode 100644 encoding/encoding.mbti
 create mode 100644 encoding/encoding_test.mbt
 create mode 100644 encoding/moon.pkg.json
 create mode 100644 encoding/types.mbt

diff --git a/encoding/decoding.mbt b/encoding/decoding.mbt
new file mode 100644
index 0000000..49dcb70
--- /dev/null
+++ b/encoding/decoding.mbt
@@ -0,0 +1,458 @@
+// Copyright 2024 International Digital Economy Academy
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///|
+const U_REP = '\u{FFFD}' // U+FFFD: what `LossyChars::iter` yields for every `Malformed` result
+
+///|
+let utf_8_len = [ // indexed by a lead byte: length (1-4) of the UTF-8 sequence it starts, or 0 when `decode_utf_8` must reject the byte
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
+  4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+] // 256 entries: 0x00-0x7F -> 1, 0x80-0xC1 -> 0, 0xC2-0xDF -> 2, 0xE0-0xEF -> 3, 0xF0-0xF4 -> 4, 0xF5-0xFF -> 0
+
+///|
+/// Decodes bytes in the given encoding, replacing whatever cannot be decoded.
+///
+/// # Parameters
+/// - `encoding`: The character encoding of the input `src`.
+/// - `src`: The `Bytes` holding the encoded text.
+///
+/// # Returns
+///
+/// A `LossyChars` value; call `iter()` on it to obtain the decoded characters as an `Iter[Char]`.
+///
+/// # Behavior
+///
+/// - Each malformed sequence reported by the decoder is replaced with one replacement character (`\u{FFFD}`), so iteration never yields an error.
+///
+/// # Examples
+///
+/// ```moonbit
+/// let buf = @buffer.T::new(size_hint=10)
+/// buf.write_bytes(b"\xe4\xbd\xa0") // "你" in UTF8
+/// buf.write_bytes(b"\xe5\xa5\xbd") // "好" in UTF8
+/// buf.write_bytes(b"\xf0\x9f\x91\x80") // "👀" in UTF8
+/// let chars = @encoding.decode_lossy(UTF8, buf.to_bytes())
+/// let arr = chars.iter().collect() // Array of Unicode code points: `['你', '好', '👀']`
+/// let str = String::from_array(arr) // MoonBit String, represented as UTF16LE: `"你好👀"`
+/// ```
+pub fn decode_lossy(encoding : Encoding, src : Bytes) -> LossyChars {
+  let decoder = decoder(encoding, src) // fresh decoder positioned at the first byte of `src`
+  decoder
+}
+
+///|
+/// Decodes bytes in the given encoding, reporting whatever cannot be decoded.
+///
+/// # Parameters
+///
+/// - `encoding`: The character encoding of the input `src`.
+/// - `src`: The `Bytes` holding the encoded text.
+///
+/// # Returns
+///
+/// A `StrictChars` value; call `iter()` on it to obtain an `Iter[Result[Char, DecodeError]]`.
+///
+/// # Behavior
+///
+/// - Each malformed sequence is yielded as an `Err(DecodeError)` item carrying the offending bytes; iteration then continues with the bytes that follow.
+///
+/// # Examples
+///
+/// ```moonbit
+/// let buf = @buffer.T::new(size_hint=10)
+/// buf.write_bytes(b"\xe4\xbd\xa0") // "你" in UTF8
+/// buf.write_bytes(b"\xe5\xa5\xbd") // "好" in UTF8
+/// buf.write_bytes(b"\xf0\x9f\x91\x80") // "👀" in UTF8
+/// let chars = @encoding.decode_strict(UTF8, buf.to_bytes())
+/// let arr = chars.iter().try_collect!() // Array of Unicode code points: `['你', '好', '👀']`
+/// let str = String::from_array(arr) // MoonBit String, represented as UTF16LE: `"你好👀"`
+/// ```
+pub fn decode_strict(encoding : Encoding, src : Bytes) -> StrictChars {
+  let decoder = decoder(encoding, src) // fresh decoder positioned at the first byte of `src`
+  decoder
+}
+
+// Implementations
+
+///|
+fn decoder(encoding : Encoding, src : Bytes) -> Decoder {
+  let k : Cont = match encoding { // entry continuation; plain UTF16 decodes as little-endian
+    UTF8 => decode_utf_8
+    UTF16 | UTF16LE => decode_utf_16le
+    UTF16BE => decode_utf_16be
+  }
+  {
+    i: src,
+    i_pos: 0,
+    i_max: src.length() - 1, // index of the last input byte (-1 when `src` is empty)
+    t: b"\x00\x00\x00\x00", // 4-byte scratch area filled by `t_fill`
+    t_len: 0, t_need: 0,
+    k,
+  }
+}
+
+///|
+fn decode(self : Decoder) -> Decode { // run one step: call the stored continuation `k` on this decoder
+  (self.k)(self)
+}
+
+///|
+fn ret(self : Decoder, k : Cont, v : Decode) -> Decode {
+  self.k = k // continuation the next `decode` call will run
+  v // the result itself is handed back unchanged
+}
+
+///|
+fn i_rem(self : Decoder) -> Int { // unread bytes left in `i`; negative once `eoi` has set `i_max` to `@int.min_value`
+  self.i_max - self.i_pos + 1
+}
+
+///|
+fn eoi(self : Decoder) -> Unit { // mark end of input: drop the buffer and make `i_rem` negative
+  self.i = @bytes.default()
+  self.i_pos = 0
+  self.i_max = @int.min_value // with `i_pos` at 0, `i_rem` now evaluates to `@int.min_value + 1`
+}
+
+///|
+fn refill(self : Decoder, k : Cont) -> Decode {
+  // The whole input is one `Bytes` value, so nothing more can arrive: flag end of input and resume `k`.
+  self.eoi()
+  k(self)
+}
+
+///|
+fn t_need(self : Decoder, need : Int) -> Unit { // start a fresh collection of `need` bytes in the scratch area `t`
+  self.t_len = 0
+  self.t_need = need
+}
+
+///|
+fn t_fill(k : Cont, decoder : Decoder) -> Decode {
+  fn blit(decoder : Decoder, l : Int) -> Unit {
+    decoder.i.blit(decoder.i_pos, decoder.t, decoder.t_len, l) // NOTE(review): confirm `Bytes::blit` treats the receiver as the source
+    decoder.i_pos = decoder.i_pos + l // advance by the `l` bytes just moved (was a fixed 1, wrong for l = 0 and l > 1)
+    decoder.t_len = decoder.t_len + l
+  }
+  // Gather up to `t_need` bytes from `i` into `t`, then continue with `k`; at end of input `k` sees `t_len < t_need`.
+  let rem = decoder.i_rem()
+  if rem < 0 { // eoi
+    k(decoder)
+  } else {
+    let need = decoder.t_need - decoder.t_len
+    if rem < need {
+      blit(decoder, rem)
+      decoder.refill(@tuple.curry(t_fill)(k))
+    } else {
+      blit(decoder, need)
+      k(decoder)
+    }
+  }
+}
+
+// UTF8
+
+///|
+fn decode_utf_8(self : Decoder) -> Decode {
+  // Decode the UTF-8 sequence that starts at `i_pos`.
+  let rem = self.i_rem()
+  if rem < 0 {
+    // `eoi` already ran: the input is finished
+    return Decode::End
+  }
+  if rem == 0 {
+    // every byte was consumed: flag end of input, then re-enter
+    return self.refill(decode_utf_8)
+  }
+  let start = self.i_pos
+  // `utf_8_len` maps the lead byte to the sequence length (0 = invalid lead byte)
+  let need = utf_8_len[self.i[start].to_int()]
+  if rem < need {
+    // the sequence runs past the end of the input: gather what is left in `t`
+    self.t_need(need)
+    return t_fill(t_decode_utf_8, self)
+  }
+  if need == 0 {
+    // this byte cannot start a sequence: skip it and report it on its own
+    self.i_pos = start + 1
+    return self.ret(decode_utf_8, malformed(self.i, start, 1))
+  }
+  // whole sequence available: consume it and let `r_utf_8` validate it
+  self.i_pos = start + need
+  self.ret(decode_utf_8, r_utf_8(self.i, start, need))
+}
+
+///|
+fn t_decode_utf_8(self : Decoder) -> Decode {
+  if self.t_len < self.t_need { // end of input cut the sequence short: report the bytes gathered so far
+    self.ret(decode_utf_8, malformed(self.t, 0, self.t_len))
+  } else { // `ret` re-arms `decode_utf_8`, matching what the UTF-16 `t_decode_*` helpers do
+    self.ret(decode_utf_8, r_utf_8(self.t, 0, self.t_len))
+  }
+}
+
+///|
+fn r_utf_8(buf : Bytes, offset : Int, length : Int) -> Decode { // validate and decode the `length`-byte sequence at `buf[offset]`
+  fn uchar(c : Int) {
+    Uchar(Char::from_int(c))
+  }
+  // Callers pass 1..4 for `length`; any other value reaches `panic()` below.
+  match length {
+    1 => uchar(buf[offset].to_int())
+    2 => {
+      let b0 = buf[offset].to_int()
+      let b1 = buf[offset + 1].to_int()
+      if (b1 >> 6) != 0b10 { // trailing byte must look like 10xxxxxx
+        malformed(buf, offset, length)
+      } else {
+        uchar(((b0 & 0x1F) << 6) | (b1 & 0x3F))
+      }
+    }
+    3 => {
+      let b0 = buf[offset].to_int()
+      let b1 = buf[offset + 1].to_int()
+      let b2 = buf[offset + 2].to_int()
+      let c = ((b0 & 0x0F) << 12) | (((b1 & 0x3F) << 6) | (b2 & 0x3F))
+      if (b2 >> 6) != 0b10 { // last byte must look like 10xxxxxx
+        malformed(buf, offset, length)
+      } else {
+        match b0 {
+          0xE0 => // second byte limited to 0xA0..0xBF (lower values would be an overlong encoding)
+            if b1 < 0xA0 || 0xBF < b1 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+          0xED => // second byte limited to 0x80..0x9F (higher values would encode a surrogate)
+            if b1 < 0x80 || 0x9F < b1 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+          _ => // any other lead byte: second byte only has to look like 10xxxxxx
+            if (b1 >> 6) != 0b10 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+        }
+      }
+    }
+    4 => {
+      let b0 = buf[offset].to_int()
+      let b1 = buf[offset + 1].to_int()
+      let b2 = buf[offset + 2].to_int()
+      let b3 = buf[offset + 3].to_int()
+      let c = ((b0 & 0x07) << 18) |
+        ((b1 & 0x3F) << 12) |
+        ((b2 & 0x3F) << 6) |
+        (b3 & 0x3F)
+      if (b3 >> 6) != 0b10 || (b2 >> 6) != 0b10 { // last two bytes must look like 10xxxxxx
+        malformed(buf, offset, length)
+      } else {
+        match b0 {
+          0xF0 => // second byte limited to 0x90..0xBF (lower values would be an overlong encoding)
+            if b1 < 0x90 || 0xBF < b1 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+          0xF4 => // second byte limited to 0x80..0x8F (higher values would exceed U+10FFFF)
+            if b1 < 0x80 || 0x8F < b1 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+          _ => // any other lead byte: second byte only has to look like 10xxxxxx
+            if (b1 >> 6) != 0b10 {
+              malformed(buf, offset, length)
+            } else {
+              uchar(c)
+            }
+        }
+      }
+    }
+    _ => panic()
+  }
+}
+
+// UTF16LE
+
+///|
+priv enum UTF16Decode { // outcome of reading one 16-bit unit in `r_utf_16`
+  Hi(Int) // unit in 0xD800..0xDBFF; the `*_lo` decoders then read the unit that follows
+  UTF16Malformed(String) // unit in 0xDC00..0xDFFF met where a first unit was expected
+  UTF16Uchar(Char) // unit outside 0xD800..0xDFFF
+}
+
+///|
+fn decode_utf_16le(self : Decoder) -> Decode {
+  // Decode the UTF-16LE unit (or surrogate pair) that starts at `i_pos`.
+  let rem = self.i_rem()
+  if rem < 0 {
+    // `eoi` already ran: the input is finished
+    return Decode::End
+  }
+  if rem == 0 {
+    // every byte was consumed: flag end of input, then re-enter
+    return self.refill(decode_utf_16le)
+  }
+  if rem < 2 {
+    // a lone trailing byte: gather it in `t` so it is reported as malformed
+    self.t_need(2)
+    return t_fill(t_decode_utf_16le, self)
+  }
+  let start = self.i_pos
+  self.i_pos = start + 2
+  // little-endian: the high-order byte is the second one
+  self.decode_utf_16le_lo(r_utf_16(self.i, start + 1, start))
+}
+
+///|
+fn t_decode_utf_16le(self : Decoder) -> Decode { // continuation run by `t_fill` for a first unit gathered in `t`
+  if self.t_len < self.t_need { // input ended before both bytes arrived
+    self.ret(decode_utf_16le, malformed(self.t, 0, self.t_len))
+  } else {
+    self.decode_utf_16le_lo(r_utf_16(self.t, 1, 0)) // little-endian: high-order byte is `t[1]`
+  }
+}
+
+///|
+fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
+  match v { // every result goes through `ret`, exactly as `decode_utf_16be_lo` does
+    UTF16Uchar(u) => self.ret(decode_utf_16le, Uchar(u))
+    UTF16Malformed(s) => self.ret(decode_utf_16le, Malformed(s))
+    Hi(hi) => { // first half of a pair: the next two bytes must hold the second half
+      let rem = self.i_rem()
+      if rem < 2 { // fewer than two bytes left: gather them in `t`
+        self.t_need(2)
+        t_fill(@tuple.curry(t_decode_utf_16le_lo)(hi), self)
+      } else {
+        let j = self.i_pos
+        self.i_pos = self.i_pos + 2
+        self.ret(decode_utf_16le, r_utf_16_lo(hi, self.i, j + 1, j)) // little-endian: high-order byte second
+      }
+    }
+  }
+}
+
+///|
+fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode { // continuation run by `t_fill` for the unit following `hi`
+  if decoder.t_len < decoder.t_need { // input ended inside the pair: report `hi` plus the bytes gathered
+    decoder.ret(
+      decode_utf_16le,
+      malformed_pair(false, hi, decoder.t, 0, decoder.t_len),
+    )
+  } else {
+    decoder.ret(decode_utf_16le, r_utf_16_lo(hi, decoder.t, 1, 0)) // little-endian: high-order byte is `t[1]`
+  }
+}
+
+///|
+fn r_utf_16_lo(hi : Int, buf : Bytes, offset0 : Int, offset1 : Int) -> Decode { // pair the first unit `hi` with the unit read from `buf`
+  let b0 = buf[offset0].to_int() // high-order byte
+  let b1 = buf[offset1].to_int() // low-order byte
+  let lo = (b0 << 8) | b1
+  if lo < 0xDC00 || lo > 0xDFFF { // not a second-half surrogate: report `hi` together with these two bytes
+    malformed_pair(offset0 < offset1, hi, buf, @int.min(offset0, offset1), 2)
+  } else { // add 0x10000 to the combined 20 bits; OR-ing it into the low half lost it whenever `hi & 0x3FF >= 0x40`
+    Uchar(Char::from_int((((hi & 0x3FF) << 10) | (lo & 0x3FF)) + 0x10000))
+  }
+}
+
+///|
+fn r_utf_16(buf : Bytes, offset0 : Int, offset1 : Int) -> UTF16Decode { // classify the 16-bit unit read from `buf`
+  let b0 = buf[offset0].to_int() // high-order byte
+  let b1 = buf[offset1].to_int() // low-order byte
+  let u = (b0 << 8) | b1
+  if u < 0xD800 || u > 0xDFFF { // outside the surrogate range: a complete character
+    UTF16Uchar(Char::from_int(u))
+  } else if u > 0xDBFF { // 0xDC00..0xDFFF: second-half surrogate with no first half before it
+    UTF16Malformed(
+      buf.to_unchecked_string(offset=@int.min(offset0, offset1), length=2),
+    )
+  } else { // 0xD800..0xDBFF: first half of a pair, the caller must read the unit that follows
+    Hi(u)
+  }
+}
+
+// UTF16BE
+
+///|
+fn decode_utf_16be(self : Decoder) -> Decode {
+  // Decode the UTF-16BE unit (or surrogate pair) that starts at `i_pos`.
+  let rem = self.i_rem()
+  if rem < 0 {
+    // `eoi` already ran: the input is finished
+    return Decode::End
+  }
+  if rem == 0 {
+    // every byte was consumed: flag end of input, then re-enter
+    return self.refill(decode_utf_16be)
+  }
+  if rem < 2 {
+    // a lone trailing byte: gather it in `t` so it is reported as malformed
+    self.t_need(2)
+    return t_fill(t_decode_utf_16be, self)
+  }
+  let start = self.i_pos
+  self.i_pos = start + 2
+  self.decode_utf_16be_lo(r_utf_16(self.i, start, start + 1))
+}
+
+///|
+fn t_decode_utf_16be(self : Decoder) -> Decode { // continuation run by `t_fill` for a first unit gathered in `t`
+  if self.t_len < self.t_need { // input ended before both bytes arrived
+    self.ret(decode_utf_16be, malformed(self.t, 0, self.t_len))
+  } else {
+    self.decode_utf_16be_lo(r_utf_16(self.t, 0, 1)) // big-endian: high-order byte is `t[0]`
+  }
+}
+
+///|
+fn decode_utf_16be_lo(self : Decoder, decode : UTF16Decode) -> Decode { // finish a unit read by `decode_utf_16be`
+  match decode {
+    UTF16Uchar(x) => self.ret(decode_utf_16be, Uchar(x))
+    UTF16Malformed(x) => self.ret(decode_utf_16be, Malformed(x))
+    Hi(hi) => { // first half of a pair: the next two bytes must hold the second half
+      let rem = self.i_rem()
+      if rem < 2 { // fewer than two bytes left: gather them in `t`
+        self.t_need(2)
+        t_fill(@tuple.curry(t_decode_utf_16be_lo)(hi), self)
+      } else {
+        let j = self.i_pos
+        self.i_pos = self.i_pos + 2
+        self.ret(decode_utf_16be, r_utf_16_lo(hi, self.i, j, j + 1)) // big-endian: high-order byte first
+      }
+    }
+  }
+}
+
+///|
+fn t_decode_utf_16be_lo(hi : Int, self : Decoder) -> Decode { // continuation run by `t_fill` for the unit following `hi`
+  if self.t_len < self.t_need { // input ended inside the pair: report `hi` plus the bytes gathered
+    self.ret(decode_utf_16be, malformed_pair(true, hi, self.t, 0, self.t_len))
+  } else {
+    self.ret(decode_utf_16be, r_utf_16_lo(hi, self.t, 0, 1)) // big-endian: high-order byte is `t[0]`
+  }
+}
diff --git a/encoding/decoding_test.mbt b/encoding/decoding_test.mbt
new file mode 100644
index 0000000..4491a42
--- /dev/null
+++ b/encoding/decoding_test.mbt
@@ -0,0 +1,336 @@
+// Copyright 2024 International Digital Economy Academy
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///|
+fn string_from_lossy_chars(chars : @encoding.LossyChars) -> String {
+  // Collect the decoded chars into an array and build the String from it.
+  String::from_array(chars.iter().collect())
+}
+
+// lossy
+
+test "lossy decoding String (UTF16LE encoded) to String (buffer.write_bytes)" {
+  let src = "你好👀"
+  let buf = @buffer.T::new(size_hint=src.to_bytes().length())
+  buf.write_bytes(src.to_bytes())
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x60\x4f\x7d\x59\x3d\xd8\x40\xdc"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF16LE, buf.to_bytes())
+  inspect!(string_from_lossy_chars(chars), content=src)
+}
+
+test "lossy decoding String (UTF16LE encoded) to String (buffer.write_char)" {
+  let src = "👋再见"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    buf.write_char(s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x3d\xd8\x4b\xdc\x8d\x51\xc1\x89"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF16LE, buf.to_bytes())
+  inspect!(string_from_lossy_chars(chars), content=src)
+}
+
+test "lossy decoding UTF16LE encoded data to String" {
+  let buf = @buffer.T::new(size_hint=10)
+  buf.write_bytes(b"\x60\x4f")
+  buf.write_bytes(b"\x7d\x59")
+  buf.write_bytes(b"\x3d\xd8\x40\xdc")
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x60\x4f\x7d\x59\x3d\xd8\x40\xdc"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF16LE, buf.to_bytes())
+  inspect!(string_from_lossy_chars(chars), content="你好👀")
+}
+
+test "lossy decoding UTF16 (alias for UTF16LE) encoded data to String" {
+  let buf = @buffer.T::new(size_hint=20)
+  buf.write_bytes(b"\x65\x18")
+  buf.write_bytes(b"\x20\x18")
+  buf.write_bytes(b"\x73\x18")
+  buf.write_bytes(b"\x64\x18")
+  buf.write_bytes(b"\x73\x18")
+  buf.write_bytes(b"\x36\x18")
+  buf.write_bytes(b"\x20\x18")
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x65\x18\x20\x18\x73\x18\x64\x18\x73\x18\x36\x18\x20\x18"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF16, buf.to_bytes())
+  inspect!(string_from_lossy_chars(chars), content="ᡥᠠᡳᡤᡳᠶᠠ")
+}
+
+test "lossy decoding UTF16BE encoded data to String" {
+  let buf = @buffer.T::new(size_hint=10)
+  buf.write_bytes(b"\xd8\x3d\xdc\x08")
+  buf.write_bytes(b"\xd8\x3d\xdc\x31")
+  buf.write_bytes(b"\xd8\x3d\xdc\x07")
+  buf.write_bytes(b"\xd8\x3d\xdc\x30")
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xd8\x3d\xdc\x08\xd8\x3d\xdc\x31\xd8\x3d\xdc\x07\xd8\x3d\xdc\x30"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF16BE, buf.to_bytes())
+  inspect!(string_from_lossy_chars(chars), content="🐈🐱🐇🐰")
+}
+
+test "lossy decoding UTF8 encoded data to String" {
+  let buf = @buffer.T::new(size_hint=10)
+  buf.write_bytes(b"\xe4\xbd\xa0")
+  buf.write_bytes(b"\xe5\xa5\xbd")
+  buf.write_bytes(b"\xf0\x9f\x91\x80")
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xe4\xbd\xa0\xe5\xa5\xbd\xf0\x9f\x91\x80"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF8, buf.to_bytes())
+  inspect!(string_from_lossy_chars(chars), content="你好👀")
+}
+
+test "lossy decoding UTF8 encoded bytes to String" {
+  let src = "👋再见"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf8_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xf0\x9f\x91\x8b\xe5\x86\x8d\xe8\xa7\x81"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF8, buf.to_bytes())
+  inspect!(string_from_lossy_chars(chars), content=src)
+}
+
+test "lossy decoding UTF8 encoded data" {
+  let src = "👋再见"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf8_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xf0\x9f\x91\x8b\xe5\x86\x8d\xe8\xa7\x81"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF8, buf.to_bytes())
+  inspect!(chars.iter().collect(), content="['👋', '再', '见']")
+}
+
+test "lossy decoding UTF16LE encoded data with UTF8" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf16le_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xd1\x8d\x65\x6b\x3c\xd8\xc3\xdf\x38\x6e\xf3\x6c\x3c\xd8\xca\xdf"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF8, buf.to_bytes())
+  inspect!(
+    chars.iter().collect(),
+    content="['э', 'e', 'k', '<', '�', '�', 'n', '�', '�']",
+  )
+}
+
+test "lossy decoding UTF8 encoded data with UTF16LE" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf8_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF16LE, buf.to_bytes())
+  inspect!(
+    chars.iter().collect(),
+    content="['럨', '', 'ꖭ', '鿰', '莏', '룦', '', '뎳', '鿰', '誏']",
+  )
+}
+
+test "lossy decoding UTF16BE encoded data with UTF8" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf16be_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x00\xd1\x00\x65\xd8\x3c\xdf\xc3\x00\x38\x00\xf3\xd8\x3c\xdf\xca"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF8, buf.to_bytes())
+  inspect!(
+    chars.iter().collect(),
+    content=
+      #|['\x00', '�', 'e', '�', '�', '\x00', '8', '\x00', '�', '�']
+    ,
+  )
+}
+
+test "lossy decoding UTF8 encoded data with UTF16BE" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf8_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF16BE, buf.to_bytes())
+  inspect!(
+    chars.iter().collect(),
+    content="['', '釦', '궥', '', '较', '', '룦', '뎳', '', '辊']",
+  )
+}
+
+test "lossy decoding UTF16LE encoded data with UTF16BE" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf16le_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xd1\x8d\x65\x6b\x3c\xd8\xc3\xdf\x38\x6e\xf3\x6c\x3c\xd8\xca\xdf"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF16BE, buf.to_bytes())
+  inspect!(
+    chars.iter().collect(),
+    content="['톍', '敫', '㳘', '쏟', '㡮', '', '㳘', '쫟']",
+  )
+}
+
+test "lossy decoding UTF16BE encoded data with UTF16LE" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf16be_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x00\xd1\x00\x65\xd8\x3c\xdf\xc3\x00\x38\x00\xf3\xd8\x3c\xdf\xca"
+    ,
+  )
+  let chars = @encoding.decode_lossy(UTF16LE, buf.to_bytes())
+  inspect!(
+    chars.iter().collect(),
+    content="['턀', '攀', '㳘', '쏟', '㠀', '', '㳘', '쫟']",
+  )
+}
+
+// strictly
+
+test "strictly decoding UTF16LE encoded data with UTF8" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf16le_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xd1\x8d\x65\x6b\x3c\xd8\xc3\xdf\x38\x6e\xf3\x6c\x3c\xd8\xca\xdf"
+    ,
+  )
+  let chars = @encoding.decode_strict(UTF8, buf.to_bytes())
+  inspect!(chars.iter().try_collect?(), content="Err(쏘)")
+}
+
+test "strictly decoding UTF8 encoded data with UTF16LE" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf8_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
+    ,
+  )
+  let chars = @encoding.decode_strict(UTF16LE, buf.to_bytes())
+  inspect!(
+    chars.iter().try_collect?(),
+    content="Ok(['럨', '', 'ꖭ', '鿰', '莏', '룦', '', '뎳', '鿰', '誏'])",
+  )
+}
+
+test "strictly decoding UTF8 encoded data with UTF16BE" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf8_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\xe8\xb7\x91\xe6\xad\xa5\xf0\x9f\x8f\x83\xe6\xb8\xb8\xe6\xb3\xb3\xf0\x9f\x8f\x8a"
+    ,
+  )
+  let chars = @encoding.decode_strict(UTF16BE, buf.to_bytes())
+  inspect!(
+    chars.iter().try_collect?(),
+    content="Ok(['', '釦', '궥', '', '较', '', '룦', '뎳', '', '辊'])",
+  )
+}
+
+test "strictly decoding UTF16BE encoded data with UTF8" {
+  let src = "跑步🏃游泳🏊"
+  let buf = @buffer.T::new(size_hint=10)
+  for s in src {
+    @encoding.write_utf16be_char(buf, s)
+  }
+  inspect!(
+    buf.to_bytes(),
+    content=
+      #|b"\x00\xd1\x00\x65\xd8\x3c\xdf\xc3\x00\x38\x00\xf3\xd8\x3c\xdf\xca"
+    ,
+  )
+  let chars = @encoding.decode_strict(UTF8, buf.to_bytes())
+  inspect!(chars.iter().try_collect?(), content="Err(Ñ)")
+}
diff --git a/encoding/encoding.mbt b/encoding/encoding.mbt
new file mode 100644
index 0000000..0cff879
--- /dev/null
+++ b/encoding/encoding.mbt
@@ -0,0 +1,87 @@
+// Copyright 2024 International Digital Economy Academy
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///|
+/// Encodes `src` in the requested character encoding and returns the bytes.
+///
+/// # Parameters
+///
+/// - `encoding`: The target encoding.
+/// - `src`: The string to encode.
+///
+/// # Returns
+///
+/// The `Bytes` holding `src` in the selected encoding.
+///
+/// # Behavior
+///
+/// - `UTF16` and `UTF16LE` return `src.to_bytes()` as is; the other encodings re-encode `src` char by char.
+///
+/// # Examples
+///
+/// ```moonbit
+/// let src = "Hello, World!"
+/// let encoded_bytes = encode(UTF8, src)
+/// ```
+pub fn encode(encoding : Encoding, src : String) -> Bytes {
+  let utf16le = src.to_bytes()
+  // Choose the per-char writer. UTF16 and UTF16LE need none: a MoonBit
+  // String's bytes are already UTF16(LE), so they are returned directly.
+  let write = match encoding {
+    UTF16 | UTF16LE => return utf16le
+    UTF8 => write_utf8_char
+    UTF16BE => write_utf16be_char
+  }
+  let out = @buffer.T::new(size_hint=utf16le.length())
+  for decoded in decode_strict(UTF16LE, utf16le) {
+    // SAFETY: Assume String are always valid UTF16LE
+    write(out, decoded.unwrap())
+  }
+  out.to_bytes()
+}
+
+///|
+fn write_char(
+  write : (FixedArray[Byte], Int, Char) -> Int
+) -> (@buffer.T, Char) -> Unit {
+  // A single 4-byte scratch array is captured by the returned closure and
+  // reused on every call; the Int returned by `write` is used as the byte count.
+  let scratch = FixedArray::makei(4, fn { _ => b'\x00' })
+  fn(buf : @buffer.T, value : Char) -> Unit {
+    let written = write(scratch, 0, value)
+    let encoded = scratch.iter().take(written).collect()
+    buf.write_bytes(@bytes.from_array(encoded))
+  }
+}
+
+///|
+/// Writes a char into the buffer as UTF8 (bytes produced by `FixedArray::set_utf8_char`).
+pub let write_utf8_char : (@buffer.T, Char) -> Unit = write_char(
+  FixedArray::set_utf8_char,
+)
+
+///|
+/// Writes a char into the buffer as UTF16LE.
+/// Alias for `write_utf16le_char`; both are bound to `@buffer.write_char`.
+pub let write_utf16_char : (@buffer.T, Char) -> Unit = @buffer.write_char
+
+///|
+/// Writes a char into the buffer as UTF16LE (bound to `@buffer.write_char`).
+pub let write_utf16le_char : (@buffer.T, Char) -> Unit = @buffer.write_char
+
+///|
+/// Writes a char into the buffer as UTF16BE (bytes produced by `FixedArray::set_utf16be_char`).
+pub let write_utf16be_char : (@buffer.T, Char) -> Unit = write_char(
+  FixedArray::set_utf16be_char,
+)
diff --git a/encoding/encoding.mbti b/encoding/encoding.mbti
new file mode 100644
index 0000000..9881812
--- /dev/null
+++ b/encoding/encoding.mbti
@@ -0,0 +1,44 @@
+package moonbitlang/x/encoding
+
+alias @moonbitlang/core/buffer as @buffer
+
+// Values
+fn decode_lossy(Encoding, Bytes) -> LossyChars
+
+fn decode_strict(Encoding, Bytes) -> StrictChars
+
+fn encode(Encoding, String) -> Bytes
+
+fn write_utf16_char(@buffer.T, Char) -> Unit
+
+fn write_utf16be_char(@buffer.T, Char) -> Unit
+
+fn write_utf16le_char(@buffer.T, Char) -> Unit
+
+fn write_utf8_char(@buffer.T, Char) -> Unit
+
+// Types and methods
+type DecodeError
+impl Show for DecodeError
+
+pub(all) enum Encoding {
+  UTF8
+  UTF16
+  UTF16LE
+  UTF16BE
+}
+
+type LossyChars
+impl LossyChars {
+  iter(Self) -> Iter[Char]
+}
+
+type StrictChars
+impl StrictChars {
+  iter(Self) -> Iter[Result[Char, DecodeError]]
+}
+
+// Type aliases
+
+// Traits
+
diff --git a/encoding/encoding_test.mbt b/encoding/encoding_test.mbt
new file mode 100644
index 0000000..2ff086c
--- /dev/null
+++ b/encoding/encoding_test.mbt
@@ -0,0 +1,57 @@
+// Copyright 2024 International Digital Economy Academy
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+test "encoding String to UTF8" {
+  let src = "你好👀"
+  let bytes = @encoding.encode(UTF8, src)
+  inspect!(
+    bytes,
+    content=
+      #|b"\xe4\xbd\xa0\xe5\xa5\xbd\xf0\x9f\x91\x80"
+    ,
+  )
+}
+
+test "encoding String to UTF16 (alias of UTF16LE)" {
+  let src = "LISP programmers know the value of everything"
+  let bytes = @encoding.encode(UTF16, src)
+  inspect!(
+    bytes,
+    content=
+      #|b"\x4c\x00\x49\x00\x53\x00\x50\x00\x20\x00\x70\x00\x72\x00\x6f\x00\x67\x00\x72\x00\x61\x00\x6d\x00\x6d\x00\x65\x00\x72\x00\x73\x00\x20\x00\x6b\x00\x6e\x00\x6f\x00\x77\x00\x20\x00\x74\x00\x68\x00\x65\x00\x20\x00\x76\x00\x61\x00\x6c\x00\x75\x00\x65\x00\x20\x00\x6f\x00\x66\x00\x20\x00\x65\x00\x76\x00\x65\x00\x72\x00\x79\x00\x74\x00\x68\x00\x69\x00\x6e\x00\x67\x00"
+    ,
+  )
+}
+
+test "encoding String to UTF16LE" {
+  let src = "and the cost of nothing"
+  let bytes = @encoding.encode(UTF16LE, src)
+  inspect!(
+    bytes,
+    content=
+      #|b"\x61\x00\x6e\x00\x64\x00\x20\x00\x74\x00\x68\x00\x65\x00\x20\x00\x63\x00\x6f\x00\x73\x00\x74\x00\x20\x00\x6f\x00\x66\x00\x20\x00\x6e\x00\x6f\x00\x74\x00\x68\x00\x69\x00\x6e\x00\x67\x00"
+    ,
+  )
+}
+
+test "encoding String to UTF16BE" {
+  let src = "λf.(λx.f(x x))(λx.f(x x))"
+  let bytes = @encoding.encode(UTF16BE, src)
+  inspect!(
+    bytes,
+    content=
+      #|b"\x00\xbb\x00\x66\x00\x2e\x00\x28\x00\xbb\x00\x78\x00\x2e\x00\x66\x00\x28\x00\x78\x00\x20\x00\x78\x00\x29\x00\x29\x00\x28\x00\xbb\x00\x78\x00\x2e\x00\x66\x00\x28\x00\x78\x00\x20\x00\x78\x00\x29\x00\x29"
+    ,
+  )
+}
diff --git a/encoding/moon.pkg.json b/encoding/moon.pkg.json
new file mode 100644
index 0000000..c600e42
--- /dev/null
+++ b/encoding/moon.pkg.json
@@ -0,0 +1,4 @@
+{
+  "import": [ ],
+  "test-import": [ ]
+}
diff --git a/encoding/types.mbt b/encoding/types.mbt
new file mode 100644
index 0000000..6900c80
--- /dev/null
+++ b/encoding/types.mbt
@@ -0,0 +1,133 @@
+// Copyright 2024 International Digital Economy Academy
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///|
+typealias Cont = (Decoder) -> Decode // one decoding step: takes the decoder and returns that step's result
+
+///|
+pub(all) enum Encoding { // encodings accepted by `encode`, `decode_lossy` and `decode_strict`
+  UTF8
+  UTF16 // alias for UTF16LE
+  UTF16LE
+  UTF16BE
+}
+
+// Decoder
+
+///|
+priv struct Decoder {
+  mut i : Bytes // input being decoded; replaced by `@bytes.default()` in `eoi`
+  mut i_pos : Int // index in `i` of the next byte to read
+  mut i_max : Int // index of the last byte of `i`; `@int.min_value` after `eoi`
+  t : Bytes // scratch area for a sequence cut short by the end of input
+  mut t_len : Int // bytes currently held in `t`
+  mut t_need : Int // bytes the pending sequence requires
+  mut k : Cont // continuation run by the next `decode` call
+}
+
+///|
+priv enum Decode { // result of one `decode` step
+  End // input exhausted
+  Malformed(String) // the offending bytes, carried as an unchecked String
+  Uchar(Char) // one decoded character
+}
+
+///|
+fn malformed(buf : Bytes, offset : Int, length : Int) -> Decode { // wrap `length` bytes of `buf`, starting at `offset`, as `Malformed`
+  Malformed(buf.to_unchecked_string(offset~, length~))
+}
+
+///|
+fn malformed_pair( // `Malformed` made of the unit `hi` followed by `length` bytes of `buf`
+  be : Bool,
+  hi : Int,
+  buf : Bytes,
+  offset : Int,
+  length : Int
+) -> Decode {
+  let bs1 = buf.to_unchecked_string(offset~, length~).to_bytes() // NOTE(review): an odd `length` goes through a String round trip; confirm no byte is lost
+  let bs0 = b"\x00\x00"
+  let (j0, j1) = if be { (0, 1) } else { (1, 0) } // positions of the high and low byte of `hi`
+  bs0[j0] = (hi >> 8).to_byte()
+  bs0[j1] = hi.land(0xFF).to_byte()
+  let bs = @buffer.new(size_hint=bs0.length() + bs1.length())
+  bs.write_bytes(bs0) // `hi` first, in the byte order selected by `be`
+  bs.write_bytes(bs1)
+  Malformed(bs.to_bytes().to_unchecked_string(offset=0, length=bs.length()))
+}
+
+// Chars
+
+///|
+type LossyChars Decoder // decoder wrapper whose `iter` replaces malformed input with U+FFFD
+
+///|
+pub fn iter(self : LossyChars) -> Iter[Char] { // decoded chars, U+FFFD for malformed input; the decoder is stateful, so traverse once
+  Iter::new(
+    fn(yield_) {
+      loop self._.decode() {
+        Uchar(u) => {
+          if yield_(u) == IterEnd { // the consumer asked to stop
+            break IterEnd
+          }
+          continue self._.decode()
+        }
+        Malformed(_) => {
+          if yield_(U_REP) == IterEnd {
+            break IterEnd
+          }
+          continue self._.decode()
+        }
+        End => break IterContinue // input exhausted normally: `IterEnd` is reserved for an early stop, so combinators can carry on
+      }
+    },
+  )
+}
+
+///|
+type StrictChars Decoder // decoder wrapper whose `iter` reports malformed input as `Err`
+
+///|
+type! DecodeError String // payload: the String carried by the `Malformed` result that caused the error
+
+///|
+pub impl Show for DecodeError with output(self, logger) { // prints the payload String as is
+  match self {
+    DecodeError(err) => logger.write_string(err)
+  }
+}
+
+///|
+pub fn iter(self : StrictChars) -> Iter[Result[Char, DecodeError]] { // `Ok` per char, `Err` per malformed sequence; traverse once
+  Iter::new(
+    fn(yield_) {
+      loop self._.decode() {
+        Uchar(u) => {
+          if yield_(Ok(u)) == IterEnd { // the consumer asked to stop
+            break IterEnd
+          }
+          continue self._.decode()
+        }
+        Malformed(s) => {
+          let err = DecodeError(s) // the error carries the malformed bytes
+          if yield_(Err(err)) == IterEnd {
+            break IterEnd
+          }
+          continue self._.decode() // decoding resumes after the malformed sequence
+        }
+        End => break IterContinue // input exhausted normally: `IterEnd` is reserved for an early stop, so combinators can carry on
+      }
+    },
+  )
+}