Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new package Encoding #1236

Closed
wants to merge 25 commits into from
Closed
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
moon fmt with block-style
  • Loading branch information
jetjinser committed Nov 22, 2024
commit 0fc9cd40d1eb646de23b7e6ea1ecc4954db39d8a
5 changes: 5 additions & 0 deletions buffer/buffer.mbt
Original file line number Diff line number Diff line change
@@ -100,12 +100,14 @@ pub fn T::new(size_hint~ : Int = 0) -> T {
{ data, len: 0, initial_data: data }
}

///|
/// Builds a buffer pre-filled with the contents of `bytes`.
///
/// The buffer is created with `bytes.length()` as its size hint, and the
/// bytes are then appended with `write_bytes`.
pub fn T::from_bytes(bytes : Bytes) -> T {
  let capacity = bytes.length()
  let result = T::new(size_hint=capacity)
  result.write_bytes(bytes)
  result
}

///|
pub fn T::from_array(arr : Array[Byte]) -> T {
let buf = T::new(size_hint=arr.length())
for byte in arr {
@@ -169,6 +171,7 @@ pub fn write_char(self : T, value : Char) -> Unit {
self.len += inc
}

///|
pub fn write_utf8_char(self : T, value : Char) -> Unit {
self.grow_if_necessary(self.len + 4)
let inc = self.data.set_utf8_char(self.len, value)
@@ -183,6 +186,7 @@ pub fn write_byte(self : T, value : Byte) -> Unit {
self.len += 1
}

///|
/// Copies `len` bytes from `self`, starting at `srcoff`, into `dst`,
/// starting at `dstoff`.
///
/// The copy goes through the buffers' index operators, which read and write
/// the underlying `data` directly, so the bytes actually land in `dst`.
/// The previous implementation blitted between `self.to_bytes()` and
/// `dst.to_bytes()`; NOTE(review): `to_bytes()` appears to return a fresh
/// `Bytes` value, in which case the write went to a temporary and `dst` was
/// left untouched - confirm against the `to_bytes` definition.
///
/// Only the underlying storage is written: neither buffer's `len` is changed
/// and `dst` is not grown, so the caller must make sure `dst` already has
/// room for `dstoff + len` bytes.
pub fn blit(self : T, srcoff : Int, dst : T, dstoff : Int, len : Int) -> Unit {
  if dstoff > srcoff {
    // Copy back-to-front so that overlapping ranges inside the same buffer
    // are not clobbered before they are read.
    for i = len - 1; i >= 0; i = i - 1 {
      dst[dstoff + i] = self[srcoff + i]
    }
  } else {
    for i = 0; i < len; i = i + 1 {
      dst[dstoff + i] = self[srcoff + i]
    }
  }
}
@@ -208,6 +212,7 @@ pub fn op_set(self : T, index : Int, value : Byte) -> Unit {
self.data[index] = value
jetjinser marked this conversation as resolved.
Show resolved Hide resolved
}

///|
/// Index operator: returns the byte stored at position `index` of the
/// buffer's underlying `data`.
///
/// The index is applied to `data` directly; it is not compared against the
/// buffer's logical `len` here.
pub fn op_get(self : T, index : Int) -> Byte {
  let byte = self.data[index]
  byte
}
jetjinser marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions builtin/bytes.mbt
Original file line number Diff line number Diff line change
@@ -168,6 +168,7 @@ pub fn set_utf8_char(self : Bytes, offset : Int, value : Char) -> Int {
}
}

///|
pub fn set_utf8_char(
self : FixedArray[Byte],
offset : Int,
22 changes: 22 additions & 0 deletions encoding/decoding.mbt
Original file line number Diff line number Diff line change
@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.

///|
/// U+FFFD, the Unicode REPLACEMENT CHARACTER.
/// NOTE(review): presumably substituted for malformed input when decoding
/// lossily - the use site is not visible in this hunk, confirm.
const U_REP = '\u{FFFD}'

///|
// consider const
let utf_8_len = [
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -28,6 +30,7 @@ let utf_8_len = [
4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
]

///|
fn r_utf_8(buf : @buffer.T, offset : Int, length : Int) -> Decode {
fn uchar(c : Int) {
Uchar(Char::from_int(c))
@@ -112,6 +115,7 @@ fn r_utf_8(buf : @buffer.T, offset : Int, length : Int) -> Decode {
}
}

///|
fn r_utf_16(buf : @buffer.T, offset0 : Int, offset1 : Int) -> UTF16Decode {
let b0 = buf[offset0].to_int()
let b1 = buf[offset1].to_int()
@@ -127,6 +131,7 @@ fn r_utf_16(buf : @buffer.T, offset0 : Int, offset1 : Int) -> UTF16Decode {
}
}

///|
fn r_utf_16_lo(
hi : Int,
buf : @buffer.T,
@@ -149,10 +154,12 @@ fn r_utf_16_lo(
}
}

///|
/// Runs one decoding step by invoking the decoder's current continuation
/// (`self.k`) on the decoder itself and returning whatever it yields.
fn decode(self : Decoder) -> Decode {
  let step = self.k
  step(self)
}

///|
fn decoder(encoding~ : Encoding, src : @buffer.T) -> Decoder {
let i = src
let i_pos = 0
@@ -169,17 +176,20 @@ fn decoder(encoding~ : Encoding, src : @buffer.T) -> Decoder {
{ i, i_pos, i_max, t, t_len, t_need, k }
}

///|
/// Installs `k` as the decoder's next continuation and returns `v` unchanged.
///
/// Lets a decoding step both produce a result and say which function the
/// next call to `decode` should run.
fn ret(self : Decoder, k : Cont, v : Decode) -> Decode {
// remember where to resume on the next decode step
self.k = k
v
}

///|
/// Intermediate result of reading one UTF-16 code unit.
priv enum UTF16Decode {
// NOTE(review): presumably a high (leading) surrogate awaiting its low half,
// given the `hi` parameter of the `*_lo` functions - confirm.
Hi(Int)
// Malformed input, carrying the offending data as a string.
UTF16Malformed(String)
// A successfully decoded character.
UTF16Uchar(Char)
}

///|
fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
if decoder.t_len < decoder.t_need {
decoder.ret(
@@ -191,6 +201,7 @@ fn t_decode_utf_16le_lo(hi : Int, decoder : Decoder) -> Decode {
}
}

///|
fn t_decode_utf_16le(self : Decoder) -> Decode {
if self.t_len < self.t_need {
self.ret(decode_utf_16le, malformed(self.t, 0, self.t_len))
@@ -199,6 +210,7 @@ fn t_decode_utf_16le(self : Decoder) -> Decode {
}
}

///|
fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
match v {
UTF16Uchar(u) => Uchar(u)
@@ -217,6 +229,7 @@ fn decode_utf_16le_lo(self : Decoder, v : UTF16Decode) -> Decode {
}
}

///|
fn decode_utf_16le(self : Decoder) -> Decode {
let rem = self.i_rem()
match rem.compare(0) {
@@ -239,6 +252,7 @@ fn decode_utf_16le(self : Decoder) -> Decode {
}
}

///|
fn t_decode_utf_8(self : Decoder) -> Decode {
if self.t_len < self.t_need {
malformed(self.t, 0, self.t_len)
@@ -247,6 +261,7 @@ fn t_decode_utf_8(self : Decoder) -> Decode {
}
}

///|
fn decode_utf_8(self : Decoder) -> Decode {
let rem = self.i_rem()
match rem.compare(0) {
@@ -276,27 +291,32 @@ fn decode_utf_8(self : Decoder) -> Decode {
}
}

///|
/// Number of input bytes still available to the decoder, computed as
/// `i_max - i_pos + 1` (so `i_max` is treated as an inclusive index).
fn i_rem(self : Decoder) -> Int {
  let last = self.i_max
  let current = self.i_pos
  last - current + 1
}

///|
/// Puts the decoder into its end-of-input state.
///
/// The input is replaced by a fresh empty buffer, the read position is reset
/// to 0, and `i_max` is set to the smallest `Int`, which makes `i_rem`
/// evaluate to a negative number from then on.
fn eoi(self : Decoder) -> Unit {
  self.i_max = @int.min_value
  self.i_pos = 0
  self.i = @buffer.new()
}

///|
/// Called when the current input is used up: marks the decoder as being at
/// end of input via `eoi`, then resumes decoding with continuation `k`.
fn refill(self : Decoder, k : Cont) -> Decode {
// only Buffer
// (the source is a single in-memory buffer, so there is nothing more to
// fetch; NOTE(review): interpretation of the original remark - confirm)
self.eoi()
k(self)
}

///|
/// Prepares the decoder's temporary buffer bookkeeping for a new read:
/// records that `need` bytes are required and resets the count of bytes
/// collected so far (`t_len`) to zero.
fn t_need(self : Decoder, need : Int) -> Unit {
  self.t_need = need
  self.t_len = 0
}

///|
fn t_fill(k : Cont, decoder : Decoder) -> Decode {
fn blit(decoder : Decoder, l : Int) -> Unit {
decoder.i.blit(decoder.i_pos, decoder.t, decoder.t_len, l)
@@ -319,11 +339,13 @@ fn t_fill(k : Cont, decoder : Decoder) -> Decode {
}
}

///|
/// Creates a `Stream` over `src` with its `lossy` flag set to `true`.
///
/// `encoding` selects the decoder to build and defaults to `UTF8`.
/// NOTE(review): presumably a lossy stream substitutes a replacement
/// character for malformed input instead of failing - the consumer of the
/// flag is not visible here, confirm.
pub fn decode_lossy(encoding~ : Encoding = UTF8, src : @buffer.T) -> Stream {
  { decoder: decoder(encoding~, src), lossy: true }
}

///|
pub fn decode_strict(encoding~ : Encoding = UTF8, src : @buffer.T) -> Stream {
let decoder = decoder(encoding~, src)
{ decoder, lossy: false }
8 changes: 8 additions & 0 deletions encoding/types.mbt
Original file line number Diff line number Diff line change
@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.

///|
/// A decoding continuation: a function that takes the decoder and produces
/// the next `Decode` result.
typealias Cont = (Decoder) -> Decode

///|
pub(all) enum Encoding {
UTF8
UTF16
@@ -23,6 +25,7 @@ pub(all) enum Encoding {

// Decoder

///|
priv struct Decoder {
mut i : @buffer.T
mut i_pos : Int
@@ -33,16 +36,19 @@ priv struct Decoder {
mut k : Cont
}

///|
/// Result of a single decoding step.
priv enum Decode {
// NOTE(review): presumably signals that the input is exhausted - confirm.
End
// Malformed input, carrying the offending data as a string.
Malformed(String)
// A successfully decoded character.
Uchar(Char)
}

///|
/// Builds a `Malformed` result from a slice of `buf`.
///
/// The slice starts at `offset`, spans `length`, and is converted with
/// `to_unchecked_string` before being wrapped.
fn malformed(buf : @buffer.T, offset : Int, length : Int) -> Decode {
  let raw = buf.to_unchecked_string(offset~, length~)
  Malformed(raw)
}

///|
fn malformed_pair(
be : Bool,
hi : Int,
@@ -63,11 +69,13 @@ fn malformed_pair(

// Stream

///|
/// A decoding stream: a decoder paired with the mode it runs in.
struct Stream {
// The underlying decoder state.
decoder : Decoder
// `true` when built by `decode_lossy`, `false` when built by `decode_strict`.
lossy : Bool
}

///|
pub fn iter(self : Stream) -> Iter[Char] {
Iter::new(
fn(yield_) {
1 change: 1 addition & 0 deletions int/int.mbt
Original file line number Diff line number Diff line change
@@ -42,6 +42,7 @@ pub fn abs(self : Int) -> Int {
}
}

///|
pub fn minimum(self : Int, x : Int) -> Int {
if self > x {
x
1 change: 1 addition & 0 deletions string/string.mbt
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@ pub fn String::from_array(chars : Array[Char]) -> String {
buf.to_string()
}

///|
pub fn String::from_iter(iter : Iter[Char]) -> String {
let chars = iter.collect()
String::from_array(chars)