From 4031acf12914aa01977fb3472bbb48b98b1f0e39 Mon Sep 17 00:00:00 2001 From: Flash Date: Mon, 17 Jun 2024 17:34:32 +0800 Subject: [PATCH] chore: move immut/sparse_array to internal lib (#562) * chore: move immut/sparse_array to internal lib * fix: pass moon test --enable-coverage * internal: move to internal/sparse_array/ * update --------- Co-authored-by: Hongbo Zhang --- immut/hashmap/HAMT.mbt | 12 +-- immut/hashmap/moon.pkg.json | 3 +- immut/hashmap/sparse_array.mbt | 82 ------------------- immut/hashset/HAMT.mbt | 12 +-- immut/hashset/bitset.mbt | 53 ------------ immut/hashset/bitset_test.mbt | 68 --------------- immut/hashset/moon.pkg.json | 3 +- immut/hashset/sparse_array_test.mbt | 53 ------------ .../sparse_array}/bitset.mbt | 72 +++++++++------- immut/internal/sparse_array/moon.pkg.json | 10 +++ .../sparse_array}/sparse_array.mbt | 20 +++-- immut/internal/sparse_array/sparse_array.mbti | 32 ++++++++ .../sparse_array}/sparse_array_test.mbt | 0 13 files changed, 112 insertions(+), 308 deletions(-) delete mode 100644 immut/hashmap/sparse_array.mbt delete mode 100644 immut/hashset/bitset.mbt delete mode 100644 immut/hashset/bitset_test.mbt delete mode 100644 immut/hashset/sparse_array_test.mbt rename immut/{hashmap => internal/sparse_array}/bitset.mbt (80%) create mode 100644 immut/internal/sparse_array/moon.pkg.json rename immut/{hashset => internal/sparse_array}/sparse_array.mbt (82%) create mode 100644 immut/internal/sparse_array/sparse_array.mbti rename immut/{hashmap => internal/sparse_array}/sparse_array_test.mbt (100%) diff --git a/immut/hashmap/HAMT.mbt b/immut/hashmap/HAMT.mbt index b8ab87126..eada0e4a1 100644 --- a/immut/hashmap/HAMT.mbt +++ b/immut/hashmap/HAMT.mbt @@ -16,13 +16,13 @@ // // Hash-Array-Mapped-Trie (HAMT) is a persistent hash-table data structure. // It is a trie over the hash of keys (i.e. strings of binary digits) -// +// // Every level in a HAMT can have up to 32 branches (5 digits), // so HAMT has a tree height of at most 7, // and is more efficient compared to most other tree data structures. -// +// // HAMT uses bitmap-based sparse array to avoid space waste -// +// // Some references: // - // - @@ -32,7 +32,7 @@ enum Map[K, V] { Empty Leaf(K, V) // optimize for the case of no collision Collision(Bucket[K, V]) // use a list of buckets to resolve collision - Branch(SparseArray[Map[K, V]]) + Branch(@sparse_array.SparseArray[Map[K, V]]) } // The number of bits consumed at every [Branch] node @@ -50,7 +50,7 @@ pub fn find[K : Eq + Hash, V](self : Map[K, V], key : K) -> V? { Empty, _ => None Leaf(key1, value), _ => if key == key1 { Some(value) } else { None } Collision(bucket), _ => bucket.find(key) - // get the first segment (lower 5 bits) of the hash value + // get the first segment (lower 5 bits) of the hash value // inline the hot path of Sparse_array::op_get Branch(children), hash => { let idx = hash.land(segment_mask) @@ -86,7 +86,7 @@ fn add_with_hash[K : Eq, V]( hash.lsr(segment_length), value, ) - Map::Branch(singleton(idx, child)) + Map::Branch(@sparse_array.SparseArray::singleton(idx, child)) } } diff --git a/immut/hashmap/moon.pkg.json b/immut/hashmap/moon.pkg.json index 2ef137c71..8924612d4 100644 --- a/immut/hashmap/moon.pkg.json +++ b/immut/hashmap/moon.pkg.json @@ -2,7 +2,8 @@ "import": [ "moonbitlang/core/builtin", "moonbitlang/core/array", - "moonbitlang/core/coverage" + "moonbitlang/core/coverage", + "moonbitlang/core/immut/internal/sparse_array" ], "test-import" : [ "moonbitlang/core/assertion", diff --git a/immut/hashmap/sparse_array.mbt b/immut/hashmap/sparse_array.mbt deleted file mode 100644 index c6bae181c..000000000 --- a/immut/hashmap/sparse_array.mbt +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2024 International Digital Economy Academy -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/// A sparse array with at most 32 elements, where elements are not required to have contiguous index. -/// Empty elements don't waste any space, without losing constant-time access -priv struct SparseArray[X] { - // record which elements are present - elem_info : Bitset - data : Array[X] -} - -fn SparseArray::singleton[X](idx : Int, value : X) -> SparseArray[X] { - { elem_info: empty_bitset.add(idx), data: [value] } -} - -fn has[X](self : SparseArray[X], idx : Int) -> Bool { - self.elem_info.has(idx) -} - -fn op_get[X](self : SparseArray[X], idx : Int) -> X? { - if self.elem_info.has(idx) { - Some(self.data[self.elem_info.index_of(idx)]) - } else { - None - } -} - -/// `add(self: SparseArray[X], idx: Int, value: X)` -/// -/// Add a new element into the sparse array. -/// [idx] must be absent from [self] -fn add[X](self : SparseArray[X], idx : Int, value : X) -> SparseArray[X] { - let old_data = self.data - let old_len = old_data.length() - let new_len = old_len + 1 - let pos_of_new_item = self.elem_info.index_of(idx) - let new_data = Array::make(new_len, value) - old_data.blit_to(new_data, len=pos_of_new_item) - old_data.blit_to( - new_data, - len=old_len - pos_of_new_item, - src_offset=pos_of_new_item, - dst_offset=pos_of_new_item + 1, - ) - { elem_info: self.elem_info.add(idx), data: new_data } -} - -/// `replace(self: SparseArray[X], idx: Int, value: X)` -/// -// replace an existing element in the sparse array. -fn replace[X](self : SparseArray[X], idx : Int, value : X) -> SparseArray[X] { - let new_data = self.data.copy() - new_data[self.elem_info.index_of(idx)] = value - { elem_info: self.elem_info, data: new_data } -} - -/// `size(self: SparseArray[X]) -> Int` -/// -/// Return the size of a sparse array -fn size[X](self : SparseArray[X]) -> Int { - self.data.length() -} - -/// `iter(self: SparseArray[X], f: (X) -> Unit) -> Unit` -/// -/// Iterate through elements in a sparse array -fn iter[X](self : SparseArray[X], f : (X) -> Unit) -> Unit { - for i = 0; i < self.elem_info.size(); i = i + 1 { - f(self.data[i]) - } -} diff --git a/immut/hashset/HAMT.mbt b/immut/hashset/HAMT.mbt index 7487117b7..1bbcbbbc2 100644 --- a/immut/hashset/HAMT.mbt +++ b/immut/hashset/HAMT.mbt @@ -16,13 +16,13 @@ // // Hash-Array-Mapped-Trie (HAMT) is a persistent hash-table data structure. // It is a trie over the hash of keys (i.e. strings of binary digits) -// +// // Every level in a HAMT can have up to 32 branches (5 digits), // so HAMT has a tree height of at most 7, // and is more efficient compared to most other tree data structures. -// +// // HAMT uses bitmap-based sparse array to avoid space waste -// +// // Some references: // - // - @@ -32,7 +32,7 @@ enum Set[T] { Empty Leaf(T) // optimize for the case of no collision Collision(Bucket[T]) // use a list of buckets to resolve collision - Branch(SparseArray[Set[T]]) + Branch(@sparse_array.SparseArray[Set[T]]) } // The number of bits consumed at every [Branch] node @@ -50,7 +50,7 @@ pub fn contain[T : Eq + Hash](self : Set[T], key : T) -> Bool { Empty, _ => false Leaf(key1), _ => key == key1 Collision(bucket), _ => bucket.find(key) - // get the first segment (lower 5 bits) of the hash value + // get the first segment (lower 5 bits) of the hash value // inline the hot path of Sparse_array::op_get Branch(children), hash => { let idx = hash.land(segment_mask) @@ -80,7 +80,7 @@ fn add_with_hash[T : Eq]( key, hash.lsr(segment_length), ) - Set::Branch(singleton(idx, child)) + Set::Branch(@sparse_array.SparseArray::singleton(idx, child)) } } diff --git a/immut/hashset/bitset.mbt b/immut/hashset/bitset.mbt deleted file mode 100644 index 1d3e82311..000000000 --- a/immut/hashset/bitset.mbt +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2024 International Digital Economy Academy -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/// a simple bit set to store a set of integers less than 32 -priv type Bitset Int - -let empty_bitset : Bitset = Bitset(0) - -/// `has(self: Bitset, idx: Int)` -/// -/// Check if the given index is present in the bitset. -fn has(self : Bitset, idx : Int) -> Bool { - self.0.land((1).lsl(idx)) != 0 -} - -/// `index_of(self: Bitset, idx: Int)` -/// -/// Get the index of the bit in the bitset. -fn index_of(self : Bitset, idx : Int) -> Int { - self.0.land((1).lsl(idx) - 1).popcnt() -} - -/// `add(self: Bitset, idx: Int)` -/// -/// Add a new index to the bitset. -fn add(self : Bitset, idx : Int) -> Bitset { - Bitset(self.0.lor((1).lsl(idx))) -} - -/// `remove(self: Bitset, idx: Int)` -/// -/// Remove an index from the bitset. -fn remove(self : Bitset, idx : Int) -> Bitset { - Bitset(self.0.lxor((1).lsl(idx))) -} - -/// `size(self: Bitset) -> Int` -/// -/// Calculate the size of a bitset -fn size(self : Bitset) -> Int { - self.0.popcnt() -} diff --git a/immut/hashset/bitset_test.mbt b/immut/hashset/bitset_test.mbt deleted file mode 100644 index 987a1f80e..000000000 --- a/immut/hashset/bitset_test.mbt +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2024 International Digital Economy Academy -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -test "Bitset::has" { - let b = empty_bitset.add(2) - inspect(b.has(0), content="false")? - inspect(b.has(2), content="true")? - let b = b.add(0) - inspect(b.has(0), content="true")? - inspect(b.has(2), content="true")? -} - -test "Bitset::index_of" { - let b = empty_bitset.add(2) - inspect(b.index_of(2), content="0")? - let b = b.add(0) - inspect(b.index_of(2), content="1")? - let b = b.add(5) - inspect(b.index_of(2), content="1")? - // when elem is missing - inspect(b.index_of(3), content="2")? - inspect(b.index_of(4), content="2")? - // 5 is - inspect(b.index_of(5), content="2")? - // 6 is not - inspect(b.index_of(6), content="3")? -} - -test "Bitset::remove" { - let b = empty_bitset.add(2).add(3) - inspect(b.has(2), content="true")? - inspect(b.has(3), content="true")? - inspect(b.index_of(2), content="0")? - inspect(b.index_of(3), content="1")? - let b = b.remove(2) - inspect(b.has(2), content="false")? - inspect(b.has(3), content="true")? - inspect(b.index_of(3), content="0")? -} - -test "Bitset::size" { - let b = empty_bitset - inspect(b.size(), content="0")? - let b = b.add(0) - inspect(b.size(), content="1")? - let b = b.add(1) - inspect(b.size(), content="2")? - let b = b.add(1) - inspect(b.size(), content="2")? -} - -test "Bitset::ctpop" { - inspect( - ([0, 0xf0f0f0f0, 0x3c3c0ff0] : Array[_]).map(fn { x => x.popcnt() }), - content="[0, 16, 16]", - )? -} diff --git a/immut/hashset/moon.pkg.json b/immut/hashset/moon.pkg.json index 00917086b..4820e3ea1 100644 --- a/immut/hashset/moon.pkg.json +++ b/immut/hashset/moon.pkg.json @@ -2,7 +2,8 @@ "import": [ "moonbitlang/core/builtin", "moonbitlang/core/array", - "moonbitlang/core/coverage" + "moonbitlang/core/coverage", + "moonbitlang/core/immut/internal/sparse_array" ], "test-import" : [ "moonbitlang/core/assertion", diff --git a/immut/hashset/sparse_array_test.mbt b/immut/hashset/sparse_array_test.mbt deleted file mode 100644 index 1df547d56..000000000 --- a/immut/hashset/sparse_array_test.mbt +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2024 International Digital Economy Academy -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -test "SparseArray" { - let arr0 = singleton(1, 1) - inspect( - (arr0.has(0), arr0[0], arr0.has(1), arr0[1], arr0.has(2), arr0[2]), - content="(false, None, true, Some(1), false, None)", - )? - let arr1 = arr0.add(2, 2) - inspect( - (arr1.has(0), arr1[0], arr1.has(1), arr1[1], arr1.has(2), arr1[2]), - content="(false, None, true, Some(1), true, Some(2))", - )? - let arr2 = arr1.add(0, 0) - inspect( - (arr2.has(0), arr2[0], arr2.has(1), arr2[1], arr2.has(2), arr2[2]), - content="(true, Some(0), true, Some(1), true, Some(2))", - )? - let arr3 = arr2.replace(1, 42) - inspect( - (arr3.has(0), arr3[0], arr3.has(1), arr3[1], arr3.has(2), arr3[2]), - content="(true, Some(0), true, Some(42), true, Some(2))", - )? -} - -test "SparseArray::iter" { - let arr = singleton(0, 0).add(1, 1).add(3, 3).add(31, 31) - let buf = Buffer::make(0) - let mut is_first = true - arr.iter( - fn(x) { - if is_first { - is_first = false - } else { - buf.write_string(", ") - } - buf.write_string(x.to_string()) - }, - ) - inspect(buf, content="0, 1, 3, 31")? -} diff --git a/immut/hashmap/bitset.mbt b/immut/internal/sparse_array/bitset.mbt similarity index 80% rename from immut/hashmap/bitset.mbt rename to immut/internal/sparse_array/bitset.mbt index 0c3198f55..0925ac881 100644 --- a/immut/hashmap/bitset.mbt +++ b/immut/internal/sparse_array/bitset.mbt @@ -13,17 +13,45 @@ // limitations under the License. /// a simple bit set to store a set of integers less than 32 -priv type Bitset Int +pub type Bitset Int let empty_bitset : Bitset = Bitset(0) /// `has(self: Bitset, idx: Int)` /// /// Check if the given index is present in the bitset. -fn has(self : Bitset, idx : Int) -> Bool { +pub fn has(self : Bitset, idx : Int) -> Bool { self.0.land((1).lsl(idx)) != 0 } +/// `index_of(self: Bitset, idx: Int)` +/// +/// Get the index of the bit in the bitset. +pub fn index_of(self : Bitset, idx : Int) -> Int { + self.0.land((1).lsl(idx) - 1).popcnt() +} + +/// `add(self: Bitset, idx: Int)` +/// +/// Add a new index to the bitset. +pub fn add(self : Bitset, idx : Int) -> Bitset { + Bitset(self.0.lor((1).lsl(idx))) +} + +/// `remove(self: Bitset, idx: Int)` +/// +/// Remove an index from the bitset. +pub fn remove(self : Bitset, idx : Int) -> Bitset { + Bitset(self.0.lxor((1).lsl(idx))) +} + +/// `size(self: Bitset) -> Int` +/// +/// Calculate the size of a bitset +pub fn size(self : Bitset) -> Int { + self.0.popcnt() +} + test "Bitset::has" { let b = empty_bitset.add(2) inspect(b.has(0), content="false")? @@ -33,13 +61,6 @@ test "Bitset::has" { inspect(b.has(2), content="true")? } -/// `index_of(self: Bitset, idx: Int)` -/// -/// Get the index of the bit in the bitset. -fn index_of(self : Bitset, idx : Int) -> Int { - self.0.land((1).lsl(idx) - 1).popcnt() -} - test "Bitset::index_of" { let b = empty_bitset.add(2) inspect(b.index_of(2), content="0")? @@ -47,29 +68,15 @@ test "Bitset::index_of" { inspect(b.index_of(2), content="1")? let b = b.add(5) inspect(b.index_of(2), content="1")? - // when elem is missing + // when elem is missing inspect(b.index_of(3), content="2")? inspect(b.index_of(4), content="2")? - // 5 is + // 5 is inspect(b.index_of(5), content="2")? // 6 is not inspect(b.index_of(6), content="3")? } -/// `add(self: Bitset, idx: Int)` -/// -/// Add a new index to the bitset. -fn add(self : Bitset, idx : Int) -> Bitset { - Bitset(self.0.lor((1).lsl(idx))) -} - -/// `remove(self: Bitset, idx: Int)` -/// -/// Remove an index from the bitset. -fn remove(self : Bitset, idx : Int) -> Bitset { - Bitset(self.0.lxor((1).lsl(idx))) -} - test "Bitset::remove" { let b = empty_bitset.add(2).add(3) inspect(b.has(2), content="true")? @@ -82,11 +89,16 @@ test "Bitset::remove" { inspect(b.index_of(3), content="0")? } -/// `size(self: Bitset) -> Int` -/// -/// Calculate the size of a bitset -fn size(self : Bitset) -> Int { - self.0.popcnt() +test "Bitset::remove" { + let b = empty_bitset.add(2).add(3) + inspect(b.has(2), content="true")? + inspect(b.has(3), content="true")? + inspect(b.index_of(2), content="0")? + inspect(b.index_of(3), content="1")? + let b = b.remove(2) + inspect(b.has(2), content="false")? + inspect(b.has(3), content="true")? + inspect(b.index_of(3), content="0")? } test "Bitset::size" { diff --git a/immut/internal/sparse_array/moon.pkg.json b/immut/internal/sparse_array/moon.pkg.json new file mode 100644 index 000000000..c4c88f14b --- /dev/null +++ b/immut/internal/sparse_array/moon.pkg.json @@ -0,0 +1,10 @@ +{ + "import": [ + "moonbitlang/core/builtin", + "moonbitlang/core/array", + "moonbitlang/core/coverage" + ], + "test-import" : [ + "moonbitlang/core/tuple" + ] + } \ No newline at end of file diff --git a/immut/hashset/sparse_array.mbt b/immut/internal/sparse_array/sparse_array.mbt similarity index 82% rename from immut/hashset/sparse_array.mbt rename to immut/internal/sparse_array/sparse_array.mbt index 4f5cd5984..115f07443 100644 --- a/immut/hashset/sparse_array.mbt +++ b/immut/internal/sparse_array/sparse_array.mbt @@ -14,21 +14,21 @@ /// A sparse array with at most 32 elements, where elements are not required to have contiguous index. /// Empty elements don't waste any space, without losing constant-time access -priv struct SparseArray[X] { +pub struct SparseArray[X] { // record which elements are present elem_info : Bitset data : FixedArray[X] } -fn SparseArray::singleton[X](idx : Int, value : X) -> SparseArray[X] { +pub fn SparseArray::singleton[X](idx : Int, value : X) -> SparseArray[X] { { elem_info: empty_bitset.add(idx), data: [value] } } -fn has[X](self : SparseArray[X], idx : Int) -> Bool { +pub fn has[X](self : SparseArray[X], idx : Int) -> Bool { self.elem_info.has(idx) } -fn op_get[X](self : SparseArray[X], idx : Int) -> X? { +pub fn op_get[X](self : SparseArray[X], idx : Int) -> X? { if self.elem_info.has(idx) { Some(self.data[self.elem_info.index_of(idx)]) } else { @@ -40,7 +40,7 @@ fn op_get[X](self : SparseArray[X], idx : Int) -> X? { /// /// Add a new element into the sparse array. /// [idx] must be absent from [self] -fn add[X](self : SparseArray[X], idx : Int, value : X) -> SparseArray[X] { +pub fn add[X](self : SparseArray[X], idx : Int, value : X) -> SparseArray[X] { let old_data = self.data let old_len = old_data.length() let new_len = old_len + 1 @@ -59,7 +59,11 @@ fn add[X](self : SparseArray[X], idx : Int, value : X) -> SparseArray[X] { /// `replace(self: SparseArray[X], idx: Int, value: X)` /// // replace an existing element in the sparse array. -fn replace[X](self : SparseArray[X], idx : Int, value : X) -> SparseArray[X] { +pub fn replace[X]( + self : SparseArray[X], + idx : Int, + value : X +) -> SparseArray[X] { let new_data = self.data.copy() new_data[self.elem_info.index_of(idx)] = value { elem_info: self.elem_info, data: new_data } @@ -68,14 +72,14 @@ fn replace[X](self : SparseArray[X], idx : Int, value : X) -> SparseArray[X] { /// `size(self: SparseArray[X]) -> Int` /// /// Return the size of a sparse array -fn size[X](self : SparseArray[X]) -> Int { +pub fn size[X](self : SparseArray[X]) -> Int { self.data.length() } /// `iter(self: SparseArray[X], f: (X) -> Unit) -> Unit` /// /// Iterate through elements in a sparse array -fn iter[X](self : SparseArray[X], f : (X) -> Unit) -> Unit { +pub fn iter[X](self : SparseArray[X], f : (X) -> Unit) -> Unit { for i = 0; i < self.elem_info.size(); i = i + 1 { f(self.data[i]) } diff --git a/immut/internal/sparse_array/sparse_array.mbti b/immut/internal/sparse_array/sparse_array.mbti new file mode 100644 index 000000000..a5eca9cbb --- /dev/null +++ b/immut/internal/sparse_array/sparse_array.mbti @@ -0,0 +1,32 @@ +package moonbitlang/core/immut/internal/sparse_array + +// Values + +// Types and methods +pub type Bitset Int +impl Bitset { + add(Self, Int) -> Self + has(Self, Int) -> Bool + index_of(Self, Int) -> Int + remove(Self, Int) -> Self + size(Self) -> Int +} + +pub struct SparseArray { + pub elem_info : Bitset + pub data : FixedArray[X] +} +impl SparseArray { + add[X](Self[X], Int, X) -> Self[X] + has[X](Self[X], Int) -> Bool + iter[X](Self[X], (X) -> Unit) -> Unit + op_get[X](Self[X], Int) -> Option[X] + replace[X](Self[X], Int, X) -> Self[X] + singleton[X](Int, X) -> Self[X] + size[X](Self[X]) -> Int +} + +// Traits + +// Extension Methods + diff --git a/immut/hashmap/sparse_array_test.mbt b/immut/internal/sparse_array/sparse_array_test.mbt similarity index 100% rename from immut/hashmap/sparse_array_test.mbt rename to immut/internal/sparse_array/sparse_array_test.mbt