From 4c78c99087eb6bddbcedfc8bc7c368f819c1dc3d Mon Sep 17 00:00:00 2001 From: yankun <1939810907@qq.com> Date: Wed, 19 Jul 2023 22:20:30 +0800 Subject: [PATCH] fix: support java #7 --- .github/workflows/test_java.yml | 48 ++ .gitignore | 10 + .mill-version | 1 + Cargo.toml | 3 +- build.sc | 54 ++ fastbloom-rs/Cargo.toml | 2 +- fastbloomjvm/native/Cargo.toml | 12 + fastbloomjvm/native/src/lib.rs | 612 ++++++++++++++++++ .../github/yankun1992/bloom/BloomFilter.java | 228 +++++++ .../yankun1992/bloom/CountingBloomFilter.java | 242 +++++++ .../yankun1992/bloom/FilterBuilder.java | 122 ++++ .../yankun1992/bloom/BloomFilterTest.java | 163 +++++ .../bloom/CountingBloomFilterTest.java | 92 +++ .../yankun1992/bloom/CrossLanguageTest.java | 51 ++ .../yankun1992/bloom/FilterBuilderTest.java | 113 ++++ millw | 189 ++++++ millw.bat | 173 +++++ py_tests/test_save_file.py | 40 ++ 18 files changed, 2153 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/test_java.yml create mode 100644 .mill-version create mode 100644 build.sc create mode 100644 fastbloomjvm/native/Cargo.toml create mode 100644 fastbloomjvm/native/src/lib.rs create mode 100644 fastbloomjvm/src/io/github/yankun1992/bloom/BloomFilter.java create mode 100644 fastbloomjvm/src/io/github/yankun1992/bloom/CountingBloomFilter.java create mode 100644 fastbloomjvm/src/io/github/yankun1992/bloom/FilterBuilder.java create mode 100644 fastbloomjvm/test/src/io/github/yankun1992/bloom/BloomFilterTest.java create mode 100644 fastbloomjvm/test/src/io/github/yankun1992/bloom/CountingBloomFilterTest.java create mode 100644 fastbloomjvm/test/src/io/github/yankun1992/bloom/CrossLanguageTest.java create mode 100644 fastbloomjvm/test/src/io/github/yankun1992/bloom/FilterBuilderTest.java create mode 100644 millw create mode 100644 millw.bat create mode 100644 py_tests/test_save_file.py diff --git a/.github/workflows/test_java.yml b/.github/workflows/test_java.yml new file mode 100644 index 0000000..0ae649b --- /dev/null 
+++ b/.github/workflows/test_java.yml @@ -0,0 +1,48 @@ +name: Test Java + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +env: + CARGO_TERM_COLOR: always + CONDA_PREFIX: /usr/share/miniconda + +jobs: + test: + name: Build and test Java + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ "macos-latest", "windows-latest", "ubuntu-latest"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.7 + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + echo $CONDA + - name: Install python dependencies + run: | + python -m pip install --upgrade pip + pip install -r build.requirements.txt + - name: Maturin develop + uses: PyO3/maturin-action@v1 + with: + maturin-version: latest + command: develop + args: --release + - name: save bloom filter to file + run: python py_tests/test_save_file.py + - uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '8' + - run: chmod +x millw + - run: ./millw fastbloomjvm.test diff --git a/.gitignore b/.gitignore index 089aaf9..661ed48 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,13 @@ Cargo.lock *.pyc __pychche__ + +out +fastbloomjvm/native/target +fastbloomjvm/native/Cargo.lock +fastbloomjvm/native/.idea + +.pytest_cache +.vscode +py_tests/.pytest_cache +data \ No newline at end of file diff --git a/.mill-version b/.mill-version new file mode 100644 index 0000000..5111446 --- /dev/null +++ b/.mill-version @@ -0,0 +1 @@ +0.10.12 \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 36304e6..667aade 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fastbloom_rs" -version = "0.5.4" +version = "0.5.5" edition = "2021" authors = ["Yan Kun "] description = "Some fast bloom filter implemented by Rust for Python and Rust! 10x faster than pybloom!" 
@@ -39,6 +39,7 @@ rand = "0.8" [workspace] members = ["fastbloom-rs"] +exclude = ["fastbloomjvm/native"] [[bench]] name = "fastbloom" diff --git a/build.sc b/build.sc new file mode 100644 index 0000000..2344aef --- /dev/null +++ b/build.sc @@ -0,0 +1,54 @@ +import mill._, scalalib._, publish._ +import mill.define.Sources +import mill.scalalib.JavaModule +import $ivy.`io.github.otavia-projects::mill-rust_mill$MILL_BIN_PLATFORM:0.2.1` +import io.github.otavia.jni.plugin.RustJniModule + +object ProjectInfo { + + def description: String = "Some fast bloom filter implemented by Rust for Python and Java!" + + def organization: String = "io.github.yankun1992" + + def organizationUrl: String = "" + + def projectUrl: String = "" + + def licenses = Seq() + + def author = Seq("Yan Kun ") + + def version = "0.5.5-SNAPSHOT" + + def buildTool = "mill" + + def buildToolVersion = mill.BuildInfo.millVersion + +} + +object fastbloomjvm extends RustJniModule with PublishModule { + + override def release: Boolean = true + + override def publishVersion: T[String] = ProjectInfo.version + + override def pomSettings: T[PomSettings] = PomSettings( + description = ProjectInfo.description, + organization = ProjectInfo.organization, + url = "", + licenses = ProjectInfo.licenses, + versionControl = VersionControl(), + developers = Seq(Developer("yan_kun", "Yan Kun", "", Some("icekredit"), Some(""))) + ) + + override def artifactName = "fastbloomjvm" + + override def artifactId = "fastbloom" + + override def ivyDeps = Agg(ivy"io.github.otavia-projects:jni-loader:0.2.1") + + object test extends Tests with TestModule.Junit4 { + + } + +} \ No newline at end of file diff --git a/fastbloom-rs/Cargo.toml b/fastbloom-rs/Cargo.toml index d998c05..789d5de 100644 --- a/fastbloom-rs/Cargo.toml +++ b/fastbloom-rs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fastbloom-rs" -version = "0.5.4" +version = "0.5.5" edition = "2021" authors = ["Yan Kun "] description = "Some fast bloom filter implemented by Rust 
for Python and Rust!" diff --git a/fastbloomjvm/native/Cargo.toml b/fastbloomjvm/native/Cargo.toml new file mode 100644 index 0000000..2b67a6d --- /dev/null +++ b/fastbloomjvm/native/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "fastbloom" # generated by nativeInit with defaultNativeName +version = "0.5.5" +authors = ["Yan Kun "] +edition = "2021" + +[dependencies] +jni = "0.21" +fastbloom-rs = { path = "../../fastbloom-rs" } + +[lib] +crate_type = ["cdylib"] \ No newline at end of file diff --git a/fastbloomjvm/native/src/lib.rs b/fastbloomjvm/native/src/lib.rs new file mode 100644 index 0000000..4dd1983 --- /dev/null +++ b/fastbloomjvm/native/src/lib.rs @@ -0,0 +1,612 @@ +use std::ptr::slice_from_raw_parts; + +use fastbloom_rs::{BloomFilter, CountingBloomFilter, Deletable, FilterBuilder, Hashes, Membership}; +use jni::JNIEnv; +use jni::objects::*; +use jni::sys::*; + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_FilterBuilder_new0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, expected_elements: jlong, false_positive_probability: jdouble, +) -> jlong { + let mut builder = FilterBuilder::new(expected_elements as u64, false_positive_probability as f64); + + let builder = Box::new(builder); + + Box::into_raw(builder) as jlong +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_FilterBuilder_fromSizeAndHashes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, size: jlong, hashes: jint, +) -> jlong { + let mut builder = FilterBuilder::from_size_and_hashes(size as u64, hashes as u32); + + let builder = Box::new(builder); + + Box::into_raw(builder) as jlong +} + + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_FilterBuilder_enableRepeatInsert0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, enable: jboolean, +) { + let mut builder = Box::from_raw(raw as *mut FilterBuilder); + + builder.enable_repeat_insert(enable != 0); + + Box::into_raw(builder); // keep 
builder alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_FilterBuilder_buildBloomFilter0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) -> jlong { + let mut builder = Box::from_raw(raw as *mut FilterBuilder); + + let filter = Box::new(builder.build_bloom_filter()); + + Box::into_raw(builder); // keep builder alive. + + Box::into_raw(filter) as jlong +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_FilterBuilder_buildCountingBloomFilter0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) -> jlong { + let mut builder = Box::from_raw(raw as *mut FilterBuilder); + + let filter = Box::new(builder.build_counting_bloom_filter()); + + Box::into_raw(builder); // keep builder alive. + + Box::into_raw(filter) as jlong +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_FilterBuilder_close0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) { + let mut builder = Box::from_raw(raw as *mut FilterBuilder); + + drop(builder); +} + + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_hashes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) -> jint { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let hashes = filter.hashes(); + + Box::into_raw(filter); // keep filter alive. + + hashes as jint +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_addInt0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: jint, +) { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let element = element as i32; + + filter.add(&i32::to_le_bytes(element)); + + Box::into_raw(filter); // keep filter alive. 
+} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_addLong0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: jlong, +) { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let element = element as i64; + + filter.add(&i64::to_le_bytes(element)); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_addIntBatch0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, array: JIntArray<'local>, +) { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let len = env.get_array_length(&array).unwrap() as usize; + let mut buf = vec![0; len]; + + env.get_int_array_region(array, 0, &mut buf).unwrap(); + + + for element in buf { + let element = element as i32; + + filter.add(&i32::to_le_bytes(element)); + } + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_addStr0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: JString<'local>, +) { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let element: String = env.get_string(&element).unwrap().into(); // decode JNI Modified UTF-8 so the hashed bytes are standard UTF-8. + + filter.add(element.as_bytes()); + + Box::into_raw(filter); // keep filter alive. +} + + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_addBytes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: JByteArray<'local>, +) { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let element = env.convert_byte_array(element).unwrap(); + + filter.add(&element); + + Box::into_raw(filter); // keep filter alive. 
+} + + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_containsInt0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: jint, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let element = element as i32; + + let res = filter.contains(&i32::to_le_bytes(element)); + + Box::into_raw(filter); // keep filter alive. + + res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_containsLong0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: jlong, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let element = element as i64; + + let res = filter.contains(&i64::to_le_bytes(element)); + + Box::into_raw(filter); // keep filter alive. + + res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_containsStr0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: JString<'local>, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let element: String = env.get_string(&element).unwrap().into(); // decode JNI Modified UTF-8; must hash the same bytes as addStr0. + + let res = filter.contains(element.as_bytes()); + + Box::into_raw(filter); // keep filter alive. + + res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_containsBytes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: JByteArray<'local>, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let element = env.convert_byte_array(element).unwrap(); + + let res = filter.contains(&element); + + Box::into_raw(filter); // keep filter alive. 
+ + res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_clear0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + filter.clear(); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_fromBytes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, array: JByteArray<'local>, hashes: jint) + -> jlong { + let bytes = env.convert_byte_array(array).unwrap(); + + // println!("len {} {:?}", bytes.len(), &bytes); + + let filter = Box::new(BloomFilter::from_u8_array(&bytes, hashes as u32)); + + Box::into_raw(filter) as jlong +} + +/// Direct ByteBuffer over a leaked copy of the filter bytes (each call leaks one copy; prefer copyBytes0). +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_getByteBuffer0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) -> JByteBuffer<'local> { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + let bytes = filter.get_u8_array(); + // The JVM keeps reading this memory after we return, so it must not live in a local Vec + // that is freed on return (use-after-free); the copy is leaked on purpose. + let buf: &'static mut [u8] = Box::leak(bytes.to_vec().into_boxed_slice()); + let ptr = buf.as_mut_ptr(); + let jbuffer = env.new_direct_byte_buffer(ptr, bytes.len()).unwrap(); + Box::into_raw(filter); // keep filter alive. + jbuffer +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_getSize0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) -> jint { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + let size = filter.config().size >> 3; + + Box::into_raw(filter); // keep filter alive. 
+ + size as jint +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_copyBytes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, array: JByteArray<'local>, +) { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let bytes = filter.get_u8_array(); + let len = bytes.len(); + + let i8_ptr = bytes.as_ptr() as *const i8; + + let ptr = slice_from_raw_parts(i8_ptr, len); + + let arr = unsafe { &*ptr }; + + // println!("len {} {:?}", len, bytes); + + env.set_byte_array_region(array, 0, arr).unwrap(); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_union0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, other: jlong, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + let other_filter = Box::from_raw(other as *mut BloomFilter); + + let res = filter.union(&other_filter); // Java declares this native as boolean, so the result must be returned. + Box::into_raw(filter); // keep filter alive. + Box::into_raw(other_filter); // keep other filter alive. + res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_intersect0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, other: jlong, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + let other_filter = Box::from_raw(other as *mut BloomFilter); + + let res = filter.intersect(&other_filter); // Java declares this native as boolean, so the result must be returned. + Box::into_raw(filter); // keep filter alive. + Box::into_raw(other_filter); // keep other filter alive. 
+ res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_isEmpty0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut BloomFilter); + + let res = filter.is_empty(); + + Box::into_raw(filter); // keep filter alive. 
+ + res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_BloomFilter_close0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) { + let filter = Box::from_raw(raw as *mut BloomFilter); + + drop(filter); +} + + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_hashes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) -> jint { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let hashes = filter.hashes(); + + Box::into_raw(filter); // keep filter alive. + + hashes as jint +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_addInt0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: jint, +) { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element = element as i32; + + filter.add(&i32::to_le_bytes(element)); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_removeInt0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: jint, +) { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element = element as i32; + + filter.remove(&i32::to_le_bytes(element)); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_addLong0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: jlong, +) { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element = element as i64; + + filter.add(&i64::to_le_bytes(element)); + + Box::into_raw(filter); // keep filter alive. 
+} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_removeLong0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: jlong, +) { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element = element as i64; + + filter.remove(&i64::to_le_bytes(element)); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_addStr0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: JString<'local>, +) { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element: String = env.get_string(&element).unwrap().into(); // decode JNI Modified UTF-8 so the hashed bytes are standard UTF-8. + + filter.add(element.as_bytes()); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_removeStr0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: JString<'local>, +) { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element: String = env.get_string(&element).unwrap().into(); // decode JNI Modified UTF-8; must hash the same bytes as addStr0. + + filter.remove(element.as_bytes()); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_addBytes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: JByteArray<'local>, +) { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element = env.convert_byte_array(element).unwrap(); + + filter.add(&element); + + Box::into_raw(filter); // keep filter alive. 
+} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_removeBytes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: JByteArray<'local>, +) { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element = env.convert_byte_array(element).unwrap(); + + filter.remove(&element); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_containsInt0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: jint, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element = element as i32; + + let res = filter.contains(&i32::to_le_bytes(element)); + + Box::into_raw(filter); // keep filter alive. + + res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_containsLong0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: jlong, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element = element as i64; + + let res = filter.contains(&i64::to_le_bytes(element)); + + Box::into_raw(filter); // keep filter alive. + + res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_containsStr0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: JString<'local>, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element: String = env.get_string(&element).unwrap().into(); // decode JNI Modified UTF-8; must hash the same bytes as addStr0. + + let res = filter.contains(element.as_bytes()); + + Box::into_raw(filter); // keep filter alive. 
+ + res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_containsBytes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, element: JByteArray<'local>, +) -> jboolean { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let element = env.convert_byte_array(element).unwrap(); + + let res = filter.contains(&element); + + Box::into_raw(filter); // keep filter alive. + + res as jboolean +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_getByteBuffer0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) -> JByteBuffer<'local> { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let bytes = filter.get_u8_array(); + + // The JVM keeps reading this memory after we return, so it must not live in a local Vec + // that is freed on return (use-after-free); the copy is leaked on purpose (one copy per call). + let buf: &'static mut [u8] = Box::leak(bytes.to_vec().into_boxed_slice()); + + let ptr = buf.as_mut_ptr(); + + let jbuf = env.new_direct_byte_buffer(ptr, bytes.len()).unwrap(); + + Box::into_raw(filter); // keep filter alive. + + jbuf +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_getSize0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) -> jint { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + let size = filter.config().size >> 1; + + Box::into_raw(filter); // keep filter alive. 
+ + size as jint +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_copyBytes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, array: JByteArray<'local>, +) { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + let bytes = filter.get_u8_array(); + let len = bytes.len(); + + let i8_ptr = bytes.as_ptr() as *const i8; + + let ptr = slice_from_raw_parts(i8_ptr, len); + + let arr = unsafe { &*ptr }; + + // println!("len {} {:?}", len, bytes); + + env.set_byte_array_region(array, 0, arr).unwrap(); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_clear0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) { + let mut filter = Box::from_raw(raw as *mut CountingBloomFilter); + + filter.clear(); + + Box::into_raw(filter); // keep filter alive. +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_close0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, raw: jlong, +) { + let mut builder = Box::from_raw(raw as *mut CountingBloomFilter); + + drop(builder); +} + +#[no_mangle] +pub unsafe extern "C" fn Java_io_github_yankun1992_bloom_CountingBloomFilter_fromBytes0<'local>( + mut env: JNIEnv<'local>, clz: JClass<'local>, array: JByteArray<'local>, hashes: jint, enable_repeat_insert: jboolean) + -> jlong { + let bytes = env.convert_byte_array(array).unwrap(); + + let enable_repeat_insert = enable_repeat_insert != 0; + + let filter = Box::new(CountingBloomFilter::from_u8_array(&bytes, hashes as u32, enable_repeat_insert)); + + Box::into_raw(filter) as jlong +} \ No newline at end of file diff --git a/fastbloomjvm/src/io/github/yankun1992/bloom/BloomFilter.java b/fastbloomjvm/src/io/github/yankun1992/bloom/BloomFilter.java new file mode 100644 index 0000000..e6948f3 --- /dev/null +++ 
b/fastbloomjvm/src/io/github/yankun1992/bloom/BloomFilter.java @@ -0,0 +1,228 @@ +/* + * Copyright 2023 Yan Kun + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.github.yankun1992.bloom; + +import io.github.otavia.jni.loader.NativeLoader; + +import java.io.IOException; + +/** + * A Bloom filter is a space-efficient probabilistic data structure, conceived by Burton Howard Bloom in 1970, that is + * used to test whether an element is a member of a set. False positive matches are possible, but false negatives + * are not. + *
+ * Reference: Bloom, B. H. (1970). Space/time trade-offs in hash coding with allowable errors. Communications of + * the ACM, 13(7), 422-426. Full text article + */ +public class BloomFilter extends NativeLoader implements AutoCloseable { + + final long raw; + + BloomFilter(long raw) throws IOException { + super("fastbloom"); + this.raw = raw; + } + + public int hashes() { + return hashes0(raw); + } + + /** + * Add element to the filter. + * + * @param element value to add + * @apiNote In python API, `add_int` is same as `addLong` in java, because python `int` type is `i64` in Rust + */ + public void addInt(int element) { + addInt0(raw, element); + } + + public void addIntBatch(int[] array) { + addIntBatch0(raw, array); + } + + /** + * Add element to the filter. + * + * @param element value to add + * @apiNote In python API, `add_int` is same as `addLong` in java, because python `int` type is `i64` in Rust + */ + public void addLong(long element) { + addLong0(raw, element); + } + + /** + * Add element to the filter. + * + * @param element value to add + */ + public void addStr(String element) { + addStr0(raw, element); + } + + /** + * Add element to the filter. + * + * @param element value to add + */ + public void addBytes(byte[] element) { + addBytes0(raw, element); + } + + /** + * Tests whether an element is present in the filter (subject to the specified false positive rate). + * + * @param element to test + * @return true if element is in this filter. + * @apiNote In python API, `int` type is `i64` in Rust, so use `containsLong` to test values added from python + */ + public boolean containsInt(int element) { + return containsInt0(raw, element); + } + + /** + * Tests whether an element is present in the filter (subject to the specified false positive rate). + * + * @param element to test + * @return true if element is in this filter. 
+ * @apiNote In python API, `int` type is `i64` in Rust, so this is the method that matches values added from python + */ + public boolean containsLong(long element) { + return containsLong0(raw, element); + } + + /** + * Tests whether an element is present in the filter (subject to the specified false positive rate). + * + * @param element to test + * @return true if element is in this filter. + */ + public boolean containsStr(String element) { + return containsStr0(raw, element); + } + + /** + * Tests whether an element is present in the filter (subject to the specified false positive rate). + * + * @param element to test + * @return true if element is in this filter. + */ + public boolean containsBytes(byte[] element) { + return containsBytes0(raw, element); + } + + /** + * Removes all elements from the filter (i.e. resets all bits to zero). + */ + public void clear() { + clear0(raw); + } + + /** + * Return a copy of the underlying byte array of the Bloom filter. + */ + public byte[] getBytes() { + int size = getSize0(raw); + byte[] bytes = new byte[size]; + copyBytes0(raw, bytes); + + return bytes; + } + + /** + * Performs the union operation on two compatible bloom filters. This is achieved through a bitwise OR operation + * on their bit vectors. This operation is lossless, i.e. no elements are lost and the bloom filter is the same + * that would have resulted if all elements were directly inserted in just one bloom filter. + * + * @param other the other bloom filter + * @return false if not compatible + */ + public boolean union(BloomFilter other) { + return union0(raw, other.raw); + } + + /** + * Performs the intersection operation on two compatible bloom filters. This is achieved through a bitwise AND + * operation on their bit vectors. The operation doesn't introduce any false negatives but it does raise the + * false positive probability. 
The false positive probability in the resulting Bloom filter is at most the + * false-positive probability in one of the constituent bloom filters. + * + * @param other the other bloom filter + * @return false if not compatible + */ + public boolean intersect(BloomFilter other) { + return intersect0(raw, other.raw); + } + + /** + * @return true if the Bloom filter does not contain any elements + */ + public boolean isEmpty() { + return isEmpty0(raw); + } + + @Override + public void close() throws Exception { + close0(raw); + } + + + public static BloomFilter fromBytes(byte[] array, int hashes) throws IOException { + NativeLoader.load("fastbloom"); + long raw = fromBytes0(array, hashes); + + return new BloomFilter(raw); + } + + private static native int hashes0(long raw); + + private static native void addInt0(long raw, int element); + + private static native void addIntBatch0(long raw, int[] array); + + private static native void addLong0(long raw, long element); + + private static native void addStr0(long raw, String element); + + private static native void addBytes0(long raw, byte[] element); + + private static native boolean containsInt0(long raw, int element); + + private static native boolean containsLong0(long raw, long element); + + private static native boolean containsStr0(long raw, String element); + + private static native boolean containsBytes0(long raw, byte[] element); + + private static native void clear0(long raw); + + private static native long fromBytes0(byte[] array, int hashes); + + private static native boolean union0(long raw, long other); + + private static native boolean intersect0(long raw, long other); + + private static native boolean isEmpty0(long raw); + + private static native void close0(long raw); + + private static native int getSize0(long raw); + + private static native void copyBytes0(long raw, byte[] bytes); + + +} diff --git a/fastbloomjvm/src/io/github/yankun1992/bloom/CountingBloomFilter.java 
b/fastbloomjvm/src/io/github/yankun1992/bloom/CountingBloomFilter.java
new file mode 100644
index 0000000..8989dad
--- /dev/null
+++ b/fastbloomjvm/src/io/github/yankun1992/bloom/CountingBloomFilter.java
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2023 Yan Kun
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.yankun1992.bloom;
+
+import io.github.otavia.jni.loader.NativeLoader;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * A Counting Bloom filter works in a similar manner as a regular Bloom filter; however, it is able to keep track of
+ * insertions and deletions. In a counting Bloom filter, each entry in the Bloom filter is a small counter associated
+ * with a basic Bloom filter bit.
+ * <p>
+ * Reference: F. Bonomi, M. Mitzenmacher, R. Panigrahy, S. Singh, and G. Varghese, “An Improved Construction
+ * for Counting Bloom Filters,” in 14th Annual European Symposium on Algorithms, LNCS 4168, 2006
+ */
+public class CountingBloomFilter extends NativeLoader implements AutoCloseable {
+
+    long raw;
+
+    CountingBloomFilter(long raw) throws IOException {
+        super("fastbloom");
+        this.raw = raw;
+    }
+
+    /**
+     * @return the number of hash functions reported by the native filter
+     */
+    public int hashes() {
+        return hashes0(raw);
+    }
+
+    /**
+     * Add element to the filter.
+     *
+     * @apiNote In python API, `add_int` is same as `addLong` in java, because python `int` type is `i64` in Rust
+     */
+    public void addInt(int element) {
+        addInt0(raw, element);
+    }
+
+    /**
+     * Remove element from this filter.
+     *
+     * @apiNote In python API, `add_int` is same as `addLong` in java, because python `int` type is `i64` in Rust
+     */
+    public void removeInt(int element) {
+        removeInt0(raw, element);
+    }
+
+    /**
+     * Add element to the filter.
+     *
+     * @param element value to add
+     * @apiNote In python API, `add_int` is same as `addLong` in java, because python `int` type is `i64` in Rust
+     */
+    public void addLong(long element) {
+        addLong0(raw, element);
+    }
+
+    /**
+     * Remove element from this filter.
+     *
+     * @apiNote In python API, `add_int` is same as `addLong` in java, because python `int` type is `i64` in Rust
+     */
+    public void removeLong(long element) {
+        removeLong0(raw, element);
+    }
+
+    /**
+     * Add element to the filter.
+     */
+    public void addStr(String element) {
+        addStr0(raw, element);
+    }
+
+    /**
+     * Remove element from this filter.
+     */
+    public void removeStr(String element) {
+        removeStr0(raw, element);
+    }
+
+    /**
+     * Add element to the filter.
+     */
+    public void addBytes(byte[] element) {
+        addBytes0(raw, element);
+    }
+
+    /**
+     * Remove element from this filter.
+     */
+    public void removeBytes(byte[] element) {
+        removeBytes0(raw, element);
+    }
+
+    /**
+     * Tests whether an element is present in the filter (subject to the specified false positive rate).
+     *
+     * @param element to test
+     * @return true if element is in this filter.
+     * @apiNote In python API, `add_int` is same as `addLong` in java, because python `int` type is `i64` in Rust
+     */
+    public boolean containsInt(int element) {
+        return containsInt0(raw, element);
+    }
+
+    /**
+     * Tests whether an element is present in the filter (subject to the specified false positive rate).
+     *
+     * @param element to test
+     * @return true if element is in this filter.
+     * @apiNote In Python API, `add_int` is same as `addLong` in java, because python `int` type is `i64` in Rust
+     */
+    public boolean containsLong(long element) {
+        return containsLong0(raw, element);
+    }
+
+    /**
+     * Tests whether an element is present in the filter (subject to the specified false positive rate).
+     *
+     * @param element to test
+     * @return true if element is in this filter.
+     */
+    public boolean containsStr(String element) {
+        return containsStr0(raw, element);
+    }
+
+    /**
+     * Tests whether an element is present in the filter (subject to the specified false positive rate).
+     *
+     * @param element to test
+     * @return true if element is in this filter.
+     */
+    public boolean containsBytes(byte[] element) {
+        return containsBytes0(raw, element);
+    }
+
+
+    /**
+     * Return a copy of the underlying byte array of the Bloom filter as a new Java array.
+     */
+    public byte[] getBytes() {
+        int size = getSize0(raw);
+        byte[] bytes = new byte[size];
+        copyBytes0(raw, bytes);
+
+        return bytes;
+    }
+
+    /**
+     * Removes all elements from the filter (i.e. resets all bits to zero).
+     */
+    public void clear() {
+        clear0(raw);
+    }
+
+    /**
+     * Closes the native filter through close0 at most once; calling close() again does nothing.
+     */
+    @Override
+    public void close() throws Exception {
+        // `raw` is reset to 0 after the first call, so the same native handle is never handed to close0 twice.
+        if (raw != 0) {
+            close0(raw);
+            raw = 0;
+        }
+    }
+
+    /**
+     * Build a Counting Bloom filter from a byte array.
+     *
+     * @param array                byte array
+     * @param hashes               hash function number of the Bloom filter
+     * @param enable_repeat_insert whether a repeated insert of the same element is counted again
+     * @return CountingBloomFilter
+     */
+    public static CountingBloomFilter fromBytes(byte[] array, int hashes, boolean enable_repeat_insert) throws IOException {
+        NativeLoader.load("fastbloom");
+        long raw = fromBytes0(array, hashes, enable_repeat_insert);
+        return new CountingBloomFilter(raw);
+    }
+
+    /**
+     * Build a Counting Bloom filter from a byte array, with repeat insert enabled.
+     *
+     * @param array  byte array
+     * @param hashes hash function number of the Bloom filter
+     * @return CountingBloomFilter
+     */
+    public static CountingBloomFilter fromBytes(byte[] array, int hashes) throws IOException {
+        return fromBytes(array, hashes, true);
+    }
+
+    private static native int hashes0(long raw);
+
+    private static native void addInt0(long raw, int element);
+
+    private static native void removeInt0(long raw, int element);
+
+    private static native void addLong0(long raw, long element);
+
+    private static native void removeLong0(long raw, long element);
+
+    private static native void addStr0(long raw, String element);
+
+    private static native void removeStr0(long raw, String element);
+
+    private static native void addBytes0(long raw, byte[] element);
+
+    private static native void removeBytes0(long raw, byte[] element);
+
+    private static native boolean containsInt0(long raw, int element);
+
+    private static native boolean containsLong0(long raw, long element);
+
+    private static native boolean containsStr0(long raw, String element);
+
+    private static native boolean containsBytes0(long raw, byte[] element);
+
+    private static native ByteBuffer getByteBuffer0(long raw);
+
+    private static native int getSize0(long raw);
+
+    private static native void copyBytes0(long raw, byte[] bytes);
+
+
+    private static native void clear0(long raw);
+
+
+    private static native void close0(long raw);
+
+    private static native long fromBytes0(byte[] array, int hashes, boolean 
enable_repeat_insert);
+
+
+}
diff --git a/fastbloomjvm/src/io/github/yankun1992/bloom/FilterBuilder.java b/fastbloomjvm/src/io/github/yankun1992/bloom/FilterBuilder.java
new file mode 100644
index 0000000..b9300b8
--- /dev/null
+++ b/fastbloomjvm/src/io/github/yankun1992/bloom/FilterBuilder.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2023 Yan Kun
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.github.yankun1992.bloom;
+
+import io.github.otavia.jni.loader.NativeLoader;
+
+import java.io.IOException;
+
+public class FilterBuilder extends NativeLoader implements AutoCloseable {
+
+    final long raw;
+
+    // Set by close() once the native builder has been handed to close0, so a second close() is a no-op.
+    private boolean closed;
+
+    /**
+     * Constructs a new Bloom Filter Builder by specifying the expected size of the filter and the tolerable false
+     * positive probability. The size of the Bloom filter in bits and the optimal number of hash functions will
+     * be inferred from this.
+     *
+     * @param expected_elements          expected size of the filter
+     * @param false_positive_probability tolerable false positive probability
+     */
+    public FilterBuilder(long expected_elements, double false_positive_probability) throws IOException {
+        this(open(expected_elements, false_positive_probability));
+    }
+
+    private FilterBuilder(long raw) throws IOException {
+        super("fastbloom");
+        this.raw = raw;
+    }
+
+    /**
+     * Constructs a new Bloom Filter Builder by specifying the size of the bloom filter in bits and the number of
+     * hashes. The expected size of the filter and the tolerable false positive probability will be inferred from this.
+     *
+     * @param size   size of the bloom filter in bits
+     * @param hashes the number of hashes
+     * @return FilterBuilder
+     */
+    public static FilterBuilder fromSizeAndHashes(long size, int hashes) throws IOException {
+        // Load the native library before the first native call, exactly as open() does. Without this the
+        // library is only loaded by the constructor, i.e. after fromSizeAndHashes0 has already been invoked.
+        NativeLoader.load("fastbloom");
+        long raw = fromSizeAndHashes0(size, hashes);
+
+        return new FilterBuilder(raw);
+    }
+
+    /**
+     * Only relevant for {@link CountingBloomFilter}: controls whether inserting the same element again is counted.
+     *
+     * @param enable true to count every repeated insert, false to not count an element that is already present
+     */
+    public void enableRepeatInsert(boolean enable) {
+        enableRepeatInsert0(raw, enable);
+    }
+
+    /**
+     * Constructs a Bloom filter using the specified parameters and computing missing parameters if
+     * possible (e.g. the optimal Bloom filter bit size).
+     *
+     * @return BloomFilter
+     */
+    public BloomFilter buildBloomFilter() throws IOException {
+        long pointer = buildBloomFilter0(raw);
+        return new BloomFilter(pointer);
+    }
+
+    /**
+     * Constructs a Counting Bloom filter using the specified parameters and computing missing parameters if
+     * possible (e.g. the optimal Bloom filter bit size).
+     *
+     * @return CountingBloomFilter
+     */
+    public CountingBloomFilter buildCountingBloomFilter() throws IOException {
+        long pointer = buildCountingBloomFilter0(raw);
+        return new CountingBloomFilter(pointer);
+    }
+
+    /**
+     * Checks whether a configuration is compatible to another configuration based on the size of the Bloom
+     * filter and its hash functions.
+     */
+    public boolean isCompatibleTo(FilterBuilder builder) {
+        return isCompatibleTo0(raw, builder.raw);
+    }
+
+    /**
+     * Closes the native builder through close0 at most once; calling close() again does nothing.
+     */
+    @Override
+    public void close() throws Exception {
+        if (closed) {
+            return;
+        }
+        closed = true;
+        close0(raw);
+    }
+
+    private static long open(long expected_elements, double false_positive_probability) throws IOException {
+        NativeLoader.load("fastbloom");
+        return new0(expected_elements, false_positive_probability);
+    }
+
+    private static native long new0(long expected_elements, double false_positive_probability);
+
+    private static native long fromSizeAndHashes0(long size, int hashes);
+
+    private static native void enableRepeatInsert0(long raw, boolean enable);
+
+    private static native void complete0(long raw);
+
+    private static native boolean isCompatibleTo0(long raw, long other);
+
+    private static native void close0(long pointer);
+
+    private static native long buildBloomFilter0(long raw);
+
+    private static native long buildCountingBloomFilter0(long raw);
+
+
+}
diff --git a/fastbloomjvm/test/src/io/github/yankun1992/bloom/BloomFilterTest.java b/fastbloomjvm/test/src/io/github/yankun1992/bloom/BloomFilterTest.java
new file mode 100644
index 0000000..e4ee998
--- /dev/null
+++ b/fastbloomjvm/test/src/io/github/yankun1992/bloom/BloomFilterTest.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2023 Yan Kun
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package io.github.yankun1992.bloom; + +import org.junit.Assert; +import org.junit.Test; + +public class BloomFilterTest { + + @Test + public void testBloomBuilder() { + + try (FilterBuilder builder = new FilterBuilder(100000000, 0.01)) { + try (BloomFilter bloom = builder.buildBloomFilter()) { + bloom.addBytes("hello".getBytes()); + bloom.addInt(87); + + Assert.assertTrue(bloom.containsInt(87)); + Assert.assertTrue(bloom.containsBytes("hello".getBytes())); + Assert.assertTrue(bloom.containsStr("hello")); + + Assert.assertFalse(bloom.containsBytes("hello world".getBytes())); + + try (BloomFilter bloom2 = BloomFilter.fromBytes(bloom.getBytes(), bloom.hashes())) { + Assert.assertTrue(bloom2.containsInt(87)); + Assert.assertTrue(bloom2.containsBytes("hello".getBytes())); + Assert.assertTrue(bloom2.containsStr("hello")); + + Assert.assertFalse(bloom2.containsBytes("hello world".getBytes())); + } + + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void testBloomAdd() { + try (FilterBuilder builder = new FilterBuilder(100000000, 0.01)) { + try (BloomFilter bloom = builder.buildBloomFilter()) { + for (int i = -1_000_000; i < 1_000_000; i++) { + bloom.addInt(i); + } + for (int i = -1_000_000; i < 1_000_000; i++) { + Assert.assertTrue(bloom.containsInt(i)); + } + + Assert.assertFalse(bloom.containsInt(1000_000_000)); + Assert.assertFalse(bloom.containsInt(-1000_000_000)); + Assert.assertFalse(bloom.containsStr("hello")); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void testBloomOp() { + try (FilterBuilder builder = new FilterBuilder(100000000, 0.01)) { + try (BloomFilter bloom = builder.buildBloomFilter()) { + bloom.addBytes("hello".getBytes()); + bloom.addInt(87); + + Assert.assertTrue(bloom.containsInt(87)); + Assert.assertTrue(bloom.containsBytes("hello".getBytes())); + Assert.assertTrue(bloom.containsStr("hello")); + + bloom.clear(); + + 
Assert.assertFalse(bloom.containsInt(87)); + Assert.assertFalse(bloom.containsBytes("hello".getBytes())); + Assert.assertFalse(bloom.containsStr("hello")); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void testBloomUnion() { + try (FilterBuilder builder = new FilterBuilder(100000000, 0.01)) { + try (BloomFilter bloom = builder.buildBloomFilter()) { + bloom.addStr("hello"); + Assert.assertTrue(bloom.containsStr("hello")); + Assert.assertFalse(bloom.containsInt(87)); + + try (BloomFilter bloom2 = builder.buildBloomFilter()) { + bloom2.addInt(87); + Assert.assertFalse(bloom2.containsStr("hello")); + Assert.assertTrue(bloom2.containsInt(87)); + + // UNION + bloom.union(bloom2); + Assert.assertTrue(bloom.containsStr("hello")); + Assert.assertTrue(bloom.containsInt(87)); + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void testBloomIntersect() { + try (FilterBuilder builder = new FilterBuilder(100000000, 0.01)) { + try (BloomFilter bloom = builder.buildBloomFilter()) { + bloom.addBytes("hello".getBytes()); + bloom.addInt(87); + + Assert.assertTrue(bloom.containsInt(87)); + Assert.assertTrue(bloom.containsBytes("hello".getBytes())); + + try (BloomFilter bloom2 = builder.buildBloomFilter()) { + bloom2.addInt(87); + Assert.assertFalse(bloom2.containsStr("hello")); + Assert.assertTrue(bloom2.containsInt(87)); + + // INTERSECT + bloom.intersect(bloom2); + Assert.assertTrue(bloom.containsInt(87)); + Assert.assertFalse(bloom.containsBytes("hello".getBytes())); + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void testBloomBatch() { + try (FilterBuilder builder = new FilterBuilder(100000000, 0.01)) { + try (BloomFilter bloom = builder.buildBloomFilter()) { + int[] insert = new int[100_000]; + for (int i = 0; i < insert.length; i++) { + insert[i] = i; + } + bloom.addIntBatch(insert); + for (int i = 0; i < insert.length; i++) { + 
Assert.assertTrue(bloom.containsInt(i)); + } + Assert.assertFalse(bloom.containsInt(100_001)); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + +} diff --git a/fastbloomjvm/test/src/io/github/yankun1992/bloom/CountingBloomFilterTest.java b/fastbloomjvm/test/src/io/github/yankun1992/bloom/CountingBloomFilterTest.java new file mode 100644 index 0000000..59d114c --- /dev/null +++ b/fastbloomjvm/test/src/io/github/yankun1992/bloom/CountingBloomFilterTest.java @@ -0,0 +1,92 @@ +/* + * Copyright 2023 Yan Kun + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package io.github.yankun1992.bloom; + +import org.junit.Assert; +import org.junit.Test; + +public class CountingBloomFilterTest { + + @Test + public void testBuilder() throws Exception { + try (FilterBuilder builder = new FilterBuilder(100_000_000, 0.01)) { + builder.enableRepeatInsert(false); + try (CountingBloomFilter bloom = builder.buildCountingBloomFilter()) { + bloom.addBytes("hello".getBytes()); + bloom.addInt(87); + + Assert.assertTrue(bloom.containsInt(87)); + Assert.assertTrue(bloom.containsBytes("hello".getBytes())); + Assert.assertTrue(bloom.containsStr("hello")); + + Assert.assertFalse(bloom.containsBytes("hello world".getBytes())); + + try (CountingBloomFilter bloom2 = CountingBloomFilter.fromBytes(bloom.getBytes(), bloom.hashes(), false)) { + Assert.assertTrue(bloom2.containsInt(87)); + Assert.assertTrue(bloom2.containsBytes("hello".getBytes())); + Assert.assertTrue(bloom2.containsStr("hello")); + + Assert.assertFalse(bloom2.containsBytes("hello world".getBytes())); + } + } + } + } + + @Test + public void testFilter() throws Exception { + try (FilterBuilder builder = new FilterBuilder(100_000_000, 0.01)) { + builder.enableRepeatInsert(false); + try (CountingBloomFilter bloom = builder.buildCountingBloomFilter()) { + bloom.addBytes("hello".getBytes()); + bloom.addInt(87); + bloom.addStr("world"); + bloom.addLong(88); + + Assert.assertTrue(bloom.containsBytes("hello".getBytes())); + Assert.assertTrue(bloom.containsInt(87)); + Assert.assertTrue(bloom.containsStr("world")); + Assert.assertTrue(bloom.containsLong(88)); + + Assert.assertFalse(bloom.containsInt(88)); + Assert.assertFalse(bloom.containsLong(87)); + + bloom.removeBytes("hello".getBytes()); + bloom.removeInt(87); + bloom.removeStr("world"); + bloom.removeLong(88); + + Assert.assertFalse(bloom.containsBytes("hello".getBytes())); + Assert.assertFalse(bloom.containsInt(87)); + Assert.assertFalse(bloom.containsStr("world")); + Assert.assertFalse(bloom.containsLong(88)); + + 
bloom.addBytes("hello".getBytes());
+                bloom.addInt(87);
+                bloom.addStr("world");
+                bloom.addLong(88);
+                bloom.clear();
+                Assert.assertFalse(bloom.containsBytes("hello".getBytes()));
+                Assert.assertFalse(bloom.containsInt(87));
+                Assert.assertFalse(bloom.containsStr("world"));
+                Assert.assertFalse(bloom.containsLong(88));
+
+            }
+        }
+    }
+
+}
diff --git a/fastbloomjvm/test/src/io/github/yankun1992/bloom/CrossLanguageTest.java b/fastbloomjvm/test/src/io/github/yankun1992/bloom/CrossLanguageTest.java
new file mode 100644
index 0000000..45eabeb
--- /dev/null
+++ b/fastbloomjvm/test/src/io/github/yankun1992/bloom/CrossLanguageTest.java
@@ -0,0 +1,66 @@
+package io.github.yankun1992.bloom;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+
+/**
+ * Checks that serialized filters stored in data/bloom.bin and data/counting.bin can be loaded from Java. The files
+ * are presumably produced by py_tests/test_save_file.py, which the CI workflow runs before the Java tests.
+ */
+public class CrossLanguageTest {
+
+    /**
+     * Reads the first {@code size} bytes of the file at {@code path} and closes the file again.
+     *
+     * @param path file to read, relative to the working directory
+     * @param size number of bytes the file is expected to provide
+     * @return the bytes read
+     */
+    private static byte[] readBytes(String path, int size) throws IOException {
+        byte[] array = new byte[size];
+        // try-with-resources closes the stream even when the assertion below fails
+        try (FileInputStream stream = new FileInputStream(path)) {
+            int filled = 0;
+            // a single read() may return fewer bytes than requested, so loop until the buffer is full or EOF
+            while (filled < size) {
+                int n = stream.read(array, filled, size - filled);
+                if (n < 0) {
+                    break;
+                }
+                filled += n;
+            }
+            Assert.assertEquals(size, filled);
+        }
+        return array;
+    }
+
+    @Test
+    public void testLoadBloomFilter() throws Exception {
+        int size = 119816;
+        int hashes = 7;
+        byte[] array = readBytes("data/bloom.bin", size);
+
+        try (BloomFilter filter = BloomFilter.fromBytes(array, hashes)) {
+            Assert.assertTrue(filter.containsBytes("hello".getBytes()));
+
+            // In Python API, `add_int` is same as `addLong` in java, because python `int` type is `i64` in Rust
+            Assert.assertTrue(filter.containsLong(87));
+        }
+    }
+
+    @Test
+    public void testLoadCountingFilter() throws Exception {
+        int size = 479264;
+        int hashes = 7;
+        byte[] array = readBytes("data/counting.bin", size);
+
+        try (CountingBloomFilter filter = CountingBloomFilter.fromBytes(array, hashes)) {
+            Assert.assertTrue(filter.containsBytes("hello".getBytes()));
+            // In Python API, `add_int` is same as `addLong` in java, because python `int` type is `i64` in Rust
+            Assert.assertTrue(filter.containsLong(87));
+        }
+    }
+}
diff --git a/fastbloomjvm/test/src/io/github/yankun1992/bloom/FilterBuilderTest.java b/fastbloomjvm/test/src/io/github/yankun1992/bloom/FilterBuilderTest.java
new file mode 100644
index 0000000..416df51
--- /dev/null
+++ b/fastbloomjvm/test/src/io/github/yankun1992/bloom/FilterBuilderTest.java
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2023 Yan Kun
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package io.github.yankun1992.bloom; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; + +public class FilterBuilderTest { + + private FilterBuilder builder; + private BloomFilter bloomFilter; + private CountingBloomFilter countingBloomFilter; + + @Before + public void create() throws IOException { + builder = new FilterBuilder(10000000, 0.01); + bloomFilter = builder.buildBloomFilter(); + countingBloomFilter = builder.buildCountingBloomFilter(); + } + + @After + public void close() throws Exception { + builder.close(); + bloomFilter.close(); + countingBloomFilter.close(); + builder = null; + bloomFilter = null; + countingBloomFilter = null; + } + + @Test + public void testBloom() { + bloomFilter.addInt(1); + Assert.assertTrue(bloomFilter.containsInt(1)); + Assert.assertFalse(bloomFilter.containsInt(2)); + + bloomFilter.addStr("hello"); + Assert.assertTrue(bloomFilter.containsStr("hello")); + Assert.assertFalse(bloomFilter.containsStr("world")); + + byte[] arr = {0, 1, 2, 5}; + byte[] arr2 = {0, 1, 2, 5, 6}; + bloomFilter.addBytes(arr); + Assert.assertTrue(bloomFilter.containsBytes(arr)); + Assert.assertFalse(bloomFilter.containsBytes(arr2)); + } + + @Test + public void testCounting() { + countingBloomFilter.addInt(1); + Assert.assertTrue(countingBloomFilter.containsInt(1)); + Assert.assertFalse(countingBloomFilter.containsInt(2)); + + countingBloomFilter.addStr("hello"); + Assert.assertTrue(countingBloomFilter.containsStr("hello")); + Assert.assertFalse(countingBloomFilter.containsStr("world")); + + byte[] arr = {0, 1, 2, 5}; + byte[] arr2 = {0, 1, 2, 5, 6}; + countingBloomFilter.addBytes(arr); + Assert.assertTrue(countingBloomFilter.containsBytes(arr)); + Assert.assertFalse(countingBloomFilter.containsBytes(arr2)); + } + + @Test + public void testRepeat() { + try (FilterBuilder builder1 = FilterBuilder.fromSizeAndHashes(1000000, 7)) { + builder1.enableRepeatInsert(true); + 
try (CountingBloomFilter filter = builder1.buildCountingBloomFilter()) { + filter.addStr("hello"); + filter.addStr("hello"); + filter.removeStr("hello"); + Assert.assertTrue(filter.containsStr("hello")); + filter.removeStr("hello"); + Assert.assertFalse(filter.containsStr("hello")); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + + try (FilterBuilder builder1 = FilterBuilder.fromSizeAndHashes(1000000, 7)) { + builder1.enableRepeatInsert(false); + try (CountingBloomFilter filter = builder1.buildCountingBloomFilter()) { + filter.addStr("hello"); + filter.addStr("hello"); + Assert.assertTrue(filter.containsStr("hello")); + filter.removeStr("hello"); + Assert.assertFalse(filter.containsStr("hello")); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + + } +} diff --git a/millw b/millw new file mode 100644 index 0000000..01bc664 --- /dev/null +++ b/millw @@ -0,0 +1,189 @@ +#!/usr/bin/env sh + +# This is a wrapper script, that automatically download mill from GitHub release pages +# You can give the required mill version with --mill-version parameter +# If no version is given, it falls back to the value of DEFAULT_MILL_VERSION +# +# Project page: https://github.com/lefou/millw +# Script Version: 0.4.5 +# +# If you want to improve this script, please also contribute your changes back! +# +# Licensed under the Apache License, Version 2.0 + +set -e + +if [ -z "${DEFAULT_MILL_VERSION}" ] ; then + DEFAULT_MILL_VERSION=0.10.9 +fi + +GITHUB_CDN="https://gh.cyp0633.icu/" + +MILL_REPO_URL="https://github.com/com-lihaoyi/mill" + +if [ -z "${CURL_CMD}" ] ; then + CURL_CMD=curl +fi + +# Explicit commandline argument takes precedence over all other methods +if [ "$1" = "--mill-version" ] ; then + shift + if [ "x$1" != "x" ] ; then + MILL_VERSION="$1" + shift + else + echo "You specified --mill-version without a version." 
1>&2 + echo "Please provide a version that matches one provided on" 1>&2 + echo "${MILL_REPO_URL}/releases" 1>&2 + false + fi +fi + +# Please note, that if a MILL_VERSION is already set in the environment, +# We reuse it's value and skip searching for a value. + +# If not already set, read .mill-version file +if [ -z "${MILL_VERSION}" ] ; then + if [ -f ".mill-version" ] ; then + MILL_VERSION="$(head -n 1 .mill-version 2> /dev/null)" + elif [ -f ".config/mill-version" ] ; then + MILL_VERSION="$(head -n 1 .config/mill-version 2> /dev/null)" + fi +fi + +if [ -n "${XDG_CACHE_HOME}" ] ; then + MILL_DOWNLOAD_PATH="${XDG_CACHE_HOME}/mill/download" +else + MILL_DOWNLOAD_PATH="${HOME}/.cache/mill/download" +fi + +# If not already set, try to fetch newest from Github +if [ -z "${MILL_VERSION}" ] ; then + # TODO: try to load latest version from release page + echo "No mill version specified." 1>&2 + echo "You should provide a version via '.mill-version' file or --mill-version option." 1>&2 + + mkdir -p "${MILL_DOWNLOAD_PATH}" + LANG=C touch -d '1 hour ago' "${MILL_DOWNLOAD_PATH}/.expire_latest" 2>/dev/null || ( + # we might be on OSX or BSD which don't have -d option for touch + # but probably a -A [-][[hh]mm]SS + touch "${MILL_DOWNLOAD_PATH}/.expire_latest"; touch -A -010000 "${MILL_DOWNLOAD_PATH}/.expire_latest" + ) || ( + # in case we still failed, we retry the first touch command with the intention + # to show the (previously suppressed) error message + LANG=C touch -d '1 hour ago' "${MILL_DOWNLOAD_PATH}/.expire_latest" + ) + + # POSIX shell variant of bash's -nt operator, see https://unix.stackexchange.com/a/449744/6993 + # if [ "${MILL_DOWNLOAD_PATH}/.latest" -nt "${MILL_DOWNLOAD_PATH}/.expire_latest" ] ; then + if [ -n "$(find -L "${MILL_DOWNLOAD_PATH}/.latest" -prune -newer "${MILL_DOWNLOAD_PATH}/.expire_latest")" ]; then + # we know a current latest version + MILL_VERSION=$(head -n 1 "${MILL_DOWNLOAD_PATH}"/.latest 2> /dev/null) + fi + + if [ -z "${MILL_VERSION}" ] 
; then + # we don't know a current latest version + echo "Retrieving latest mill version ..." 1>&2 + LANG=C ${CURL_CMD} -s -i -f -I ${MILL_REPO_URL}/releases/latest 2> /dev/null | grep --ignore-case Location: | sed s'/^.*tag\///' | tr -d '\r\n' > "${MILL_DOWNLOAD_PATH}/.latest" + MILL_VERSION=$(head -n 1 "${MILL_DOWNLOAD_PATH}"/.latest 2> /dev/null) + fi + + if [ -z "${MILL_VERSION}" ] ; then + # Last resort + MILL_VERSION="${DEFAULT_MILL_VERSION}" + echo "Falling back to hardcoded mill version ${MILL_VERSION}" 1>&2 + else + echo "Using mill version ${MILL_VERSION}" 1>&2 + fi +fi + +MILL="${MILL_DOWNLOAD_PATH}/${MILL_VERSION}" + +try_to_use_system_mill() { + MILL_IN_PATH="$(command -v mill || true)" + + if [ -z "${MILL_IN_PATH}" ]; then + return + fi + + UNIVERSAL_SCRIPT_MAGIC="@ 2>/dev/null # 2>nul & echo off & goto BOF" + + if ! head -c 128 "${MILL_IN_PATH}" | grep -qF "${UNIVERSAL_SCRIPT_MAGIC}"; then + if [ -n "${MILLW_VERBOSE}" ]; then + echo "Could not determine mill version of ${MILL_IN_PATH}, as it does not start with the universal script magic2" 1>&2 + fi + return + fi + + # Roughly the size of the universal script. + MILL_VERSION_SEARCH_RANGE="2403" + MILL_IN_PATH_VERSION=$(head -c "${MILL_VERSION_SEARCH_RANGE}" "${MILL_IN_PATH}" |\ + sed -n 's/^.*-DMILL_VERSION=\([^\s]*\) .*$/\1/p' |\ + head -n 1) + + if [ -z "${MILL_IN_PATH_VERSION}" ]; then + echo "Could not determine mill version, even though ${MILL_IN_PATH} has the universal script magic" 1>&2 + return + fi + + if [ "${MILL_IN_PATH_VERSION}" = "${MILL_VERSION}" ]; then + MILL="${MILL_IN_PATH}" + fi +} +try_to_use_system_mill + +# If not already downloaded, download it +if [ ! -s "${MILL}" ] ; then + + # support old non-XDG download dir + MILL_OLD_DOWNLOAD_PATH="${HOME}/.mill/download" + OLD_MILL="${MILL_OLD_DOWNLOAD_PATH}/${MILL_VERSION}" + if [ -x "${OLD_MILL}" ] ; then + MILL="${OLD_MILL}" + else + VERSION_PREFIX="$(echo $MILL_VERSION | cut -b -4)" + case $VERSION_PREFIX in + 0.0. | 0.1. | 0.2. 
| 0.3. | 0.4. ) + DOWNLOAD_SUFFIX="" + ;; + *) + DOWNLOAD_SUFFIX="-assembly" + ;; + esac + unset VERSION_PREFIX + + DOWNLOAD_FILE=$(mktemp mill.XXXXXX) + # TODO: handle command not found + echo "Downloading mill ${MILL_VERSION} from ${MILL_REPO_URL}/releases ..." 1>&2 + MILL_VERSION_TAG=$(echo $MILL_VERSION | sed -E 's/([^-]+)(-M[0-9]+)?(-.*)?/\1\2/') + ${CURL_CMD} -f -L -o "${DOWNLOAD_FILE}" "${MILL_REPO_URL}/releases/download/${MILL_VERSION_TAG}/${MILL_VERSION}${DOWNLOAD_SUFFIX}" + chmod +x "${DOWNLOAD_FILE}" + mkdir -p "${MILL_DOWNLOAD_PATH}" + mv "${DOWNLOAD_FILE}" "${MILL}" + + unset DOWNLOAD_FILE + unset DOWNLOAD_SUFFIX + fi +fi + +if [ -z "$MILL_MAIN_CLI" ] ; then + MILL_MAIN_CLI="${0}" +fi + +MILL_FIRST_ARG="" +if [ "$1" = "--bsp" ] || [ "$1" = "-i" ] || [ "$1" = "--interactive" ] || [ "$1" = "--no-server" ] || [ "$1" = "--repl" ] || [ "$1" = "--help" ] ; then + # Need to preserve the first position of those listed options + MILL_FIRST_ARG=$1 + shift +fi + +unset MILL_DOWNLOAD_PATH +unset MILL_OLD_DOWNLOAD_PATH +unset OLD_MILL +unset MILL_VERSION +unset MILL_VERSION_TAG +unset MILL_REPO_URL + +# We don't quote MILL_FIRST_ARG on purpose, so we can expand the empty value without quotes +# shellcheck disable=SC2086 +exec "${MILL}" $MILL_FIRST_ARG -D "mill.main.cli=${MILL_MAIN_CLI}" "$@" \ No newline at end of file diff --git a/millw.bat b/millw.bat new file mode 100644 index 0000000..f1f0141 --- /dev/null +++ b/millw.bat @@ -0,0 +1,173 @@ +@echo off + +rem This is a wrapper script, that automatically download mill from GitHub release pages +rem You can give the required mill version with --mill-version parameter +rem If no version is given, it falls back to the value of DEFAULT_MILL_VERSION +rem +rem Project page: https://github.com/lefou/millw +rem Script Version: 0.4.6 +rem +rem If you want to improve this script, please also contribute your changes back! 
rem Download the requested mill launcher when it is not cached yet.
rem Reads MILL, MILL_VERSION, MILL_DOWNLOAD_PATH, MILL_REPO_URL and
rem GITHUB_RELEASE_CDN, all of which are set earlier in this script.
rem Exits with code 1 when no file could be downloaded.
if not exist "%MILL%" (
    rem Releases 0.0.x to 0.4.x were published without the -assembly suffix
    set VERSION_PREFIX=%MILL_VERSION:~0,4%
    set DOWNLOAD_SUFFIX=-assembly
    if [!VERSION_PREFIX!]==[0.0.] set DOWNLOAD_SUFFIX=
    if [!VERSION_PREFIX!]==[0.1.] set DOWNLOAD_SUFFIX=
    if [!VERSION_PREFIX!]==[0.2.] set DOWNLOAD_SUFFIX=
    if [!VERSION_PREFIX!]==[0.3.] set DOWNLOAD_SUFFIX=
    if [!VERSION_PREFIX!]==[0.4.] set DOWNLOAD_SUFFIX=
    set VERSION_PREFIX=

    rem The release tag is the base version plus an optional milestone part
    rem Clear the milestone first because for /F assigns nothing when the
    rem version has no second dash-separated token
    set MILL_VERSION_MILESTONE=
    for /F "delims=- tokens=1" %%A in ("!MILL_VERSION!") do set MILL_VERSION_BASE=%%A
    for /F "delims=- tokens=2" %%A in ("!MILL_VERSION!") do set MILL_VERSION_MILESTONE=%%A
    set VERSION_MILESTONE_START=!MILL_VERSION_MILESTONE:~0,1!
    if [!VERSION_MILESTONE_START!]==[M] (
        rem Quote the whole assignment so no literal quotes end up in the URL
        set "MILL_VERSION_TAG=!MILL_VERSION_BASE!-!MILL_VERSION_MILESTONE!"
    ) else (
        set MILL_VERSION_TAG=!MILL_VERSION_BASE!
    )

    rem there seems to be no way to generate a unique temporary file path (on native Windows)
    set DOWNLOAD_FILE=%MILL%.%random%.tmp

    set DOWNLOAD_URL=!GITHUB_RELEASE_CDN!%MILL_REPO_URL%/releases/download/!MILL_VERSION_TAG!/!MILL_VERSION!!DOWNLOAD_SUFFIX!

    echo Downloading mill %MILL_VERSION% from !DOWNLOAD_URL! ... 1>&2

    if not exist "%MILL_DOWNLOAD_PATH%" mkdir "%MILL_DOWNLOAD_PATH%"
    rem curl is bundled with recent Windows 10
    rem but I don't think we can expect all the users to have it in 2019
    where /Q curl
    rem ERRORLEVEL must use delayed expansion here because a percent
    rem expansion would be frozen when this whole block was parsed
    if !ERRORLEVEL! EQU 0 (
        curl -f -L "!DOWNLOAD_URL!" -o "!DOWNLOAD_FILE!"
    ) else (
        rem bitsadmin seems to be available on Windows 7
        rem without /dynamic, github returns 403
        rem bitsadmin is sometimes needlessly slow but it looks better with /priority foreground
        bitsadmin /transfer millDownloadJob /dynamic /priority foreground "!DOWNLOAD_URL!" "!DOWNLOAD_FILE!"
    )
    if not exist "!DOWNLOAD_FILE!" (
        echo Could not download mill %MILL_VERSION% 1>&2
        exit /b 1
    )

    rem Publish the finished download under its final cached name
    move /y "!DOWNLOAD_FILE!" "%MILL%"

    set DOWNLOAD_FILE=
    set DOWNLOAD_SUFFIX=
)
def test_save():
    """Serialize a Bloom filter and a counting Bloom filter into ``data/``.

    Both filters are built for 100_000 expected elements at a 0.01 false
    positive rate and receive the same two entries (``hello`` and ``87``).
    Their raw bytes are written to ``data/bloom.bin`` and
    ``data/counting.bin``, relative to the current working directory, and
    the byte length and hash count of each filter are printed.

    NOTE(review): the CI workflow runs this script right before the JVM
    tests, so these files are presumably the fixtures read by the Java
    cross-language test -- confirm before renaming or moving them.
    """
    bloom = BloomFilter(100_000, 0.01)
    bloom.add_bytes(b'hello')
    bloom.add(87)

    builder = FilterBuilder(100_000, 0.01)
    # Counting variant, obtained through the builder rather than a constructor.
    cbf = builder.build_counting_bloom_filter()

    cbf.add('hello')
    cbf.add(87)

    # Always start from an empty output directory. Recreating it already
    # guarantees that no stale bloom.bin / counting.bin is left behind, so no
    # per-file removal (and no blanket exception swallowing) is needed.
    if os.path.exists('data'):
        shutil.rmtree('data')
    os.makedirs('data')

    with open('data/bloom.bin', "wb") as f:
        array = bloom.get_bytes()
        f.write(array)
        print(len(array), bloom.hashes())

    with open('data/counting.bin', 'wb') as f:
        array = cbf.get_bytes()
        f.write(array)
        print(len(array), cbf.hashes())