From 416227263e2a9e7856f7b0e55f65860636847ce4 Mon Sep 17 00:00:00 2001 From: broccoliSpicy Date: Tue, 12 Nov 2024 14:59:14 -0500 Subject: [PATCH 1/6] feat: fsst with miniblock --- protos/encodings.proto | 6 + rust/lance-encoding/src/decoder.rs | 4 + rust/lance-encoding/src/encoder.rs | 17 +- rust/lance-encoding/src/encodings/physical.rs | 1 + .../src/encodings/physical/fsst.rs | 154 +++++++++++++++++- rust/lance-encoding/src/format.rs | 16 +- 6 files changed, 188 insertions(+), 10 deletions(-) diff --git a/protos/encodings.proto b/protos/encodings.proto index c44ec9d61a..359539ccbf 100644 --- a/protos/encodings.proto +++ b/protos/encodings.proto @@ -233,6 +233,11 @@ message Binary { message BinaryMiniBlock { } +message FsstMiniBlock { + ArrayEncoding BinaryMiniBlock = 1; + bytes symbol_table = 2; +} + message Fsst { ArrayEncoding binary = 1; bytes symbol_table = 2; @@ -273,6 +278,7 @@ message ArrayEncoding { Constant constant = 13; Bitpack2 bitpack2 = 14; BinaryMiniBlock binary_mini_block = 15; + FsstMiniBlock fsst_mini_block = 16; } } diff --git a/rust/lance-encoding/src/decoder.rs b/rust/lance-encoding/src/decoder.rs index 0e0f366a46..e2983a77db 100644 --- a/rust/lance-encoding/src/decoder.rs +++ b/rust/lance-encoding/src/decoder.rs @@ -248,6 +248,7 @@ use crate::encodings::logical::r#struct::{ }; use crate::encodings::physical::binary::BinaryMiniBlockDecompressor; use crate::encodings::physical::bitpack_fastlanes::BitpackMiniBlockDecompressor; +use crate::encodings::physical::fsst::FsstMiniBlockDecompressor; use crate::encodings::physical::value::{ConstantDecompressor, ValueDecompressor}; use crate::encodings::physical::{ColumnBuffers, FileBuffers}; use crate::format::pb::{self, column_encoding}; @@ -498,6 +499,9 @@ impl DecompressorStrategy for CoreDecompressorStrategy { pb::array_encoding::ArrayEncoding::BinaryMiniBlock(_) => { Ok(Box::new(BinaryMiniBlockDecompressor::default())) } + pb::array_encoding::ArrayEncoding::FsstMiniBlock(description) => { + Ok(Box::new(FsstMiniBlockDecompressor::new(description))) + } _ => todo!(), } } diff --git a/rust/lance-encoding/src/encoder.rs b/rust/lance-encoding/src/encoder.rs index 57c73ff8e3..1bb837d1d3 100644 --- a/rust/lance-encoding/src/encoder.rs +++ b/rust/lance-encoding/src/encoder.rs @@ -31,7 +31,7 @@ use crate::encodings::physical::bitpack_fastlanes::{ }; use crate::encodings::physical::block_compress::{CompressionConfig, CompressionScheme}; use crate::encodings::physical::dictionary::AlreadyDictionaryEncoder; -use crate::encodings::physical::fsst::FsstArrayEncoder; +use crate::encodings::physical::fsst::{FsstArrayEncoder, FsstMiniBlockEncoder}; use crate::encodings::physical::packed_struct::PackedStructEncoder; use crate::format::ProtobufUtils; use crate::repdef::RepDefBuilder; @@ -791,7 +791,7 @@ impl CompressionStrategy for CoreArrayEncodingStrategy { if let DataBlock::FixedWidth(ref fixed_width_data) = data { let bit_widths = data .get_stat(Stat::BitWidth) - .expect("FixedWidthDataBlock should have valid bit width statistics"); + .expect("FixedWidthDataBlock should have valid `Stat::BitWidth` statistics"); // Temporary hack to work around https://github.com/lancedb/lance/issues/3102 // Ideally we should still be able to bit-pack here (either to 0 or 1 bit per value) let has_all_zeros = bit_widths @@ -810,6 +810,19 @@ impl CompressionStrategy for CoreArrayEncodingStrategy { } if let DataBlock::VariableWidth(ref variable_width_data) = data { if variable_width_data.bits_per_offset == 32 { + let data_size = variable_width_data.get_stat(Stat::DataSize).expect( + "VariableWidth DataBlock should have valid `Stat::DataSize` statistics", + ); + let data_size = data_size.as_primitive::().value(0); + + let max_len = variable_width_data.get_stat(Stat::MaxLength).expect( + "VariableWidth DataBlock should have valid `Stat::DataSize` statistics", + ); + let max_len = max_len.as_primitive::().value(0); + + if max_len > 4 && data_size >= 4 * 1024 * 1024 { + return Ok(Box::new(FsstMiniBlockEncoder::default())); + } return Ok(Box::new(BinaryMiniBlockEncoder::default())); } } diff --git a/rust/lance-encoding/src/encodings/physical.rs b/rust/lance-encoding/src/encodings/physical.rs index 5384455248..5449dd4bdc 100644 --- a/rust/lance-encoding/src/encodings/physical.rs +++ b/rust/lance-encoding/src/encodings/physical.rs @@ -285,6 +285,7 @@ pub fn decoder_from_array_encoding( pb::array_encoding::ArrayEncoding::Constant(_) => unreachable!(), pb::array_encoding::ArrayEncoding::Bitpack2(_) => unreachable!(), pb::array_encoding::ArrayEncoding::BinaryMiniBlock(_) => unreachable!(), + pb::array_encoding::ArrayEncoding::FsstMiniBlock(_) => unreachable!(), } } diff --git a/rust/lance-encoding/src/encodings/physical/fsst.rs b/rust/lance-encoding/src/encodings/physical/fsst.rs index 34f488306f..681425bfaf 100644 --- a/rust/lance-encoding/src/encodings/physical/fsst.rs +++ b/rust/lance-encoding/src/encodings/physical/fsst.rs @@ -7,17 +7,23 @@ use arrow_buffer::ScalarBuffer; use arrow_schema::DataType; use futures::{future::BoxFuture, FutureExt}; -use lance_core::Result; +use lance_core::{Error, Result}; +use snafu::{location, Location}; use crate::{ buffer::LanceBuffer, data::{BlockInfo, DataBlock, NullableDataBlock, UsedEncoding, VariableWidthBlock}, - decoder::{PageScheduler, PrimitivePageDecoder}, - encoder::{ArrayEncoder, EncodedArray}, - format::ProtobufUtils, + decoder::{MiniBlockDecompressor, PageScheduler, PrimitivePageDecoder}, + encoder::{ArrayEncoder, EncodedArray, MiniBlockCompressed, MiniBlockCompressor}, + format::{ + pb::{self}, + ProtobufUtils, + }, EncodingsIo, }; +use super::binary::{BinaryMiniBlockDecompressor, BinaryMiniBlockEncoder}; + #[derive(Debug)] pub struct FsstPageScheduler { inner_scheduler: Box, @@ -201,6 +207,121 @@ impl ArrayEncoder for FsstArrayEncoder { } } +#[derive(Debug, Default)] +pub struct FsstMiniBlockEncoder {} + +impl MiniBlockCompressor for FsstMiniBlockEncoder { + fn compress( + &self, + data: DataBlock, + ) -> Result<(MiniBlockCompressed, crate::format::pb::ArrayEncoding)> { + match data { + DataBlock::VariableWidth(mut variable_width) => { + let offsets = variable_width.offsets.borrow_to_typed_slice::(); + let offsets_slice = offsets.as_ref(); + let bytes_data = variable_width.data.into_buffer(); + + // prepare compression output buffer + let mut dest_offsets = vec![0_i32; offsets_slice.len() * 2]; + let mut dest_values = vec![0_u8; bytes_data.len() * 2]; + let mut symbol_table = vec![0_u8; fsst::fsst::FSST_SYMBOL_TABLE_SIZE]; + + // fsst compression + fsst::fsst::compress( + &mut symbol_table, + bytes_data.as_slice(), + offsets_slice, + &mut dest_values, + &mut dest_offsets, + )?; + + // construct `DataBlock` for BinaryMiniBlockEncoder, we may want some `DataBlock` construct methods later + let data_block = DataBlock::VariableWidth(VariableWidthBlock { + data: LanceBuffer::reinterpret_vec(dest_values), + bits_per_offset: 32, + offsets: LanceBuffer::reinterpret_vec(dest_offsets), + num_values: variable_width.num_values, + block_info: BlockInfo::new(), + used_encodings: UsedEncoding::new(), + }); + + // compress the fsst compressed data using `BinaryMiniBlockEnocder` + let binary_compressor = + Box::new(BinaryMiniBlockEncoder::default()) as Box; + + let (binary_miniblock_compressed, binary_array_encoding) = + binary_compressor.compress(data_block)?; + + Ok(( + binary_miniblock_compressed, + ProtobufUtils::fsst_mini_block(binary_array_encoding, symbol_table), + )) + } + _ => Err(Error::InvalidInput { + source: format!( + "Cannot compress a data block of type {} with BinaryMiniBlockEncoder", + data.name() + ) + .into(), + location: location!(), + }), + } + } +} + +#[derive(Debug)] +pub struct FsstMiniBlockDecompressor { + symbol_table: Vec, +} + +impl FsstMiniBlockDecompressor { + pub fn new(description: &pb::FsstMiniBlock) -> Self { + Self { + symbol_table: description.symbol_table.clone(), + } + } +} + +impl MiniBlockDecompressor for FsstMiniBlockDecompressor { + fn decompress(&self, data: LanceBuffer, num_values: u64) -> Result { + let binary_decompressor = + Box::new(BinaryMiniBlockDecompressor::default()) as Box; + + let compressed_data_block = binary_decompressor.decompress(data, num_values)?; + + let mut compressed_data_block = match compressed_data_block { + DataBlock::VariableWidth(variable) => variable, + _ => panic!("Received non-variable width data from BinaryMiniBlockDecompressor."), + }; + + let bytes = compressed_data_block.data.borrow_to_typed_slice::(); + let bytes = bytes.as_ref(); + + let offsets = compressed_data_block.offsets.borrow_to_typed_slice::(); + let offsets = offsets.as_ref(); + + let mut decompress_bytes_buf = vec![0u8; 4 * 1024 * 8]; + let mut decompress_offset_buf = vec![0i32; 1024]; + + fsst::fsst::decompress( + &self.symbol_table, + &bytes, + &offsets, + &mut decompress_bytes_buf, + &mut decompress_offset_buf, + )?; + + Ok(DataBlock::VariableWidth(VariableWidthBlock { + data: LanceBuffer::Owned(decompress_bytes_buf), + offsets: LanceBuffer::reinterpret_vec(decompress_offset_buf), + bits_per_offset: 32, + num_values, + block_info: BlockInfo::new(), + used_encodings: UsedEncoding::new(), + })) + } +} + #[cfg(test)] mod tests { @@ -208,7 +329,10 @@ mod tests { use lance_datagen::{ByteCount, RowCount}; - use crate::testing::{check_round_trip_encoding_of_data, TestCases}; + use crate::{ + testing::{check_round_trip_encoding_of_data, TestCases}, + version::LanceFileVersion, + }; #[test_log::test(tokio::test)] async fn test_fsst() { @@ -218,6 +342,24 @@ mod tests { .unwrap() .column(0) .clone(); - check_round_trip_encoding_of_data(vec![arr], &TestCases::default(), HashMap::new()).await; + check_round_trip_encoding_of_data( + vec![arr], + &TestCases::default().with_file_version(LanceFileVersion::V2_1), + HashMap::new(), + ) + .await; + + let arr = lance_datagen::gen() + .anon_col(lance_datagen::array::rand_utf8(ByteCount::from(64), false)) + .into_batch_rows(RowCount::from(1_000_000)) + .unwrap() + .column(0) + .clone(); + check_round_trip_encoding_of_data( + vec![arr], + &TestCases::default().with_file_version(LanceFileVersion::V2_1), + HashMap::new(), + ) + .await; } } diff --git a/rust/lance-encoding/src/format.rs b/rust/lance-encoding/src/format.rs index 83b86d5cca..c0a4873a78 100644 --- a/rust/lance-encoding/src/format.rs +++ b/rust/lance-encoding/src/format.rs @@ -20,8 +20,8 @@ use pb::{ nullable::{AllNull, NoNull, Nullability, SomeNull}, page_layout::Layout, AllNullLayout, ArrayEncoding, Binary, BinaryMiniBlock, Bitpack2, Bitpacked, BitpackedForNonNeg, - Dictionary, FixedSizeBinary, FixedSizeList, Flat, Fsst, MiniBlockLayout, Nullable, - PackedStruct, PageLayout, + Dictionary, FixedSizeBinary, FixedSizeList, Flat, Fsst, FsstMiniBlock, MiniBlockLayout, + Nullable, PackedStruct, PageLayout, }; use crate::encodings::physical::block_compress::CompressionConfig; @@ -139,6 +139,18 @@ impl ProtobufUtils { } } + // Construct a `FsstMiniBlock` ArrayEncoding, the inner `binary_mini_block` encoding is actually + // not used and `FsstMiniBlockDecompressor` constructs a `binary_mini_block` in a `hard-coded` fashion. + // This can be an optimization later. + pub fn fsst_mini_block(data: ArrayEncoding, symbol_table: Vec) -> ArrayEncoding { + ArrayEncoding { + array_encoding: Some(ArrayEncodingEnum::FsstMiniBlock(Box::new(FsstMiniBlock { + binary_mini_block: Some(Box::new(data)), + symbol_table, + }))), + } + } + pub fn packed_struct( child_encodings: Vec, packed_buffer_index: u32, From a7f803de9ead868dc37d4067f4e89316a9a11db5 Mon Sep 17 00:00:00 2001 From: broccoliSpicy Date: Wed, 13 Nov 2024 12:14:24 -0500 Subject: [PATCH 2/6] lint --- rust/lance-encoding/src/encodings/physical/fsst.rs | 13 +++++++------ rust/lance-encoding/src/format.rs | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/rust/lance-encoding/src/encodings/physical/fsst.rs b/rust/lance-encoding/src/encodings/physical/fsst.rs index 681425bfaf..526a37b4b6 100644 --- a/rust/lance-encoding/src/encodings/physical/fsst.rs +++ b/rust/lance-encoding/src/encodings/physical/fsst.rs @@ -284,29 +284,30 @@ impl FsstMiniBlockDecompressor { impl MiniBlockDecompressor for FsstMiniBlockDecompressor { fn decompress(&self, data: LanceBuffer, num_values: u64) -> Result { + // Step 1. decompress data use `BinaryMiniBlockDecompressor` let binary_decompressor = Box::new(BinaryMiniBlockDecompressor::default()) as Box; - let compressed_data_block = binary_decompressor.decompress(data, num_values)?; - let mut compressed_data_block = match compressed_data_block { DataBlock::VariableWidth(variable) => variable, _ => panic!("Received non-variable width data from BinaryMiniBlockDecompressor."), }; + // Step 2. FSST decompress let bytes = compressed_data_block.data.borrow_to_typed_slice::(); let bytes = bytes.as_ref(); - let offsets = compressed_data_block.offsets.borrow_to_typed_slice::(); let offsets = offsets.as_ref(); + // FSST decompression output buffer, the `MiniBlock` has a size limit of `4 KiB` and + // the FSST decompression algorithm output is at most `8 * input_size` + // Since `MiniBlock Size` <= 4 KiB and `offsets` are type `i32, it has number of `offsets` <= 1024. let mut decompress_bytes_buf = vec![0u8; 4 * 1024 * 8]; let mut decompress_offset_buf = vec![0i32; 1024]; - fsst::fsst::decompress( &self.symbol_table, - &bytes, - &offsets, + bytes, + offsets, &mut decompress_bytes_buf, &mut decompress_offset_buf, )?; diff --git a/rust/lance-encoding/src/format.rs b/rust/lance-encoding/src/format.rs index c0a4873a78..a47544f2f7 100644 --- a/rust/lance-encoding/src/format.rs +++ b/rust/lance-encoding/src/format.rs @@ -139,7 +139,7 @@ impl ProtobufUtils { } } - // Construct a `FsstMiniBlock` ArrayEncoding, the inner `binary_mini_block` encoding is actually + // Construct a `FsstMiniBlock` ArrayEncoding, the inner `binary_mini_block` encoding is actually // not used and `FsstMiniBlockDecompressor` constructs a `binary_mini_block` in a `hard-coded` fashion. // This can be an optimization later. pub fn fsst_mini_block(data: ArrayEncoding, symbol_table: Vec) -> ArrayEncoding { From afa1768409417d9e874c5e4155da9cb67a56e65d Mon Sep 17 00:00:00 2001 From: broccoliSpicy Date: Wed, 13 Nov 2024 12:32:11 -0500 Subject: [PATCH 3/6] use FSST_LEAST_INPUT_SIZE and FSST_LEAST_INPUT_MAX_LENGTH constant --- rust/lance-encoding/src/compression_algo/fsst/src/fsst.rs | 7 +++++-- rust/lance-encoding/src/encoder.rs | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/rust/lance-encoding/src/compression_algo/fsst/src/fsst.rs b/rust/lance-encoding/src/compression_algo/fsst/src/fsst.rs index d2ad1e33c1..d59c1aff72 100644 --- a/rust/lance-encoding/src/compression_algo/fsst/src/fsst.rs +++ b/rust/lance-encoding/src/compression_algo/fsst/src/fsst.rs @@ -18,8 +18,11 @@ const FSST_CODE_MASK: u16 = FSST_CODE_MAX - 1; const FSST_SAMPLETARGET: usize = 1 << 14; const FSST_SAMPLEMAXSZ: usize = 2 * FSST_SAMPLETARGET; -// the the input size is less than 4MB, we mark the file header and copy the input to the output as is -const FSST_LEAST_INPUT_SIZE: usize = 4 * 1024 * 1024; // 4MB +// if the input size is less than 4MB, we mark the file header and copy the input to the output as is +pub const FSST_LEAST_INPUT_SIZE: usize = 4 * 1024 * 1024; // 4MB + +// if the max length of the input strings are less than `FSST_LEAST_INPUT_MAX_LENGTH`, we shouldn't use FSST. +pub const FSST_LEAST_INPUT_MAX_LENGTH: u64 = 5; // we only use the lower 32 bits in icl, so we can use 1 << 32 to represent a free slot in the hash table const FSST_ICL_FREE: u64 = 1 << 32; diff --git a/rust/lance-encoding/src/encoder.rs b/rust/lance-encoding/src/encoder.rs index 1bb837d1d3..5ae9e7c349 100644 --- a/rust/lance-encoding/src/encoder.rs +++ b/rust/lance-encoding/src/encoder.rs @@ -49,6 +49,7 @@ use crate::{ }, format::pb, }; +use fsst::fsst::{FSST_LEAST_INPUT_MAX_LENGTH, FSST_LEAST_INPUT_SIZE}; use hyperloglogplus::{HyperLogLog, HyperLogLogPlus}; use std::collections::hash_map::RandomState; @@ -820,7 +821,7 @@ impl CompressionStrategy for CoreArrayEncodingStrategy { ); let max_len = max_len.as_primitive::().value(0); - if max_len > 4 && data_size >= 4 * 1024 * 1024 { + if max_len >= FSST_LEAST_INPUT_MAX_LENGTH && data_size >= FSST_LEAST_INPUT_SIZE as u64 { return Ok(Box::new(FsstMiniBlockEncoder::default())); } return Ok(Box::new(BinaryMiniBlockEncoder::default())); From a0ed83dcb11de89c57c5373376e615987501ca4f Mon Sep 17 00:00:00 2001 From: broccoliSpicy Date: Wed, 13 Nov 2024 12:38:04 -0500 Subject: [PATCH 4/6] lint --- rust/lance-encoding/src/decoder.rs | 2 +- rust/lance-encoding/src/encoder.rs | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/rust/lance-encoding/src/decoder.rs b/rust/lance-encoding/src/decoder.rs index e5974eceab..805c7685ab 100644 --- a/rust/lance-encoding/src/decoder.rs +++ b/rust/lance-encoding/src/decoder.rs @@ -248,8 +248,8 @@ use crate::encodings::logical::r#struct::{ }; use crate::encodings::physical::binary::BinaryMiniBlockDecompressor; use crate::encodings::physical::bitpack_fastlanes::BitpackMiniBlockDecompressor; -use crate::encodings::physical::fsst::FsstMiniBlockDecompressor; use crate::encodings::physical::fixed_size_list::FslPerValueDecompressor; +use crate::encodings::physical::fsst::FsstMiniBlockDecompressor; use crate::encodings::physical::value::{ConstantDecompressor, ValueDecompressor}; use crate::encodings::physical::{ColumnBuffers, FileBuffers}; use crate::format::pb::{self, column_encoding}; diff --git a/rust/lance-encoding/src/encoder.rs b/rust/lance-encoding/src/encoder.rs index 50c97f2943..07787f222a 100644 --- a/rust/lance-encoding/src/encoder.rs +++ b/rust/lance-encoding/src/encoder.rs @@ -30,8 +30,8 @@ use crate::encodings::physical::bitpack_fastlanes::{ }; use crate::encodings::physical::block_compress::{CompressionConfig, CompressionScheme}; use crate::encodings::physical::dictionary::AlreadyDictionaryEncoder; -use crate::encodings::physical::fsst::{FsstArrayEncoder, FsstMiniBlockEncoder}; use crate::encodings::physical::fixed_size_list::FslPerValueCompressor; +use crate::encodings::physical::fsst::{FsstArrayEncoder, FsstMiniBlockEncoder}; use crate::encodings::physical::packed_struct::PackedStructEncoder; use crate::format::ProtobufUtils; use crate::repdef::RepDefBuilder; @@ -822,7 +822,9 @@ impl CompressionStrategy for CoreArrayEncodingStrategy { ); let max_len = max_len.as_primitive::().value(0); - if max_len >= FSST_LEAST_INPUT_MAX_LENGTH && data_size >= FSST_LEAST_INPUT_SIZE as u64 { + if max_len >= FSST_LEAST_INPUT_MAX_LENGTH + && data_size >= FSST_LEAST_INPUT_SIZE as u64 + { return Ok(Box::new(FsstMiniBlockEncoder::default())); } return Ok(Box::new(BinaryMiniBlockEncoder::default())); From 09e968c36f99b6e35a2aa3606016a336547466c8 Mon Sep 17 00:00:00 2001 From: broccoliSpicy Date: Wed, 13 Nov 2024 12:49:38 -0500 Subject: [PATCH 5/6] lint --- rust/lance-encoding/src/encodings/physical/fsst.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rust/lance-encoding/src/encodings/physical/fsst.rs b/rust/lance-encoding/src/encodings/physical/fsst.rs index 526a37b4b6..fa5e5cec9a 100644 --- a/rust/lance-encoding/src/encodings/physical/fsst.rs +++ b/rust/lance-encoding/src/encodings/physical/fsst.rs @@ -288,9 +288,8 @@ impl MiniBlockDecompressor for FsstMiniBlockDecompressor { let binary_decompressor = Box::new(BinaryMiniBlockDecompressor::default()) as Box; let compressed_data_block = binary_decompressor.decompress(data, num_values)?; - let mut compressed_data_block = match compressed_data_block { - DataBlock::VariableWidth(variable) => variable, - _ => panic!("Received non-variable width data from BinaryMiniBlockDecompressor."), + let DataBlock::VariableWidth(mut compressed_data_block) = compressed_data_block else { + panic!("BinaryMiniBlockDecompressor should output VariableWidth DataBlock") }; // Step 2. FSST decompress From 2397c1921bf76de7a83b3653c5dd1e74244016b3 Mon Sep 17 00:00:00 2001 From: broccoliSpicy Date: Wed, 13 Nov 2024 12:51:35 -0500 Subject: [PATCH 6/6] typo --- rust/lance-encoding/src/encodings/physical/fsst.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance-encoding/src/encodings/physical/fsst.rs b/rust/lance-encoding/src/encodings/physical/fsst.rs index fa5e5cec9a..382c93fce5 100644 --- a/rust/lance-encoding/src/encodings/physical/fsst.rs +++ b/rust/lance-encoding/src/encodings/physical/fsst.rs @@ -245,7 +245,7 @@ impl MiniBlockCompressor for FsstMiniBlockEncoder { used_encodings: UsedEncoding::new(), }); - // compress the fsst compressed data using `BinaryMiniBlockEnocder` + // compress the fsst compressed data using `BinaryMiniBlockEncoder` let binary_compressor = Box::new(BinaryMiniBlockEncoder::default()) as Box;