Skip to content

Commit

Permalink
chore: fix miniblock selection logic error (#3107)
Browse files Browse the repository at this point in the history
This PR tries to fix a logic error during selecting miniblock encoder.
  • Loading branch information
broccoliSpicy authored Nov 8, 2024
1 parent 3f2faf2 commit c237bcb
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 14 deletions.
14 changes: 1 addition & 13 deletions rust/lance-encoding/src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::{collections::HashMap, env, sync::Arc};

use arrow::array::AsArray;
use arrow::datatypes::UInt64Type;
use arrow_array::PrimitiveArray;
use arrow_array::{Array, ArrayRef, RecordBatch, UInt8Array};
use arrow_schema::DataType;
use bytes::{Bytes, BytesMut};
Expand Down Expand Up @@ -780,8 +779,6 @@ impl ArrayEncodingStrategy for CoreArrayEncodingStrategy {
}
}

const MINIBLOCK_MAX_BYTE_LENGTH_PER_VALUE: u64 = 256;

impl CompressionStrategy for CoreArrayEncodingStrategy {
fn create_miniblock_compressor(
&self,
Expand Down Expand Up @@ -810,16 +807,7 @@ impl CompressionStrategy for CoreArrayEncodingStrategy {
}
if let DataBlock::VariableWidth(ref variable_width_data) = data {
if variable_width_data.bits_per_offset == 32 {
let max_len = data
.get_stat(Stat::MaxLength)
.expect("VariableWidthDataBlock should have valid max length statistics");
let max_len = max_len
.as_any()
.downcast_ref::<PrimitiveArray<UInt64Type>>()
.unwrap();
if max_len.value(0) < MINIBLOCK_MAX_BYTE_LENGTH_PER_VALUE {
return Ok(Box::new(BinaryMiniBlockEncoder::default()));
}
return Ok(Box::new(BinaryMiniBlockEncoder::default()));
}
}
Ok(Box::new(ValueEncoder::default()))
Expand Down
4 changes: 3 additions & 1 deletion rust/lance-encoding/src/encodings/logical/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1588,12 +1588,14 @@ impl PrimitiveStructuralEncoder {
// data doesn't even fit in a mini-block and the block overhead gets
// too large and we prefer zipped.
fn is_narrow(data_block: &DataBlock) -> bool {
const MINIBLOCK_MAX_BYTE_LENGTH_PER_VALUE: u64 = 256;

if let Some(max_len_array) = data_block.get_stat(Stat::MaxLength) {
let max_len_array = max_len_array
.as_any()
.downcast_ref::<PrimitiveArray<UInt64Type>>()
.unwrap();
if max_len_array.value(0) < 128 {
if max_len_array.value(0) < MINIBLOCK_MAX_BYTE_LENGTH_PER_VALUE {
return true;
}
}
Expand Down

0 comments on commit c237bcb

Please sign in to comment.