Skip to content

Commit

Permalink
Merge branch 'main' into expect-stat
Browse files Browse the repository at this point in the history
  • Loading branch information
broccoliSpicy authored Nov 14, 2024
2 parents 4a9cdff + f257489 commit 71a901f
Show file tree
Hide file tree
Showing 16 changed files with 32 additions and 132 deletions.
26 changes: 13 additions & 13 deletions python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 0 additions & 58 deletions rust/lance-encoding/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
//! and decoders easier (since they can rely on a normalized representation)

use std::{
collections::HashSet,
ops::Range,
sync::{Arc, RwLock},
};
Expand All @@ -34,17 +33,6 @@ use crate::{
statistics::{ComputeStat, Stat},
};

/// Central registry of the encodings known to Lance.
///
/// Every encoding added to Lance should be registered here as a variant.
/// Registered encodings can be selected dynamically during encoding, and
/// users can also request a particular encoding through the field metadata.
///
/// The enum is field-less, so it is `Copy`: values can be passed around and
/// stored in sets without borrowing or explicit cloning.
#[derive(Clone, Copy, Eq, Hash, PartialEq, Debug)]
pub enum Encoding {
    Bitpack,
    Fsst,
    FixedSizeBinary,
}
/// A data block with no buffers where everything is null
///
/// Note: this data block should not be used for future work. It will be deprecated
Expand Down Expand Up @@ -104,29 +92,6 @@ impl PartialEq for BlockInfo {
*self_info == *other_info
}
}
/// Records the set of encodings that have been applied to a `DataBlock`.
///
/// The set is stored behind an `Arc<RwLock<_>>`, so cloning a `UsedEncoding`
/// produces another handle to the *same* underlying set rather than an
/// independent copy.
#[derive(Debug, Clone)]
pub struct UsedEncoding(Arc<RwLock<HashSet<Encoding>>>);

impl Default for UsedEncoding {
    /// Equivalent to [`UsedEncoding::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl UsedEncoding {
    /// Creates a tracker with no encodings recorded.
    pub fn new() -> Self {
        Self(Arc::new(RwLock::new(HashSet::new())))
    }
}

impl PartialEq for UsedEncoding {
    /// Two trackers are equal when they record the same set of encodings.
    ///
    /// # Panics
    ///
    /// Panics if the two handles point at different sets and either lock is
    /// poisoned.
    fn eq(&self, other: &Self) -> bool {
        // Handles produced by `clone()` share one `RwLock`. Taking a second
        // read guard on a lock this thread already holds may deadlock or
        // panic, and a set always equals itself, so skip locking entirely.
        if Arc::ptr_eq(&self.0, &other.0) {
            return true;
        }

        let self_used = self.0.read().unwrap();
        let other_used = other.0.read().unwrap();
        *self_used == *other_used
    }
}

/// Wraps a data block and adds nullability information to it
///
Expand All @@ -141,8 +106,6 @@ pub struct NullableDataBlock {
pub nulls: LanceBuffer,

pub block_info: BlockInfo,

pub used_encoding: UsedEncoding,
}

impl NullableDataBlock {
Expand All @@ -168,7 +131,6 @@ impl NullableDataBlock {
data: Box::new(self.data.borrow_and_clone()),
nulls: self.nulls.borrow_and_clone(),
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
}
}

Expand All @@ -177,7 +139,6 @@ impl NullableDataBlock {
data: Box::new(self.data.try_clone()?),
nulls: self.nulls.try_clone()?,
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
})
}

Expand Down Expand Up @@ -236,8 +197,6 @@ pub struct FixedWidthDataBlock {
pub num_values: u64,

pub block_info: BlockInfo,

pub used_encoding: UsedEncoding,
}

impl FixedWidthDataBlock {
Expand Down Expand Up @@ -274,7 +233,6 @@ impl FixedWidthDataBlock {
bits_per_value: self.bits_per_value,
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
}
}

Expand All @@ -284,7 +242,6 @@ impl FixedWidthDataBlock {
bits_per_value: self.bits_per_value,
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
})
}

Expand Down Expand Up @@ -342,7 +299,6 @@ impl DataBlockBuilderImpl for VariableWidthDataBlockBuilder {
bits_per_offset: 32,
num_values,
block_info: BlockInfo::new(),
used_encodings: UsedEncoding::new(),
})
}
}
Expand Down Expand Up @@ -380,7 +336,6 @@ impl DataBlockBuilderImpl for FixedWidthDataBlockBuilder {
bits_per_value: self.bits_per_value,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
})
}
}
Expand Down Expand Up @@ -528,7 +483,6 @@ pub struct OpaqueBlock {
pub buffers: Vec<LanceBuffer>,
pub num_values: u64,
pub block_info: BlockInfo,
pub used_encoding: UsedEncoding,
}

impl OpaqueBlock {
Expand All @@ -541,7 +495,6 @@ impl OpaqueBlock {
.collect(),
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
}
}

Expand All @@ -554,7 +507,6 @@ impl OpaqueBlock {
.collect::<Result<_>>()?,
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
})
}

Expand All @@ -578,8 +530,6 @@ pub struct VariableWidthBlock {
pub num_values: u64,

pub block_info: BlockInfo,

pub used_encodings: UsedEncoding,
}

impl VariableWidthBlock {
Expand Down Expand Up @@ -609,7 +559,6 @@ impl VariableWidthBlock {
bits_per_offset: self.bits_per_offset,
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encodings: self.used_encodings.clone(),
}
}

Expand All @@ -620,7 +569,6 @@ impl VariableWidthBlock {
bits_per_offset: self.bits_per_offset,
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encodings: self.used_encodings.clone(),
})
}

Expand Down Expand Up @@ -1109,7 +1057,6 @@ fn arrow_binary_to_data_block(
bits_per_offset,
num_values,
block_info: BlockInfo::new(),
used_encodings: UsedEncoding::new(),
})
}

Expand Down Expand Up @@ -1275,15 +1222,13 @@ fn arrow_dictionary_to_data_block(arrays: &[ArrayRef], validity: Option<NullBuff
bits_per_value: bits_per_index,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}
} else {
FixedWidthDataBlock {
data: LanceBuffer::Borrowed(indices.to_data().buffers()[0].clone()),
bits_per_value: indices.data_type().byte_width() as u64 * 8,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}
};

Expand Down Expand Up @@ -1367,7 +1312,6 @@ impl DataBlock {
bits_per_value: 1,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
})
}
DataType::Date32
Expand Down Expand Up @@ -1397,7 +1341,6 @@ impl DataBlock {
bits_per_value: data_type.byte_width() as u64 * 8,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
})
}
DataType::Null => Self::AllNull(AllNullDataBlock { num_values }),
Expand Down Expand Up @@ -1449,7 +1392,6 @@ impl DataBlock {
data: Box::new(encoded),
nulls: LanceBuffer::Borrowed(nulls.into_inner().into_inner()),
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}),
_ => unreachable!(),
}
Expand Down
3 changes: 1 addition & 2 deletions rust/lance-encoding/src/encodings/logical/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use lance_core::{cache::FileMetadataCache, Error, Result};

use crate::{
buffer::LanceBuffer,
data::{BlockInfo, DataBlock, FixedWidthDataBlock, UsedEncoding},
data::{BlockInfo, DataBlock, FixedWidthDataBlock},
decoder::{
DecodeArrayTask, DecodeBatchScheduler, FieldScheduler, FilterExpression, ListPriorityRange,
LogicalPageDecoder, MessageType, NextDecodeTask, PageEncoding, PriorityRange,
Expand Down Expand Up @@ -1083,7 +1083,6 @@ impl ListOffsetsEncoder {
data: LanceBuffer::reinterpret_vec(offsets),
num_values: num_offsets,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
});
inner_encoder.encode(offsets_data, &DataType::UInt64, buffer_index)
}
Expand Down
4 changes: 1 addition & 3 deletions rust/lance-encoding/src/encodings/logical/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use lance_core::{datatypes::Field, utils::tokio::spawn_cpu, Result};

use crate::{
buffer::LanceBuffer,
data::{BlockInfo, DataBlockBuilder, FixedWidthDataBlock, UsedEncoding},
data::{BlockInfo, DataBlockBuilder, FixedWidthDataBlock},
decoder::{
BlockDecompressor, ColumnInfo, DecodeArrayTask, DecodePageTask, DecodedArray, DecodedPage,
DecompressorStrategy, FieldScheduler, FilterExpression, LoadedPage, LogicalPageDecoder,
Expand Down Expand Up @@ -1958,7 +1958,6 @@ impl PrimitiveStructuralEncoder {
bits_per_value: 16,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
});
let levels_field = Field::new_arrow("", DataType::UInt16, false)?;
// Pick a block compressor
Expand All @@ -1978,7 +1977,6 @@ impl PrimitiveStructuralEncoder {
bits_per_value: 16,
num_values: chunk_num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
});
let compressed_levels = compressor.compress(chunk_levels_block)?;
off += level_bytes;
Expand Down
Loading

0 comments on commit 71a901f

Please sign in to comment.