Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: remove used_encoding as it is not currently used #3081

Merged
merged 7 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 0 additions & 57 deletions rust/lance-encoding/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
//! and decoders easier (since they can rely on a normalized representation)

use std::{
collections::HashSet,
ops::Range,
sync::{Arc, RwLock},
};
Expand All @@ -31,17 +30,6 @@ use lance_core::{Error, Result};

use crate::{buffer::LanceBuffer, statistics::Stat};

/// `Encoding` enum serves as an encoding registration center.
///
/// All encodings added to Lance should be registered here. Registered
/// encodings can be dynamically selected during encoding, and users can also
/// specify the particular encoding they want to use in the field metadata.
#[derive(Eq, Hash, PartialEq, Debug)]
pub enum Encoding {
Bitpack,
Fsst,
FixedSizeBinary,
}
/// A data block with no buffers where everything is null
///
/// Note: this data block should not be used for future work. It will be deprecated
Expand Down Expand Up @@ -101,29 +89,6 @@ impl PartialEq for BlockInfo {
*self_info == *other_info
}
}
/// Records the set of encodings that have been applied to a `DataBlock`.
///
/// The set is stored behind an `Arc<RwLock<..>>`, so cloning a `UsedEncoding`
/// yields a handle to the same underlying set rather than an independent copy.
#[derive(Debug, Clone)]
pub struct UsedEncoding(Arc<RwLock<HashSet<Encoding>>>);

impl Default for UsedEncoding {
    /// Equivalent to [`UsedEncoding::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl UsedEncoding {
    /// Creates a tracker with no encodings recorded.
    pub fn new() -> Self {
        Self(Arc::new(RwLock::new(HashSet::new())))
    }
}

impl PartialEq for UsedEncoding {
    /// Two trackers are equal when they hold the same set of encodings.
    ///
    /// # Panics
    ///
    /// Panics if a lock that has to be read is poisoned.
    fn eq(&self, other: &Self) -> bool {
        // Clones share a single lock. Taking a second read guard on a lock the
        // current thread already holds may deadlock or panic (see
        // `RwLock::read`), and a set is trivially equal to itself, so
        // short-circuit before locking.
        if Arc::ptr_eq(&self.0, &other.0) {
            return true;
        }
        let self_used = self.0.read().unwrap();
        let other_used = other.0.read().unwrap();
        *self_used == *other_used
    }
}

/// Wraps a data block and adds nullability information to it
///
Expand All @@ -138,8 +103,6 @@ pub struct NullableDataBlock {
pub nulls: LanceBuffer,

pub block_info: BlockInfo,

pub used_encoding: UsedEncoding,
}

impl NullableDataBlock {
Expand All @@ -165,7 +128,6 @@ impl NullableDataBlock {
data: Box::new(self.data.borrow_and_clone()),
nulls: self.nulls.borrow_and_clone(),
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
}
}

Expand All @@ -174,7 +136,6 @@ impl NullableDataBlock {
data: Box::new(self.data.try_clone()?),
nulls: self.nulls.try_clone()?,
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
})
}

Expand Down Expand Up @@ -233,8 +194,6 @@ pub struct FixedWidthDataBlock {
pub num_values: u64,

pub block_info: BlockInfo,

pub used_encoding: UsedEncoding,
}

impl FixedWidthDataBlock {
Expand Down Expand Up @@ -271,7 +230,6 @@ impl FixedWidthDataBlock {
bits_per_value: self.bits_per_value,
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
}
}

Expand All @@ -281,7 +239,6 @@ impl FixedWidthDataBlock {
bits_per_value: self.bits_per_value,
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
})
}

Expand Down Expand Up @@ -323,7 +280,6 @@ impl DataBlockBuilderImpl for FixedWidthDataBlockBuilder {
bits_per_value: self.bits_per_value,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
})
}
}
Expand Down Expand Up @@ -444,7 +400,6 @@ pub struct OpaqueBlock {
pub buffers: Vec<LanceBuffer>,
pub num_values: u64,
pub block_info: BlockInfo,
pub used_encoding: UsedEncoding,
}

impl OpaqueBlock {
Expand All @@ -457,7 +412,6 @@ impl OpaqueBlock {
.collect(),
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
}
}

Expand All @@ -470,7 +424,6 @@ impl OpaqueBlock {
.collect::<Result<_>>()?,
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encoding: self.used_encoding.clone(),
})
}

Expand All @@ -494,8 +447,6 @@ pub struct VariableWidthBlock {
pub num_values: u64,

pub block_info: BlockInfo,

pub used_encodings: UsedEncoding,
}

impl VariableWidthBlock {
Expand Down Expand Up @@ -525,7 +476,6 @@ impl VariableWidthBlock {
bits_per_offset: self.bits_per_offset,
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encodings: self.used_encodings.clone(),
}
}

Expand All @@ -536,7 +486,6 @@ impl VariableWidthBlock {
bits_per_offset: self.bits_per_offset,
num_values: self.num_values,
block_info: self.block_info.clone(),
used_encodings: self.used_encodings.clone(),
})
}

Expand Down Expand Up @@ -989,7 +938,6 @@ fn arrow_binary_to_data_block(
bits_per_offset,
num_values,
block_info: BlockInfo::new(),
used_encodings: UsedEncoding::new(),
})
}

Expand Down Expand Up @@ -1155,15 +1103,13 @@ fn arrow_dictionary_to_data_block(arrays: &[ArrayRef], validity: Option<NullBuff
bits_per_value: bits_per_index,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}
} else {
FixedWidthDataBlock {
data: LanceBuffer::Borrowed(indices.to_data().buffers()[0].clone()),
bits_per_value: indices.data_type().byte_width() as u64 * 8,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}
};

Expand Down Expand Up @@ -1247,7 +1193,6 @@ impl DataBlock {
bits_per_value: 1,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
})
}
DataType::Date32
Expand Down Expand Up @@ -1277,7 +1222,6 @@ impl DataBlock {
bits_per_value: data_type.byte_width() as u64 * 8,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
})
}
DataType::Null => Self::AllNull(AllNullDataBlock { num_values }),
Expand Down Expand Up @@ -1325,7 +1269,6 @@ impl DataBlock {
data: Box::new(encoded),
nulls: LanceBuffer::Borrowed(nulls.into_inner().into_inner()),
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}),
_ => unreachable!(),
}
Expand Down
3 changes: 1 addition & 2 deletions rust/lance-encoding/src/encodings/logical/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use lance_core::{cache::FileMetadataCache, Error, Result};

use crate::{
buffer::LanceBuffer,
data::{BlockInfo, DataBlock, FixedWidthDataBlock, UsedEncoding},
data::{BlockInfo, DataBlock, FixedWidthDataBlock},
decoder::{
DecodeArrayTask, DecodeBatchScheduler, FieldScheduler, FilterExpression, ListPriorityRange,
LogicalPageDecoder, MessageType, NextDecodeTask, PageEncoding, PriorityRange,
Expand Down Expand Up @@ -1083,7 +1083,6 @@ impl ListOffsetsEncoder {
data: LanceBuffer::reinterpret_vec(offsets),
num_values: num_offsets,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
});
inner_encoder.encode(offsets_data, &DataType::UInt64, buffer_index)
}
Expand Down
4 changes: 1 addition & 3 deletions rust/lance-encoding/src/encodings/logical/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use lance_core::{datatypes::Field, utils::tokio::spawn_cpu, Result};

use crate::{
buffer::LanceBuffer,
data::{BlockInfo, DataBlock, DataBlockBuilder, FixedWidthDataBlock, UsedEncoding},
data::{BlockInfo, DataBlock, DataBlockBuilder, FixedWidthDataBlock},
decoder::{
BlockDecompressor, ColumnInfo, DecodeArrayTask, DecodePageTask, DecodedArray, DecodedPage,
DecompressorStrategy, FieldScheduler, FilterExpression, LoadedPage, LogicalPageDecoder,
Expand Down Expand Up @@ -1643,7 +1643,6 @@ impl PrimitiveStructuralEncoder {
bits_per_value: 16,
num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
});
let levels_field = Field::new_arrow("", DataType::UInt16, false)?;
// Pick a block compressor
Expand All @@ -1663,7 +1662,6 @@ impl PrimitiveStructuralEncoder {
bits_per_value: 16,
num_values: chunk_num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
});
let compressed_levels = compressor.compress(chunk_levels_block)?;
off += level_bytes;
Expand Down
4 changes: 1 addition & 3 deletions rust/lance-encoding/src/encodings/physical/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use futures::{future::BoxFuture, FutureExt};
use log::trace;

use crate::{
data::{AllNullDataBlock, BlockInfo, DataBlock, NullableDataBlock, UsedEncoding},
data::{AllNullDataBlock, BlockInfo, DataBlock, NullableDataBlock},
decoder::{PageScheduler, PrimitivePageDecoder},
encoder::{ArrayEncoder, EncodedArray},
format::ProtobufUtils,
Expand Down Expand Up @@ -166,7 +166,6 @@ impl PrimitivePageDecoder for BasicPageDecoder {
data: Box::new(values),
nulls: validity.data,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}))
}
DataNullStatus::All => Ok(DataBlock::AllNull(AllNullDataBlock {
Expand Down Expand Up @@ -218,7 +217,6 @@ impl ArrayEncoder for BasicEncoder {
data: Box::new(encoded_values.data),
nulls: nullable.nulls,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
});
Ok(EncodedArray {
data: encoded,
Expand Down
9 changes: 1 addition & 8 deletions rust/lance-encoding/src/encodings/physical/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::encodings::logical::primitive::PrimitiveFieldDecoder;

use crate::buffer::LanceBuffer;
use crate::data::{
BlockInfo, DataBlock, FixedWidthDataBlock, NullableDataBlock, UsedEncoding, VariableWidthBlock,
BlockInfo, DataBlock, FixedWidthDataBlock, NullableDataBlock, VariableWidthBlock,
};
use crate::format::ProtobufUtils;
use crate::{
Expand Down Expand Up @@ -327,14 +327,12 @@ impl PrimitivePageDecoder for BinaryPageDecoder {
num_values: num_rows,
offsets: LanceBuffer::from(offsets_buffer),
block_info: BlockInfo::new(),
used_encodings: UsedEncoding::new(),
});
if let Some(validity) = validity_buffer {
Ok(DataBlock::Nullable(NullableDataBlock {
data: Box::new(string_data),
nulls: LanceBuffer::from(validity),
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}))
} else {
Ok(string_data)
Expand Down Expand Up @@ -372,7 +370,6 @@ impl BinaryEncoder {
num_values,
offsets: LanceBuffer::reinterpret_vec(vec![0_u32; num_values as usize + 1]),
block_info: BlockInfo::new(),
used_encodings: UsedEncoding::new(),
}
} else {
VariableWidthBlock {
Expand All @@ -381,7 +378,6 @@ impl BinaryEncoder {
num_values,
offsets: LanceBuffer::reinterpret_vec(vec![0_u64; num_values as usize + 1]),
block_info: BlockInfo::new(),
used_encodings: UsedEncoding::new(),
}
}
}
Expand Down Expand Up @@ -422,7 +418,6 @@ fn get_indices_from_string_arrays(
data: LanceBuffer::empty(),
num_values: 0,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}),
0,
);
Expand Down Expand Up @@ -452,7 +447,6 @@ fn get_indices_from_string_arrays(
data: LanceBuffer::reinterpret_vec(indices),
num_values: num_rows as u64,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
});
(indices, null_adjustment)
}
Expand Down Expand Up @@ -505,7 +499,6 @@ impl ArrayEncoder for BinaryEncoder {
data: data.data,
num_values: data.num_values,
block_info: BlockInfo::new(),
used_encodings: UsedEncoding::new(),
});

let bytes_buffer_index = *buffer_index;
Expand Down
3 changes: 1 addition & 2 deletions rust/lance-encoding/src/encodings/physical/bitmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use log::trace;

use crate::{
buffer::LanceBuffer,
data::{BlockInfo, DataBlock, FixedWidthDataBlock, UsedEncoding},
data::{BlockInfo, DataBlock, FixedWidthDataBlock},
decoder::{PageScheduler, PrimitivePageDecoder},
EncodingsIo,
};
Expand Down Expand Up @@ -119,7 +119,6 @@ impl PrimitivePageDecoder for BitmapDecoder {
bits_per_value: 1,
num_values: num_rows,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}))
}
}
Expand Down
4 changes: 1 addition & 3 deletions rust/lance-encoding/src/encodings/physical/bitpack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use lance_arrow::DataTypeExt;
use lance_core::{Error, Result};

use crate::buffer::LanceBuffer;
use crate::data::{BlockInfo, DataBlock, FixedWidthDataBlock, UsedEncoding};
use crate::data::{BlockInfo, DataBlock, FixedWidthDataBlock};
use crate::decoder::{PageScheduler, PrimitivePageDecoder};
use crate::encoder::{ArrayEncoder, EncodedArray};
use crate::format::ProtobufUtils;
Expand Down Expand Up @@ -159,7 +159,6 @@ impl ArrayEncoder for BitpackedArrayEncoder {
data: LanceBuffer::Owned(dst_buffer),
num_values: unpacked.num_values,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
});

let bitpacked_buffer_index = *buffer_index;
Expand Down Expand Up @@ -489,7 +488,6 @@ impl PrimitivePageDecoder for BitpackedPageDecoder {
bits_per_value: self.uncompressed_bits_per_value,
num_values: num_rows,
block_info: BlockInfo::new(),
used_encoding: UsedEncoding::new(),
}))
}
}
Expand Down
Loading
Loading