Skip to content

Commit

Permalink
Offset list counts zero offsets (#454)
Browse files Browse the repository at this point in the history
This change allows the OffsetList to count the length of a zero prefix of
all offsets without actually storing the zero elements. This avoids
consuming memory in a frequently-occurring pattern where at least the first
element is zero, or, if the singleton update optimization kicks in, a large
prefix of elements (up to all of them) is zero.

Signed-off-by: Moritz Hoffmann <[email protected]>
  • Loading branch information
antiguru authored Jan 7, 2024
1 parent 66be417 commit dc5ebef
Showing 1 changed file with 14 additions and 5 deletions.
19 changes: 14 additions & 5 deletions src/trace/implementations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ use crate::trace::cursor::MyTrait;
/// A list of unsigned integers that uses `u32` elements as long as they are small enough, and switches to `u64` once they are not.
#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Debug, Abomonation)]
pub struct OffsetList {
/// Length of a prefix of zero elements.
pub zero_prefix: usize,
/// Offsets that fit within a `u32`.
pub smol: Vec<u32>,
/// Offsets that either do not fit in a `u32`, or are inserted after some offset that did not fit.
Expand All @@ -210,13 +212,17 @@ impl OffsetList {
/// Allocate a new, empty list with a specified capacity.
///
/// `cap` is used only to pre-size the `u32` storage (`smol`); the zero-prefix
/// counter starts at zero and `chonk` starts out as an empty vector.
pub fn with_capacity(cap: usize) -> Self {
Self {
// No leading zero offsets have been counted yet.
zero_prefix: 0,
smol: Vec::with_capacity(cap),
// Created empty, without reserving room for `cap` elements.
chonk: Vec::new(),
}
}
/// Inserts the offset, as a `u32` if that is still on the table.
pub fn push(&mut self, offset: usize) {
if self.chonk.is_empty() {
if self.smol.is_empty() && self.chonk.is_empty() && offset == 0 {
self.zero_prefix += 1;
}
else if self.chonk.is_empty() {
if let Ok(smol) = offset.try_into() {
self.smol.push(smol);
}
Expand All @@ -230,16 +236,19 @@ impl OffsetList {
}
/// Like `std::ops::Index`, which we cannot implement as it must return a `&usize`.
pub fn index(&self, index: usize) -> usize {
if index < self.smol.len() {
self.smol[index].try_into().unwrap()
if index < self.zero_prefix {
0
}
else if index - self.zero_prefix < self.smol.len() {
self.smol[index - self.zero_prefix].try_into().unwrap()
}
else {
self.chonk[index - self.smol.len()].try_into().unwrap()
self.chonk[index - self.zero_prefix - self.smol.len()].try_into().unwrap()
}
}
/// The number of offsets in the list.
pub fn len(&self) -> usize {
self.smol.len() + self.chonk.len()
self.zero_prefix + self.smol.len() + self.chonk.len()
}
}

Expand Down

0 comments on commit dc5ebef

Please sign in to comment.