Skip to content

Commit

Permalink
refactor: change InvertedIndexReader method signature to offsets to f…
Browse files Browse the repository at this point in the history
…acilitate caching
  • Loading branch information
v0y4g3r committed Jul 2, 2024
1 parent f2c08b8 commit 144aad0
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 70 deletions.
15 changes: 5 additions & 10 deletions src/index/src/inverted_index/format/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ mod footer;

use async_trait::async_trait;
use common_base::BitVec;
use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas};
use greptime_proto::v1::index::InvertedIndexMetas;

use crate::inverted_index::error::Result;
pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader;
Expand All @@ -30,14 +30,9 @@ pub trait InvertedIndexReader: Send {
/// Retrieve metadata of all inverted indices stored within the blob.
async fn metadata(&mut self) -> Result<InvertedIndexMetas>;

/// Retrieve the finite state transducer (FST) map for a given inverted index metadata entry.
async fn fst(&mut self, meta: &InvertedIndexMeta) -> Result<FstMap>;
/// Retrieve the finite state transducer (FST) map from the given offset and size.
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap>;

/// Retrieve the bitmap for a given inverted index metadata entry at the specified offset and size.
async fn bitmap(
&mut self,
meta: &InvertedIndexMeta,
relative_offset: u32,
size: u32,
) -> Result<BitVec>;
/// Retrieve the bitmap from the given offset and size.
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec>;
}
49 changes: 30 additions & 19 deletions src/index/src/inverted_index/format/reader/blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::io::SeekFrom;
use async_trait::async_trait;
use common_base::BitVec;
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas};
use greptime_proto::v1::index::InvertedIndexMetas;
use snafu::{ensure, ResultExt};

use crate::inverted_index::error::{
Expand Down Expand Up @@ -61,23 +61,22 @@ impl<R: AsyncRead + AsyncSeek + Unpin + Send> InvertedIndexReader for InvertedIn
footer_reader.metadata().await
}

async fn fst(&mut self, meta: &InvertedIndexMeta) -> Result<FstMap> {
let offset = SeekFrom::Start(meta.base_offset + meta.relative_fst_offset as u64);
self.source.seek(offset).await.context(SeekSnafu)?;
let mut buf = vec![0u8; meta.fst_size as usize];
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap> {
self.source
.seek(SeekFrom::Start(offset))
.await
.context(SeekSnafu)?;
let mut buf = vec![0u8; size as usize];
self.source.read_exact(&mut buf).await.context(ReadSnafu)?;

FstMap::new(buf).context(DecodeFstSnafu)
}

async fn bitmap(
&mut self,
meta: &InvertedIndexMeta,
relative_offset: u32,
size: u32,
) -> Result<BitVec> {
let offset = SeekFrom::Start(meta.base_offset + relative_offset as u64);
self.source.seek(offset).await.context(SeekSnafu)?;
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec> {
self.source
.seek(SeekFrom::Start(offset))
.await
.context(SeekSnafu)?;
let mut buf = vec![0u8; size as usize];
self.source.read_exact(&mut buf).await.context(ReadSnafu)?;

Expand Down Expand Up @@ -202,13 +201,25 @@ mod tests {
let metas = blob_reader.metadata().await.unwrap();
let meta = metas.metas.get("tag0").unwrap();

let fst_map = blob_reader.fst(meta).await.unwrap();
let fst_map = blob_reader
.fst(
meta.base_offset + meta.relative_fst_offset as u64,
meta.fst_size,
)
.await
.unwrap();
assert_eq!(fst_map.len(), 2);
assert_eq!(fst_map.get("key1".as_bytes()), Some(1));
assert_eq!(fst_map.get("key2".as_bytes()), Some(2));

let meta = metas.metas.get("tag1").unwrap();
let fst_map = blob_reader.fst(meta).await.unwrap();
let fst_map = blob_reader
.fst(
meta.base_offset + meta.relative_fst_offset as u64,
meta.fst_size,
)
.await
.unwrap();
assert_eq!(fst_map.len(), 2);
assert_eq!(fst_map.get("key1".as_bytes()), Some(1));
assert_eq!(fst_map.get("key2".as_bytes()), Some(2));
Expand All @@ -222,17 +233,17 @@ mod tests {
let metas = blob_reader.metadata().await.unwrap();
let meta = metas.metas.get("tag0").unwrap();

let bitmap = blob_reader.bitmap(meta, 0, 2).await.unwrap();
let bitmap = blob_reader.bitmap(meta.base_offset, 2).await.unwrap();
assert_eq!(bitmap.into_vec(), create_fake_bitmap());
let bitmap = blob_reader.bitmap(meta, 2, 2).await.unwrap();
let bitmap = blob_reader.bitmap(meta.base_offset + 2, 2).await.unwrap();
assert_eq!(bitmap.into_vec(), create_fake_bitmap());

let metas = blob_reader.metadata().await.unwrap();
let meta = metas.metas.get("tag1").unwrap();

let bitmap = blob_reader.bitmap(meta, 0, 2).await.unwrap();
let bitmap = blob_reader.bitmap(meta.base_offset, 2).await.unwrap();
assert_eq!(bitmap.into_vec(), create_fake_bitmap());
let bitmap = blob_reader.bitmap(meta, 2, 2).await.unwrap();
let bitmap = blob_reader.bitmap(meta.base_offset + 2, 2).await.unwrap();
assert_eq!(bitmap.into_vec(), create_fake_bitmap());
}
}
46 changes: 38 additions & 8 deletions src/index/src/inverted_index/format/writer/blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,16 +174,31 @@ mod tests {
assert_eq!(stats0.null_count, 1);
assert_eq!(stats0.min_value, Bytes::from("a"));
assert_eq!(stats0.max_value, Bytes::from("c"));
let fst0 = reader.fst(tag0).await.unwrap();
let fst0 = reader
.fst(
tag0.base_offset + tag0.relative_fst_offset as u64,
tag0.fst_size,
)
.await
.unwrap();
assert_eq!(fst0.len(), 3);
let [offset, size] = unpack(fst0.get(b"a").unwrap());
let bitmap = reader.bitmap(tag0, offset, size).await.unwrap();
let bitmap = reader
.bitmap(tag0.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
let [offset, size] = unpack(fst0.get(b"b").unwrap());
let bitmap = reader.bitmap(tag0, offset, size).await.unwrap();
let bitmap = reader
.bitmap(tag0.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
let [offset, size] = unpack(fst0.get(b"c").unwrap());
let bitmap = reader.bitmap(tag0, offset, size).await.unwrap();
let bitmap = reader
.bitmap(tag0.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));

// tag1
Expand All @@ -193,16 +208,31 @@ mod tests {
assert_eq!(stats1.null_count, 1);
assert_eq!(stats1.min_value, Bytes::from("x"));
assert_eq!(stats1.max_value, Bytes::from("z"));
let fst1 = reader.fst(tag1).await.unwrap();
let fst1 = reader
.fst(
tag1.base_offset + tag1.relative_fst_offset as u64,
tag1.fst_size,
)
.await
.unwrap();
assert_eq!(fst1.len(), 3);
let [offset, size] = unpack(fst1.get(b"x").unwrap());
let bitmap = reader.bitmap(tag1, offset, size).await.unwrap();
let bitmap = reader
.bitmap(tag1.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
let [offset, size] = unpack(fst1.get(b"y").unwrap());
let bitmap = reader.bitmap(tag1, offset, size).await.unwrap();
let bitmap = reader
.bitmap(tag1.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000]));
let [offset, size] = unpack(fst1.get(b"z").unwrap());
let bitmap = reader.bitmap(tag1, offset, size).await.unwrap();
let bitmap = reader
.bitmap(tag1.base_offset + offset as u64, size)
.await
.unwrap();
assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001]));
}
}
4 changes: 2 additions & 2 deletions src/index/src/inverted_index/search/fst_values_mapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ impl<'a> FstValuesMapper<'a> {

let bm = self
.reader
.bitmap(self.metadata, relative_offset, size)
.bitmap(self.metadata.base_offset + relative_offset as u64, size)
.await?;

// Ensure the longest BitVec is the left operand to prevent truncation during OR.
Expand Down Expand Up @@ -79,7 +79,7 @@ mod tests {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader
.expect_bitmap()
.returning(|_, offset, size| match (offset, size) {
.returning(|offset, size| match (offset, size) {
(1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1]),
(2, 1) => Ok(bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1]),
_ => unreachable!(),
Expand Down
59 changes: 28 additions & 31 deletions src/index/src/inverted_index/search/index_apply/predicates_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,9 @@ impl IndexApplier for PredicatesIndexApplier {
}
};

let fst = reader.fst(meta).await?;
let fst_offset = meta.base_offset + meta.relative_fst_offset as u64;
let fst_size = meta.fst_size;
let fst = reader.fst(fst_offset, fst_size).await?;
let values = fst_applier.apply(&fst);

let mut mapper = FstValuesMapper::new(&mut *reader, meta);
Expand Down Expand Up @@ -159,15 +161,16 @@ mod tests {
s.to_owned()
}

fn mock_metas(tags: impl IntoIterator<Item = &'static str>) -> InvertedIndexMetas {
fn mock_metas(tags: impl IntoIterator<Item = (&'static str, u32)>) -> InvertedIndexMetas {
let mut metas = InvertedIndexMetas {
total_row_count: 8,
segment_row_count: 1,
..Default::default()
};
for tag in tags.into_iter() {
for (tag, idx) in tags.into_iter() {
let meta = InvertedIndexMeta {
name: s(tag),
relative_fst_offset: idx,
..Default::default()
};
metas.metas.insert(s(tag), meta);
Expand Down Expand Up @@ -198,19 +201,16 @@ mod tests {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader
.expect_metadata()
.returning(|| Ok(mock_metas(["tag-0"])));
.returning(|| Ok(mock_metas([("tag-0", 0)])));
mock_reader.expect_fst().returning(|_offset, _size| {
Ok(FstMap::from_iter([(b"tag-0_value-0", fst_value(2, 1))]).unwrap())
});
mock_reader
.expect_fst()
.returning(|meta| match meta.name.as_str() {
"tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-0", fst_value(2, 1))]).unwrap()),
.expect_bitmap()
.returning(|offset, size| match (offset, size) {
(2, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]),
_ => unreachable!(),
});
mock_reader.expect_bitmap().returning(|meta, offset, size| {
match (meta.name.as_str(), offset, size) {
("tag-0", 2, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]),
_ => unreachable!(),
}
});
let output = applier
.apply(SearchContext::default(), &mut mock_reader)
.await
Expand All @@ -224,13 +224,10 @@ mod tests {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader
.expect_metadata()
.returning(|| Ok(mock_metas(["tag-0"])));
mock_reader
.expect_fst()
.returning(|meta| match meta.name.as_str() {
"tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-1", fst_value(2, 1))]).unwrap()),
_ => unreachable!(),
});
.returning(|| Ok(mock_metas([("tag-0", 0)])));
mock_reader.expect_fst().returning(|_offset, _size| {
Ok(FstMap::from_iter([(b"tag-0_value-1", fst_value(2, 1))]).unwrap())
});
let output = applier
.apply(SearchContext::default(), &mut mock_reader)
.await
Expand All @@ -252,21 +249,21 @@ mod tests {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader
.expect_metadata()
.returning(|| Ok(mock_metas(["tag-0", "tag-1"])));
.returning(|| Ok(mock_metas([("tag-0", 0), ("tag-1", 1)])));
mock_reader
.expect_fst()
.returning(|meta| match meta.name.as_str() {
"tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-0", fst_value(1, 1))]).unwrap()),
"tag-1" => Ok(FstMap::from_iter([(b"tag-1_value-a", fst_value(2, 1))]).unwrap()),
.returning(|offset, _size| match offset {
0 => Ok(FstMap::from_iter([(b"tag-0_value-0", fst_value(1, 1))]).unwrap()),
1 => Ok(FstMap::from_iter([(b"tag-1_value-a", fst_value(2, 1))]).unwrap()),
_ => unreachable!(),
});
mock_reader.expect_bitmap().returning(|meta, offset, size| {
match (meta.name.as_str(), offset, size) {
("tag-0", 1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]),
("tag-1", 2, 1) => Ok(bitvec![u8, Lsb0; 1, 1, 0, 1, 1, 0, 1, 1]),
mock_reader
.expect_bitmap()
.returning(|offset, size| match (offset, size) {
(1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]),
(2, 1) => Ok(bitvec![u8, Lsb0; 1, 1, 0, 1, 1, 0, 1, 1]),
_ => unreachable!(),
}
});
});

let output = applier
.apply(SearchContext::default(), &mut mock_reader)
Expand All @@ -287,7 +284,7 @@ mod tests {
let mut mock_reader: MockInvertedIndexReader = MockInvertedIndexReader::new();
mock_reader
.expect_metadata()
.returning(|| Ok(mock_metas(["tag-0"])));
.returning(|| Ok(mock_metas([("tag-0", 0)])));

let output = applier
.apply(SearchContext::default(), &mut mock_reader)
Expand Down

0 comments on commit 144aad0

Please sign in to comment.