Skip to content

Commit

Permalink
[week1] 1.4 finished
Browse files Browse the repository at this point in the history
  • Loading branch information
HeartLinked committed Oct 2, 2024
1 parent 4ee60ac commit 5a013ac
Show file tree
Hide file tree
Showing 6 changed files with 360 additions and 24 deletions.
1 change: 1 addition & 0 deletions mini-lsm-starter/src/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
pub use iterator::BlockIterator;

/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted key-value pairs.
#[derive(Default)]
pub struct Block {
pub(crate) data: Vec<u8>,
pub(crate) offsets: Vec<u16>,
Expand Down
32 changes: 24 additions & 8 deletions mini-lsm-starter/src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ mod iterator;

use bytes::Bytes;
use std::fs::File;
use std::io::{Read, Seek};
use std::io::{Read, Seek, SeekFrom};
use std::os::unix::fs::FileExt;
use std::path::Path;
use std::sync::Arc;

Expand Down Expand Up @@ -38,11 +39,7 @@ impl BlockMeta {
/// Encode block meta to a buffer.
/// You may add extra fields to the buffer,
/// in order to help keep track of `first_key` when decoding from the same buffer in the future.
pub fn encode_block_meta(
block_meta: &[BlockMeta],
#[allow(clippy::ptr_arg)] // remove this allow after you finish
buf: &mut Vec<u8>,
) {
pub fn encode_block_meta(block_meta: &[BlockMeta], buf: &mut Vec<u8>) {
for meta in block_meta {
// Encode the offset using 8 bytes (a little-endian u64).
buf.extend_from_slice(&(meta.offset as u64).to_le_bytes());
Expand Down Expand Up @@ -205,7 +202,24 @@ impl SsTable {

/// Read the block at `block_idx` from the table's file and decode it.
///
/// The block spans from its own `offset` up to the next block's `offset`, or up to
/// `block_meta_offset` when it is the last entry in `block_meta`.
///
/// An index that has no entry in `block_meta` yields an empty default block rather
/// than an error.
///
/// # Errors
///
/// Fails when no file handle is available, when the metadata describes a block that
/// ends before it starts, or when reading from the file fails.
pub fn read_block(&self, block_idx: usize) -> Result<Arc<Block>> {
    let file = self
        .file
        .0
        .as_ref()
        .ok_or_else(|| anyhow!("File not exists"))?;

    let meta = match self.block_meta.get(block_idx) {
        Some(meta) => meta,
        // Out-of-range index: hand back an empty block instead of failing.
        None => return Ok(Arc::default()),
    };

    // The block ends where the next one starts, or where the meta section begins.
    let end = self
        .block_meta
        .get(block_idx + 1)
        .map_or(self.block_meta_offset, |next| next.offset);

    // Inconsistent offsets would otherwise underflow and request a bogus buffer size.
    let length = end
        .checked_sub(meta.offset)
        .ok_or_else(|| anyhow!("block {} has an invalid extent", block_idx))?;

    let mut buffer = vec![0; length];
    file.read_exact_at(&mut buffer, meta.offset as u64)?;

    Ok(Arc::new(Block::decode(&buffer)))
}

/// Read a block from disk, with block cache. (Day 4)
Expand All @@ -217,7 +231,9 @@ impl SsTable {
/// Note: You may want to make use of the `first_key` stored in `BlockMeta`.
/// You may also assume the key-value pairs stored in each consecutive block are sorted.
pub fn find_block_idx(&self, key: KeySlice) -> usize {
    // Binary search over the blocks' `first_key`s.
    //
    // An exact hit returns the index of the block whose `first_key` equals `key`.
    // Otherwise the insertion point is returned: `0` when `key` sorts before every
    // `first_key`, the number of blocks when it sorts after all of them. In the
    // non-exact case the block that may hold `key` is the one just before the
    // returned index, so callers have to step back by one themselves.
    match self
        .block_meta
        .binary_search_by_key(&key, |meta| meta.first_key.as_key_slice())
    {
        Ok(idx) | Err(idx) => idx,
    }
}

/// Get number of data blocks.
Expand Down
2 changes: 1 addition & 1 deletion mini-lsm-starter/src/table/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ impl SsTableBuilder {
if self.builder.add(key, value) == false {
// The block is full and the add failed, so start a new block.
self.split_new_block();
let _ = self.builder.add(key, value);
}
let _ = self.builder.add(key, value);
}

fn split_new_block(&mut self) {
Expand Down
56 changes: 41 additions & 15 deletions mini-lsm-starter/src/table/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod

use std::sync::Arc;

use anyhow::Result;
Expand All @@ -18,48 +15,77 @@ pub struct SsTableIterator {
impl SsTableIterator {
/// Create a new iterator and seek to the first key-value pair in the first data block.
pub fn create_and_seek_to_first(table: Arc<SsTable>) -> Result<Self> {
unimplemented!()
let blk_iter = BlockIterator::create_and_seek_to_first(table.read_block(0)?);
Ok(Self {
table,
blk_iter,
blk_idx: 0,
})
}

/// Seek to the first key-value pair in the first data block.
pub fn seek_to_first(&mut self) -> Result<()> {
unimplemented!()
self.blk_idx = 0;
self.blk_iter = BlockIterator::create_and_seek_to_first(self.table.read_block(0)?);
Ok(())
}

/// Create a new iterator and seek to the first key-value pair which >= `key`.
pub fn create_and_seek_to_key(table: Arc<SsTable>, key: KeySlice) -> Result<Self> {
unimplemented!()
let mut iter = Self::create_and_seek_to_first(table)?;
iter.seek_to_key(key)?;
Ok(iter)
}

/// Seek to the first key-value pair which >= `key`.
/// Note: You probably want to review the handout for detailed explanation when implementing
/// this function.
pub fn seek_to_key(&mut self, key: KeySlice) -> Result<()> {
unimplemented!()
let idx = self.table.find_block_idx(key).saturating_sub(1);
let block_meta = &self.table.block_meta[idx];

if key <= block_meta.last_key.as_key_slice() {
let mut blk_iter = BlockIterator::create_and_seek_to_first(self.table.read_block(idx)?);
blk_iter.seek_to_key(key);
self.blk_iter = blk_iter;
self.blk_idx = idx;
} else {
let idx = idx + 1;
let blk_iter = BlockIterator::create_and_seek_to_first(self.table.read_block(idx)?);
self.blk_iter = blk_iter;
self.blk_idx = idx;
}
Ok(())
}
}

impl StorageIterator for SsTableIterator {
    type KeyType<'a> = KeySlice<'a>;

    /// Return the `key` that's held by the underlying block iterator.
    fn key(&self) -> KeySlice {
        self.blk_iter.key()
    }

    /// Return the `value` that's held by the underlying block iterator.
    fn value(&self) -> &[u8] {
        self.blk_iter.value()
    }

    /// Return whether the current block iterator is valid or not.
    fn is_valid(&self) -> bool {
        self.blk_iter.is_valid()
    }

    /// Move to the next `key`, switching to the start of the following block when the
    /// current block iterator is no longer valid and another block exists.
    ///
    /// # Errors
    ///
    /// Propagates any error from reading the following block; `blk_idx` is only
    /// advanced once that read has succeeded.
    fn next(&mut self) -> Result<()> {
        self.blk_iter.next();
        if !self.blk_iter.is_valid() {
            let following = self.blk_idx + 1;
            if following < self.table.block_meta.len() {
                let block = self.table.read_block(following)?;
                self.blk_iter = BlockIterator::create_and_seek_to_first(block);
                self.blk_idx = following;
            }
        }
        Ok(())
    }
}
147 changes: 147 additions & 0 deletions mini-lsm-starter/src/tests/week1_day3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
use std::sync::Arc;

use bytes::Bytes;

use crate::{
block::{Block, BlockBuilder, BlockIterator},
key::{KeySlice, KeyVec},
};

/// A builder created with `BlockBuilder::new(16)` must accept one small entry and
/// then build without panicking.
#[test]
fn test_block_build_single_key() {
    let mut block_builder = BlockBuilder::new(16);
    let accepted = block_builder.add(KeySlice::for_testing_from_slice_no_ts(b"233"), b"233333");
    assert!(accepted);
    block_builder.build();
}

/// With `BlockBuilder::new(16)`, the first entry is accepted and the second one is
/// rejected; the builder must still build afterwards.
#[test]
fn test_block_build_full() {
    let mut block_builder = BlockBuilder::new(16);
    let first_accepted = block_builder.add(KeySlice::for_testing_from_slice_no_ts(b"11"), b"11");
    assert!(first_accepted);
    let second_accepted = block_builder.add(KeySlice::for_testing_from_slice_no_ts(b"22"), b"22");
    assert!(!second_accepted);
    block_builder.build();
}

/// The very first entry is accepted even when its 100-byte value is far larger than
/// the `16` passed to `BlockBuilder::new`.
#[test]
fn test_block_build_large_1() {
    let mut block_builder = BlockBuilder::new(16);
    let big_value = b"1".repeat(100);
    let accepted = block_builder.add(KeySlice::for_testing_from_slice_no_ts(b"11"), &big_value);
    assert!(accepted);
    block_builder.build();
}

/// Once the builder already holds an entry, a following entry with a 100-byte value
/// must be rejected.
#[test]
fn test_block_build_large_2() {
    let mut block_builder = BlockBuilder::new(16);
    let small_accepted = block_builder.add(KeySlice::for_testing_from_slice_no_ts(b"11"), b"1");
    assert!(small_accepted);

    let big_value = b"1".repeat(100);
    let big_accepted = block_builder.add(KeySlice::for_testing_from_slice_no_ts(b"11"), &big_value);
    assert!(!big_accepted);
}

/// Test key for entry `idx`: the text `key_` followed by `idx * 5`, zero-padded to at
/// least three digits. Spacing the keys by five leaves gaps between consecutive keys.
fn key_of(idx: usize) -> KeyVec {
    let text = format!("key_{:03}", idx * 5);
    KeyVec::for_testing_from_vec_no_ts(text.into_bytes())
}

/// Test value for entry `idx`: the bytes of `value_` followed by `idx`, zero-padded to
/// at least ten digits.
fn value_of(idx: usize) -> Vec<u8> {
    let text = format!("value_{:010}", idx);
    Vec::from(text)
}

/// Number of entries written into the block produced by `generate_block`.
fn num_of_keys() -> usize {
    100_usize
}

/// Build a block holding `num_of_keys()` entries, using `key_of(idx)` / `value_of(idx)`
/// for each index in ascending order. Every insertion is asserted to succeed.
fn generate_block() -> Block {
    let mut block_builder = BlockBuilder::new(10000);
    (0..num_of_keys()).for_each(|idx| {
        let (key, value) = (key_of(idx), value_of(idx));
        assert!(block_builder.add(key.as_key_slice(), value.as_slice()));
    });
    block_builder.build()
}

/// Building the full test block must succeed; the resulting block is discarded.
#[test]
fn test_block_build_all() {
    let _ = generate_block();
}

/// Encoding the full test block must not panic; the encoded output is discarded.
#[test]
fn test_block_encode() {
    let _ = generate_block().encode();
}

/// An encode followed by a decode must reproduce the block's `offsets` and `data`.
#[test]
fn test_block_decode() {
    let original = generate_block();
    let wire = original.encode();
    let round_tripped = Block::decode(&wire);
    assert_eq!(original.offsets, round_tripped.offsets);
    assert_eq!(original.data, round_tripped.data);
}

/// Copy a byte slice into an owned `Bytes`; used for the assertion failure messages.
fn as_bytes(x: &[u8]) -> Bytes {
    Bytes::from(x.to_vec())
}

/// Walk the whole test block five times, rewinding with `seek_to_first` after each
/// pass, and check that every position yields the expected key and value.
#[test]
fn test_block_iterator() {
    let mut cursor = BlockIterator::create_and_seek_to_first(Arc::new(generate_block()));
    for _round in 0..5 {
        for idx in 0..num_of_keys() {
            let expected_key = key_of(idx);
            let expected_value = value_of(idx);
            let actual_key = cursor.key();
            let actual_value = cursor.value();
            assert_eq!(
                actual_key.for_testing_key_ref(),
                expected_key.for_testing_key_ref(),
                "expected key: {:?}, actual key: {:?}",
                as_bytes(expected_key.for_testing_key_ref()),
                as_bytes(actual_key.for_testing_key_ref())
            );
            assert_eq!(
                actual_value,
                expected_value,
                "expected value: {:?}, actual value: {:?}",
                as_bytes(&expected_value),
                as_bytes(actual_value)
            );
            cursor.next();
        }
        cursor.seek_to_first();
    }
}

/// Seek through the test block using probe keys that fall in the gaps between stored
/// keys: after checking entry `i`, seeking to `key_{i * 5 + offset}` must land on
/// entry `i + 1`. Each pass ends by seeking to `k`, which must land on the first entry.
#[test]
fn test_block_seek_key() {
    let block = Arc::new(generate_block());
    let mut cursor = BlockIterator::create_and_seek_to_key(block, key_of(0).as_key_slice());
    for offset in 1..=5 {
        for idx in 0..num_of_keys() {
            let expected_key = key_of(idx);
            let expected_value = value_of(idx);
            let actual_key = cursor.key();
            let actual_value = cursor.value();
            assert_eq!(
                actual_key.for_testing_key_ref(),
                expected_key.for_testing_key_ref(),
                "expected key: {:?}, actual key: {:?}",
                as_bytes(expected_key.for_testing_key_ref()),
                as_bytes(actual_key.for_testing_key_ref())
            );
            assert_eq!(
                actual_value,
                expected_value,
                "expected value: {:?}, actual value: {:?}",
                as_bytes(&expected_value),
                as_bytes(actual_value)
            );
            // Probe a key strictly between this entry's key and the next one's.
            let probe = format!("key_{:03}", idx * 5 + offset).into_bytes();
            cursor.seek_to_key(KeySlice::for_testing_from_slice_no_ts(&probe));
        }
        cursor.seek_to_key(KeySlice::for_testing_from_slice_no_ts(b"k"));
    }
}
Loading

0 comments on commit 5a013ac

Please sign in to comment.