-
Notifications
You must be signed in to change notification settings - Fork 416
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor!: Implement default
LogStore
trait
Introduce a new trait, `LogStore`, and a single implementation that keeps the functionality unchanged. `LogStore` serves as a centralized entry point for interacting with the delta commit log, allowing callers to read commits and atomically create new ones. This is in preparation for enabling S3 DynamoDB multi-cluster writes in compatibility with the reference JVM implementation.
- Loading branch information
Showing
31 changed files
with
443 additions
and
302 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
//! Default implementation of [`LogStore`] for storage backends with atomic put-if-absent operation | ||
use std::cmp::max; | ||
|
||
use bytes::Bytes; | ||
use futures::StreamExt; | ||
use lazy_static::lazy_static; | ||
use log::debug; | ||
use object_store::{path::Path, Error as ObjectStoreError, ObjectStore}; | ||
use regex::Regex; | ||
|
||
use super::LogStore; | ||
use crate::{ | ||
operations::transaction::TransactionError, | ||
protocol::{get_last_checkpoint, ProtocolError}, | ||
storage::{commit_uri_from_version, ObjectStoreRef}, | ||
DeltaResult, DeltaTableError, | ||
}; | ||
|
||
/// Default [`LogStore`] implementation | ||
#[derive(Debug, Clone)] | ||
pub struct DefaultLogStore { | ||
pub(crate) storage: ObjectStoreRef, | ||
} | ||
|
||
#[async_trait::async_trait] | ||
impl LogStore for DefaultLogStore { | ||
async fn read_commit_entry(&self, version: i64) -> DeltaResult<Bytes> { | ||
let commit_uri = commit_uri_from_version(version); | ||
let data = self.storage.get(&commit_uri).await?.bytes().await?; | ||
Ok(data) | ||
|
||
// TODO: return actual actions instead | ||
// let actions = Self::get_actions(next_version, commit_log_bytes).await; | ||
} | ||
|
||
/// Tries to commit a prepared commit file. Returns [DeltaTableError::VersionAlreadyExists] | ||
/// if the given `version` already exists. The caller should handle the retry logic itself. | ||
/// This is low-level transaction API. If user does not want to maintain the commit loop then | ||
/// the `DeltaTransaction.commit` is desired to be used as it handles `try_commit_transaction` | ||
/// with retry logic. | ||
async fn write_commit_entry( | ||
&self, | ||
version: i64, | ||
tmp_commit: &Path, | ||
) -> Result<(), TransactionError> { | ||
// move temporary commit file to delta log directory | ||
// rely on storage to fail if the file already exists - | ||
self.storage | ||
.rename_if_not_exists(tmp_commit, &commit_uri_from_version(version)) | ||
.await | ||
.map_err(|err| match err { | ||
ObjectStoreError::AlreadyExists { .. } => { | ||
TransactionError::VersionAlreadyExists(version) | ||
} | ||
_ => TransactionError::from(err), | ||
})?; | ||
Ok(()) | ||
} | ||
|
||
async fn get_latest_version(&self, current_version: i64) -> DeltaResult<i64> { | ||
let version_start = match get_last_checkpoint(&self.storage).await { | ||
Ok(last_check_point) => last_check_point.version, | ||
Err(ProtocolError::CheckpointNotFound) => { | ||
// no checkpoint | ||
-1 | ||
} | ||
Err(e) => { | ||
return Err(DeltaTableError::from(e)); | ||
} | ||
}; | ||
|
||
debug!("latest checkpoint version: {version_start}"); | ||
|
||
let version_start = max(current_version, version_start); | ||
|
||
lazy_static! { | ||
static ref DELTA_LOG_REGEX: Regex = | ||
Regex::new(r"_delta_log/(\d{20})\.(json|checkpoint).*$").unwrap(); | ||
} | ||
|
||
// list files to find max version | ||
let version = async { | ||
let mut max_version: i64 = version_start; | ||
let prefix = Some(self.storage.log_path()); | ||
let offset_path = commit_uri_from_version(max_version); | ||
let mut files = self.storage.list_with_offset(prefix, &offset_path).await?; | ||
// let mut files = self.storage.list_with_offset(prefix, &offset_path).await?; | ||
|
||
while let Some(obj_meta) = files.next().await { | ||
let obj_meta = obj_meta?; | ||
if let Some(captures) = DELTA_LOG_REGEX.captures(obj_meta.location.as_ref()) { | ||
let log_version = captures.get(1).unwrap().as_str().parse().unwrap(); | ||
// listing may not be ordered | ||
max_version = max(max_version, log_version); | ||
// also cache timestamp for version, for faster time-travel | ||
// TODO: temporarily disabled because `version_timestamp` is not available in the [`LogStore`] | ||
// self.version_timestamp | ||
// .insert(log_version, obj_meta.last_modified.timestamp()); | ||
} | ||
} | ||
|
||
if max_version < 0 { | ||
return Err(DeltaTableError::not_a_table(self.storage.root_uri())); | ||
} | ||
|
||
Ok::<i64, DeltaTableError>(max_version) | ||
} | ||
.await?; | ||
Ok(version) | ||
} | ||
|
||
fn object_store(&self) -> ObjectStoreRef { | ||
self.storage.clone() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.