From 18b7c67e86070191e950223430fe56328544a909 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 17 Oct 2023 17:56:02 +0100 Subject: [PATCH] Document commit protocol --- object_store/src/aws/dynamo.rs | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/object_store/src/aws/dynamo.rs b/object_store/src/aws/dynamo.rs index a5d7345abf7d..56954e176506 100644 --- a/object_store/src/aws/dynamo.rs +++ b/object_store/src/aws/dynamo.rs @@ -60,20 +60,24 @@ const CONFLICT: &str = "com.amazonaws.dynamodb.v20120810#ConditionalCheckFailedE /// * A numeric attribute named `"generation"` /// * A numeric attribute named `"timeout"` /// -/// The lock procedure is as follows: +/// To perform a conditional operation on an object with a given `path` and `etag` (if it exists), +/// the commit protocol is as follows: /// -/// * Error if file exists in S3 -/// * Create a corresponding record in DynamoDB with the path as the `"key"` -/// * On Success: Create object in S3 -/// * On Conflict: -/// * Periodically check if file exists in S3 -/// * After a 60 second timeout attempt to "claim" the lock by incrementing `"generation"` -/// * GOTO start +/// 1. Perform HEAD request on `path` and error on precondition mismatch +/// 2. Create record in DynamoDB with key `{path}#{etag}` with the configured timeout +/// 1. On Success: Perform operation with the configured timeout +/// 2. On Conflict: +/// 1. Periodically re-perform HEAD request on `path` and error on precondition mismatch +/// 2. If etag changed, GOTO 2. +/// 3. If `timeout * max_skew_rate` has passed, replace the record incrementing the `"generation"` +/// 1. On Success: GOTO 2.1 +/// 2. On Conflict: GOTO 2.2 /// -/// This is inspired by the [DynamoDB Lock Client] but simplified for the more limited -/// requirements of synchronizing object storage. 
+/// Provided no writer modifies an object with a given `path` and `etag` without first adding a +/// corresponding record to DynamoDB, we are guaranteed that only one writer will ever commit. /// -/// The major changes are: +/// This is inspired by the [DynamoDB Lock Client] but simplified for the more limited +/// requirements of synchronizing object storage. The major changes are: /// /// * Uses a monotonic generation count instead of a UUID rvn, as this is: /// * Cheaper to generate, serialize and compare @@ -81,8 +85,13 @@ const CONFLICT: &str = "com.amazonaws.dynamodb.v20120810#ConditionalCheckFailedE /// * More human readable / interpretable /// * Relies on [TTL] to eventually clean up old locks /// +/// It also draws inspiration from the DeltaLake [S3 Multi-Cluster] commit protocol, but is +/// generalised to not make assumptions about the workload and not rely on first writing +/// to a temporary path. +/// /// [TTL]: https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/howitworks-ttl.html /// [DynamoDB Lock Client]: https://aws.amazon.com/blogs/database/building-distributed-locks-with-the-dynamodb-lock-client/ /// [S3 Multi-Cluster]: https://docs.google.com/document/d/1Gs4ZsTH19lMxth4BSdwlWjUNR-XhKHicDvBjd2RqNd8/edit#heading=h.mjjuxw9mcz9h #[derive(Debug, Clone)] pub struct DynamoCommit { table_name: String,