From 00b9b3abc92599b9ea9ea75879b01fa825aebe90 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 12 Nov 2024 22:49:08 +0800 Subject: [PATCH 1/3] rfc: Remove Metakey Signed-off-by: Xuanwo --- core/src/docs/rfcs/0000_remove_metakey.md | 120 ++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 core/src/docs/rfcs/0000_remove_metakey.md diff --git a/core/src/docs/rfcs/0000_remove_metakey.md b/core/src/docs/rfcs/0000_remove_metakey.md new file mode 100644 index 000000000000..a43e5a84351b --- /dev/null +++ b/core/src/docs/rfcs/0000_remove_metakey.md @@ -0,0 +1,120 @@ +- Proposal Name: `remove_metakey` +- Start Date: 2024-11-12 +- RFC PR: [apache/opendal#0000](https://github.com/apache/opendal/pull/0000) +- Tracking Issue: [apache/opendal#0000](https://github.com/apache/opendal/issues/0000) + +# Summary + +Remove the `Metakey` concept from OpenDAL and replace it with a simpler and more predictable metadata handling mechanism. + +# Motivation + +The current `Metakey` design has several issues: + +1. Performance Impact: Users often unknowingly trigger expensive operations (e.g., using `Metakey::Full` causes additional stat calls) +2. Usability Issues: Users frequently attempt to access metadata that wasn't explicitly requested +3. API Confusion: Overlap between `Metakey::Version` and the new `version(bool)` parameter +4. Implementation Complexity: Service developers find it challenging to implement `Metakey` correctly + +The expected outcome is a simpler, more predictable API that prevents common mistakes and improves performance by default. + +# Guide-level explanation + +Instead of using `Metakey` to specify which metadata fields to fetch, services will now declare their metadata capabilities upfront through a new `MetadataCapability` struct: + +```rust +let entries = op.list("path").await?; +for entry in entries { + if op.metadata_capability().content_type { + println!("Content-Type: {}", entry.metadata().content_type()); + } +} +``` + +If users need additional metadata not provided by `list`: + +```rust +let entries = op.list("path").await?; +for entry in entries { + let mut meta = entry.metadata(); + if !op.metadata_capability().etag { + meta = op.stat(&entry.path()).await?; + } + println!("Content-Type: {}", meta.etag()); +} +``` + +For existing OpenDAL users, the main changes are: + +- Remove all `metakey()` calls from their code +- Use `metadata_capability()` to check available metadata +- Explicitly call `stat()` when needed + +# Reference-level explanation + +The implementation involves: + +1. Remove the `Metakey` enum +2. Add new `MetadataCapability` struct: +```rust +pub struct MetadataCapability { + pub content_length: bool, + pub content_type: bool, + pub last_modified: bool, + pub etag: bool, + pub mode: bool, + pub version: bool, + ... +} +``` + +3. Add method to Operator to query capabilities: +```rust +impl Operator { + pub fn metadata_capability(&self) -> MetadataCapability; +} +``` + +4. Modify list operation to never perform implicit stat calls +5. Update all service implementations to declare their metadata capabilities + +Each service implementation will need to: +- Remove `Metakey` handling logic +- Implement `metadata_capability()` to accurately reflect what metadata they provide by default +- Ensure list operations return metadata that's always available without extra API calls + +# Drawbacks + +- Breaking change for existing users +- Loss of fine-grained control over metadata fetching +- Potential increased API calls if users need multiple metadata fields + +# Rationale and alternatives + +This design is superior because: +- Prevents performance pitfalls by default +- Makes metadata availability explicit +- Simplifies service implementation +- Provides clearer mental model + +Alternatives considered: +1. Keep `Metakey` but make it more restrictive +2. Add warnings for potentially expensive operations +3. Make stat calls async/lazy + +Not making this change would continue the current issues of performance problems and API misuse. + +# Prior art + +None + +# Unresolved questions + +None + +# Future possibilities + +- Add metadata prefetching optimization +- Add metadata caching layer +- Support for custom metadata fields +- Automated capability detection From 5a05da87582ec9b4a2fab6d487454323806d4f65 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 12 Nov 2024 22:51:24 +0800 Subject: [PATCH 2/3] Rename Signed-off-by: Xuanwo --- .../rfcs/{0000_remove_metakey.md => 5314_remove_metakey.md} | 4 ++-- core/src/docs/rfcs/mod.rs | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) rename core/src/docs/rfcs/{0000_remove_metakey.md => 5314_remove_metakey.md} (95%) diff --git a/core/src/docs/rfcs/0000_remove_metakey.md b/core/src/docs/rfcs/5314_remove_metakey.md similarity index 95% rename from core/src/docs/rfcs/0000_remove_metakey.md rename to core/src/docs/rfcs/5314_remove_metakey.md index a43e5a84351b..719859a7d28e 100644 --- a/core/src/docs/rfcs/0000_remove_metakey.md +++ b/core/src/docs/rfcs/5314_remove_metakey.md @@ -1,7 +1,7 @@ - Proposal Name: `remove_metakey` - Start Date: 2024-11-12 -- RFC PR: [apache/opendal#0000](https://github.com/apache/opendal/pull/0000) -- Tracking Issue: [apache/opendal#0000](https://github.com/apache/opendal/issues/0000) +- RFC PR: [apache/opendal#5313](https://github.com/apache/opendal/pull/5313) +- Tracking Issue: [apache/opendal#5314](https://github.com/apache/opendal/issues/5314) # Summary diff --git a/core/src/docs/rfcs/mod.rs b/core/src/docs/rfcs/mod.rs index d5edefe3f98f..a2c042ed598f 100644 --- a/core/src/docs/rfcs/mod.rs +++ b/core/src/docs/rfcs/mod.rs @@ -236,3 +236,7 @@ pub mod rfc_4382_range_based_read {} /// Executor API #[doc = include_str!("4638_executor.md")] pub mod rfc_4638_executor {} + +/// Remove metakey +#[doc = include_str!("5314_remove_metakey.md")] +pub mod rfc_5314_remove_metakey {} From c496f129d25613850e22ab5d6eb36cc267b16d13 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 13 Nov 2024 11:32:00 +0800 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Qinxuan Chen --- core/src/docs/rfcs/5314_remove_metakey.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/core/src/docs/rfcs/5314_remove_metakey.md b/core/src/docs/rfcs/5314_remove_metakey.md index 719859a7d28e..d9d7a1fc4b1a 100644 --- a/core/src/docs/rfcs/5314_remove_metakey.md +++ b/core/src/docs/rfcs/5314_remove_metakey.md @@ -11,12 +11,12 @@ Remove the `Metakey` concept from OpenDAL and replace it with a simpler and more The current `Metakey` design has several issues: -1. Performance Impact: Users often unknowingly trigger expensive operations (e.g., using `Metakey::Full` causes additional stat calls) -2. Usability Issues: Users frequently attempt to access metadata that wasn't explicitly requested -3. API Confusion: Overlap between `Metakey::Version` and the new `version(bool)` parameter -4. Implementation Complexity: Service developers find it challenging to implement `Metakey` correctly +1. Performance Impact: Users often initiate costly operations unintentionally, such as using `Metakey::Full`, which results in extra stat calls +2. Usability Issues: Users often try to access metadata that hasn't been explicitly requested +3. API Confusion: There's a conflict between `Metakey::Version` and the new `version(bool)` parameter +4. Implementation Complexity: Service developers struggle to implement `Metakey` correctly -The expected outcome is a simpler, more predictable API that prevents common mistakes and improves performance by default. +The goal is a simpler, more intuitive API that prevents common mistakes and improves performance as standard. # Guide-level explanation @@ -75,12 +75,12 @@ impl Operator { } ``` -4. Modify list operation to never perform implicit stat calls +4. Modify list operation to avoid implicit stat calls 5. Update all service implementations to declare their metadata capabilities Each service implementation will need to: - Remove `Metakey` handling logic -- Implement `metadata_capability()` to accurately reflect what metadata they provide by default +- Implement `metadata_capability()` to accurately indicate the metadata provided by default - Ensure list operations return metadata that's always available without extra API calls # Drawbacks @@ -93,13 +93,13 @@ Each service implementation will need to: This design is superior because: - Prevents performance pitfalls by default -- Makes metadata availability explicit +- Makes metadata availability explicitly - Simplifies service implementation - Provides clearer mental model Alternatives considered: 1. Keep `Metakey` but make it more restrictive -2. Add warnings for potentially expensive operations +2. Add warnings for potentially costly operations 3. Make stat calls async/lazy Not making this change would continue the current issues of performance problems and API misuse.