Skip to content

Commit e78d140

Browse files
authored
Add MultiPartStore (#4961) (#4608) (#4971)
* Add MultiPartStore (#4961) (#4608) * Parse CompleteMultipartUploadResult (#4965) * More docs * Add integration test * Fix azure * More docs * Don't gate multipart behind feature flag
1 parent a6a512f commit e78d140

File tree

9 files changed

+329
-85
lines changed

9 files changed

+329
-85
lines changed

object_store/src/aws/client.rs

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ use crate::client::retry::RetryExt;
2727
use crate::client::GetOptionsExt;
2828
use crate::multipart::PartId;
2929
use crate::path::DELIMITER;
30-
use crate::{ClientOptions, GetOptions, ListResult, MultipartId, Path, Result, RetryConfig};
30+
use crate::{
31+
ClientOptions, GetOptions, ListResult, MultipartId, Path, PutResult, Result, RetryConfig,
32+
};
3133
use async_trait::async_trait;
3234
use base64::prelude::BASE64_STANDARD;
3335
use base64::Engine;
@@ -115,6 +117,9 @@ pub(crate) enum Error {
115117
#[snafu(display("Error performing complete multipart request: {}", source))]
116118
CompleteMultipartRequest { source: crate::client::retry::Error },
117119

120+
#[snafu(display("Error getting complete multipart response body: {}", source))]
121+
CompleteMultipartResponseBody { source: reqwest::Error },
122+
118123
#[snafu(display("Got invalid list response: {}", source))]
119124
InvalidListResponse { source: quick_xml::de::DeError },
120125

@@ -162,6 +167,13 @@ struct MultipartPart {
162167
part_number: usize,
163168
}
164169

170+
/// Deserialized form of the `CompleteMultipartUploadResult` XML element that
/// `complete_multipart` parses out of the response body.
///
/// Only the `ETag` child element is captured by this struct.
#[derive(Debug, Deserialize)]
171+
#[serde(rename_all = "PascalCase", rename = "CompleteMultipartUploadResult")]
172+
struct CompleteMultipartResult {
173+
/// Value of the `ETag` element of the response.
#[serde(rename = "ETag")]
174+
e_tag: String,
175+
}
176+
165177
#[derive(Deserialize)]
166178
#[serde(rename_all = "PascalCase", rename = "DeleteResult")]
167179
struct BatchDeleteResponse {
@@ -506,12 +518,32 @@ impl S3Client {
506518
Ok(response.upload_id)
507519
}
508520

521+
/// Uploads `data` as a single part of the multipart upload `upload_id` at `path`.
///
/// `part_idx` is the zero-based index of the part; it is sent to the service
/// as the one-based `partNumber` query parameter (`part_idx + 1`).
///
/// Returns a [`PartId`] whose `content_id` is the value returned by
/// `put_request`. Any error from `put_request` is propagated unchanged.
pub async fn put_part(
522+
&self,
523+
path: &Path,
524+
upload_id: &MultipartId,
525+
part_idx: usize,
526+
data: Bytes,
527+
) -> Result<PartId> {
528+
// Convert the zero-based index into the one-based part number sent in the query.
let part = (part_idx + 1).to_string();
529+
530+
let content_id = self
531+
.put_request(
532+
path,
533+
data,
534+
&[("partNumber", &part), ("uploadId", upload_id)],
535+
)
536+
.await?;
537+
538+
Ok(PartId { content_id })
539+
}
540+
509541
pub async fn complete_multipart(
510542
&self,
511543
location: &Path,
512544
upload_id: &str,
513545
parts: Vec<PartId>,
514-
) -> Result<()> {
546+
) -> Result<PutResult> {
515547
let parts = parts
516548
.into_iter()
517549
.enumerate()
@@ -527,7 +559,8 @@ impl S3Client {
527559
let credential = self.get_credential().await?;
528560
let url = self.config.path_url(location);
529561

530-
self.client
562+
let response = self
563+
.client
531564
.request(Method::POST, url)
532565
.query(&[("uploadId", upload_id)])
533566
.body(body)
@@ -542,7 +575,17 @@ impl S3Client {
542575
.await
543576
.context(CompleteMultipartRequestSnafu)?;
544577

545-
Ok(())
578+
let data = response
579+
.bytes()
580+
.await
581+
.context(CompleteMultipartResponseBodySnafu)?;
582+
583+
let response: CompleteMultipartResult =
584+
quick_xml::de::from_reader(data.reader()).context(InvalidMultipartResponseSnafu)?;
585+
586+
Ok(PutResult {
587+
e_tag: Some(response.e_tag),
588+
})
546589
}
547590
}
548591

object_store/src/aws/mod.rs

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ use crate::aws::client::S3Client;
4444
use crate::client::get::GetClientExt;
4545
use crate::client::list::ListClientExt;
4646
use crate::client::CredentialProvider;
47-
use crate::multipart::{PartId, PutPart, WriteMultiPart};
47+
use crate::multipart::{MultiPartStore, PartId, PutPart, WriteMultiPart};
4848
use crate::signer::Signer;
4949
use crate::{
5050
GetOptions, GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore, Path, PutResult,
@@ -246,18 +246,9 @@ struct S3MultiPartUpload {
246246
#[async_trait]
247247
impl PutPart for S3MultiPartUpload {
248248
async fn put_part(&self, buf: Vec<u8>, part_idx: usize) -> Result<PartId> {
249-
let part = (part_idx + 1).to_string();
250-
251-
let content_id = self
252-
.client
253-
.put_request(
254-
&self.location,
255-
buf.into(),
256-
&[("partNumber", &part), ("uploadId", &self.upload_id)],
257-
)
258-
.await?;
259-
260-
Ok(PartId { content_id })
249+
self.client
250+
.put_part(&self.location, &self.upload_id, part_idx, buf.into())
251+
.await
261252
}
262253

263254
async fn complete(&self, completed_parts: Vec<PartId>) -> Result<()> {
@@ -268,6 +259,36 @@ impl PutPart for S3MultiPartUpload {
268259
}
269260
}
270261

262+
/// [`MultiPartStore`] implementation for [`AmazonS3`].
///
/// Every method forwards directly to the corresponding call on `self.client`.
#[async_trait]
263+
impl MultiPartStore for AmazonS3 {
264+
/// Starts a multipart upload for `path` and returns the id produced by
/// the client's `create_multipart`.
async fn create_multipart(&self, path: &Path) -> Result<MultipartId> {
265+
self.client.create_multipart(path).await
266+
}
267+
268+
/// Uploads `data` as part `part_idx` of the upload `id` at `path` by
/// delegating to the client's `put_part`.
async fn put_part(
269+
&self,
270+
path: &Path,
271+
id: &MultipartId,
272+
part_idx: usize,
273+
data: Bytes,
274+
) -> Result<PartId> {
275+
self.client.put_part(path, id, part_idx, data).await
276+
}
277+
278+
/// Completes the upload `id` at `path` from the given `parts` and returns
/// the [`PutResult`] produced by the client's `complete_multipart`.
async fn complete_multipart(
279+
&self,
280+
path: &Path,
281+
id: &MultipartId,
282+
parts: Vec<PartId>,
283+
) -> Result<PutResult> {
284+
self.client.complete_multipart(path, id, parts).await
285+
}
286+
287+
/// Aborts the upload `id` by issuing the client's delete request for
/// `path` with `id` as the `uploadId` query parameter.
async fn abort_multipart(&self, path: &Path, id: &MultipartId) -> Result<()> {
288+
self.client.delete_request(path, &[("uploadId", id)]).await
289+
}
290+
}
291+
271292
#[cfg(test)]
272293
mod tests {
273294
use super::*;
@@ -293,6 +314,8 @@ mod tests {
293314
list_with_delimiter(&integration).await;
294315
rename_and_copy(&integration).await;
295316
stream_get(&integration).await;
317+
multipart(&integration, &integration).await;
318+
296319
if test_not_exists {
297320
copy_if_not_exists(&integration).await;
298321
}

object_store/src/azure/client.rs

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,16 @@ use super::credential::AzureCredential;
1919
use crate::azure::credential::*;
2020
use crate::azure::{AzureCredentialProvider, STORE};
2121
use crate::client::get::GetClient;
22-
use crate::client::header::HeaderConfig;
22+
use crate::client::header::{get_etag, HeaderConfig};
2323
use crate::client::list::ListClient;
2424
use crate::client::retry::RetryExt;
2525
use crate::client::GetOptionsExt;
26+
use crate::multipart::PartId;
2627
use crate::path::DELIMITER;
2728
use crate::util::deserialize_rfc1123;
28-
use crate::{ClientOptions, GetOptions, ListResult, ObjectMeta, Path, Result, RetryConfig};
29+
use crate::{
30+
ClientOptions, GetOptions, ListResult, ObjectMeta, Path, PutResult, Result, RetryConfig,
31+
};
2932
use async_trait::async_trait;
3033
use base64::prelude::BASE64_STANDARD;
3134
use base64::Engine;
@@ -84,6 +87,11 @@ pub(crate) enum Error {
8487
Authorization {
8588
source: crate::azure::credential::Error,
8689
},
90+
91+
#[snafu(display("Unable to extract metadata from headers: {}", source))]
92+
Metadata {
93+
source: crate::client::header::Error,
94+
},
8795
}
8896

8997
impl From<Error> for crate::Error {
@@ -190,6 +198,43 @@ impl AzureClient {
190198
Ok(response)
191199
}
192200

201+
/// PUT a block <https://learn.microsoft.com/en-us/rest/api/storageservices/put-block>
///
/// Uploads `data` as one block of the blob at `path`. The block id is derived
/// from `part_idx` and sent base64-encoded in the `blockid` query parameter
/// together with `comp=block`.
///
/// Returns a [`PartId`] whose `content_id` is the un-encoded block id string,
/// which `put_block_list` later converts back into a `BlockId`.
pub async fn put_block(&self, path: &Path, part_idx: usize, data: Bytes) -> Result<PartId> {
203+
// Width-20 formatting right-aligns the index and pads it with spaces, so
// ids have a uniform length.
// NOTE(review): Azure's Put Block docs are understood to require equal-length
// block ids within a blob — confirm against the linked reference.
let content_id = format!("{part_idx:20}");
204+
let block_id: BlockId = content_id.clone().into();
205+
206+
// NOTE(review): the meaning of the `true` argument is defined by
// `put_request`, which is not visible here — confirm before changing it.
self.put_request(
207+
path,
208+
Some(data),
209+
true,
210+
&[
211+
("comp", "block"),
212+
("blockid", &BASE64_STANDARD.encode(block_id)),
213+
],
214+
)
215+
.await?;
216+
217+
Ok(PartId { content_id })
218+
}
219+
220+
/// PUT a block list <https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-list>
///
/// Commits the blob at `path` from `parts`: each part's `content_id` is
/// converted into a `BlockId`, the ids are serialized to XML via
/// `BlockList::to_xml`, and the result is sent with `comp=blocklist`.
///
/// Returns a [`PutResult`] carrying the ETag read from the response headers.
/// Errors from `put_request` are propagated; a failure to extract the ETag is
/// wrapped with the `Metadata` error context.
pub async fn put_block_list(&self, path: &Path, parts: Vec<PartId>) -> Result<PutResult> {
222+
// Blocks are listed in the order in which `parts` was supplied.
let blocks = parts
223+
.into_iter()
224+
.map(|part| BlockId::from(part.content_id))
225+
.collect();
226+
227+
let block_list = BlockList { blocks };
228+
let block_xml = block_list.to_xml();
229+
230+
let response = self
231+
.put_request(path, Some(block_xml.into()), true, &[("comp", "blocklist")])
232+
.await?;
233+
234+
let e_tag = get_etag(response.headers()).context(MetadataSnafu)?;
235+
Ok(PutResult { e_tag: Some(e_tag) })
236+
}
237+
193238
/// Make an Azure Delete request <https://docs.microsoft.com/en-us/rest/api/storageservices/delete-blob>
194239
pub async fn delete_request<T: Serialize + ?Sized + Sync>(
195240
&self,

object_store/src/azure/mod.rs

Lines changed: 35 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,12 @@
2626
//! [ObjectStore::abort_multipart] is a no-op, since Azure Blob Store doesn't provide
2727
//! a way to drop old blocks. Instead unused blocks are automatically cleaned up
2828
//! after 7 days.
29-
use self::client::{BlockId, BlockList};
3029
use crate::{
3130
multipart::{PartId, PutPart, WriteMultiPart},
3231
path::Path,
3332
GetOptions, GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore, PutResult, Result,
3433
};
3534
use async_trait::async_trait;
36-
use base64::prelude::BASE64_STANDARD;
37-
use base64::Engine;
3835
use bytes::Bytes;
3936
use futures::stream::BoxStream;
4037
use std::fmt::Debug;
@@ -53,6 +50,7 @@ mod credential;
5350
/// [`CredentialProvider`] for [`MicrosoftAzure`]
5451
pub type AzureCredentialProvider = Arc<dyn CredentialProvider<Credential = AzureCredential>>;
5552
use crate::client::header::get_etag;
53+
use crate::multipart::MultiPartStore;
5654
pub use builder::{AzureConfigKey, MicrosoftAzureBuilder};
5755
pub use credential::AzureCredential;
5856

@@ -151,54 +149,52 @@ struct AzureMultiPartUpload {
151149

152150
#[async_trait]
153151
impl PutPart for AzureMultiPartUpload {
154-
async fn put_part(&self, buf: Vec<u8>, part_idx: usize) -> Result<PartId> {
155-
let content_id = format!("{part_idx:20}");
156-
let block_id: BlockId = content_id.clone().into();
157-
158-
self.client
159-
.put_request(
160-
&self.location,
161-
Some(buf.into()),
162-
true,
163-
&[
164-
("comp", "block"),
165-
("blockid", &BASE64_STANDARD.encode(block_id)),
166-
],
167-
)
168-
.await?;
152+
/// Uploads `buf` as block `idx` of the blob at `self.location` by delegating
/// to the client's `put_block`.
async fn put_part(&self, buf: Vec<u8>, idx: usize) -> Result<PartId> {
153+
self.client.put_block(&self.location, idx, buf.into()).await
154+
}
169155

170-
Ok(PartId { content_id })
156+
/// Commits `parts` as the block list of the blob at `self.location`.
///
/// The `PutResult` returned by `put_block_list` is discarded; only success
/// or failure is reported to the caller.
async fn complete(&self, parts: Vec<PartId>) -> Result<()> {
157+
self.client.put_block_list(&self.location, parts).await?;
158+
Ok(())
171159
}
160+
}
172161

173-
async fn complete(&self, completed_parts: Vec<PartId>) -> Result<()> {
174-
let blocks = completed_parts
175-
.into_iter()
176-
.map(|part| BlockId::from(part.content_id))
177-
.collect();
162+
#[async_trait]
163+
impl MultiPartStore for MicrosoftAzure {
164+
/// Returns an empty [`MultipartId`] without contacting the service.
///
/// The path argument is ignored, and the sibling `put_part` and
/// `complete_multipart` methods of this impl ignore the id as well.
async fn create_multipart(&self, _: &Path) -> Result<MultipartId> {
165+
Ok(String::new())
166+
}
178167

179-
let block_list = BlockList { blocks };
180-
let block_xml = block_list.to_xml();
168+
/// Uploads `data` as block `part_idx` of the blob at `path` by delegating to
/// the client's `put_block`. The multipart id argument is ignored.
async fn put_part(
169+
&self,
170+
path: &Path,
171+
_: &MultipartId,
172+
part_idx: usize,
173+
data: Bytes,
174+
) -> Result<PartId> {
175+
self.client.put_block(path, part_idx, data).await
176+
}
181177

182-
self.client
183-
.put_request(
184-
&self.location,
185-
Some(block_xml.into()),
186-
true,
187-
&[("comp", "blocklist")],
188-
)
189-
.await?;
178+
/// Commits `parts` as the block list of the blob at `path` and returns the
/// [`PutResult`] produced by the client's `put_block_list`. The multipart id
/// argument is ignored.
async fn complete_multipart(
179+
&self,
180+
path: &Path,
181+
_: &MultipartId,
182+
parts: Vec<PartId>,
183+
) -> Result<PutResult> {
184+
self.client.put_block_list(path, parts).await
185+
}
190186

187+
/// No-op: ignores both arguments and always returns `Ok(())`.
async fn abort_multipart(&self, _: &Path, _: &MultipartId) -> Result<()> {
188+
// There is no way to drop blocks that have been uploaded. Instead, they simply
189+
// expire in 7 days.
191190
Ok(())
192191
}
193192
}
194193

195194
#[cfg(test)]
196195
mod tests {
197196
use super::*;
198-
use crate::tests::{
199-
copy_if_not_exists, get_opts, list_uses_directories_correctly, list_with_delimiter,
200-
put_get_delete_list_opts, rename_and_copy, stream_get,
201-
};
197+
use crate::tests::*;
202198

203199
#[tokio::test]
204200
async fn azure_blob_test() {
@@ -212,6 +208,7 @@ mod tests {
212208
rename_and_copy(&integration).await;
213209
copy_if_not_exists(&integration).await;
214210
stream_get(&integration).await;
211+
multipart(&integration, &integration).await;
215212
}
216213

217214
#[test]

0 commit comments

Comments
 (0)