Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(core/services-gcs): support user defined metadata #5276

Merged
merged 14 commits into from
Nov 6, 2024
19 changes: 18 additions & 1 deletion core/src/services/gcs/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use std::collections::HashMap;
use std::fmt::Debug;
use std::fmt::Formatter;
use std::sync::Arc;
Expand Down Expand Up @@ -363,6 +364,7 @@ impl Access for GcsBackend {
write_can_empty: true,
write_can_multi: true,
write_with_content_type: true,
write_with_user_metadata: true,
// The min multipart size of Gcs is 5 MiB.
//
// ref: <https://cloud.google.com/storage/docs/xml-api/put-object-multipart>
Expand Down Expand Up @@ -424,6 +426,10 @@ impl Access for GcsBackend {

m.set_last_modified(parse_datetime_from_rfc3339(&meta.updated)?);

if !meta.metadata.is_empty() {
m.with_user_metadata(meta.metadata);
}

Ok(RpStat::new(m))
}

Expand Down Expand Up @@ -593,6 +599,10 @@ struct GetObjectJsonResponse {
///
/// For example: `"contentType": "image/png",`
content_type: String,
/// Custom metadata of this object.
///
/// For example: `"metadata" : { "my-key": "my-value" }`
metadata: HashMap<String, String>,
}

#[cfg(test)]
Expand All @@ -618,7 +628,10 @@ mod tests {
"etag": "CKWasoTgyPkCEAE=",
"timeCreated": "2022-08-15T11:33:34.866Z",
"updated": "2022-08-15T11:33:34.866Z",
"timeStorageClassUpdated": "2022-08-15T11:33:34.866Z"
"timeStorageClassUpdated": "2022-08-15T11:33:34.866Z",
"metadata" : {
"location" : "everywhere"
}
}"#;

let meta: GetObjectJsonResponse =
Expand All @@ -629,5 +642,9 @@ mod tests {
assert_eq!(meta.md5_hash, "fHcEH1vPwA6eTPqxuasXcg==");
assert_eq!(meta.etag, "CKWasoTgyPkCEAE=");
assert_eq!(meta.content_type, "image/png");
assert_eq!(
meta.metadata,
HashMap::from_iter([("location".to_string(), "everywhere".to_string())])
);
}
}
104 changes: 70 additions & 34 deletions core/src/services/gcs/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
use backon::ExponentialBuilder;
use backon::Retryable;
use bytes::Bytes;
use constants::X_GOOG_ACL;
use constants::X_GOOG_STORAGE_CLASS;
use http::header::CONTENT_LENGTH;
use http::header::CONTENT_TYPE;
use http::header::HOST;
Expand All @@ -39,11 +41,17 @@
use reqsign::GoogleTokenLoader;
use serde::Deserialize;
use serde::Serialize;
use serde_json::json;

use super::uri::percent_encode_path;
use crate::raw::*;
use crate::*;
use constants::*;

/// Header names specific to Google Cloud Storage requests.
pub mod constants {
    /// Request header carrying the predefined ACL applied to an uploaded object.
    pub const X_GOOG_ACL: &str = "x-goog-acl";

    /// Request header carrying the storage class applied to an uploaded object.
    pub const X_GOOG_STORAGE_CLASS: &str = "x-goog-storage-class";

    /// Prefix prepended to a user metadata key to build its request header name,
    /// e.g. the key `location` is sent as `x-goog-meta-location`.
    pub const X_GOOG_META_PREFIX: &str = "x-goog-meta-";
}

pub struct GcsCore {
pub endpoint: String,
Expand Down Expand Up @@ -241,19 +249,18 @@
) -> Result<Request<Buffer>> {
let p = build_abs_path(&self.root, path);

let mut metadata = HashMap::new();
if let Some(storage_class) = &self.default_storage_class {
metadata.insert("storageClass", storage_class.as_str());
}
if let Some(cache_control) = op.cache_control() {
metadata.insert("cacheControl", cache_control);
}
let mut request_metadata = InsertRequestMetadata::default();

request_metadata.storage_class = self.default_storage_class.as_ref().map(String::as_str);
request_metadata.cache_control = op.cache_control();
request_metadata.content_type = op.content_type();
request_metadata.metadata = op.user_metadata();

let mut url = format!(
"{}/upload/storage/v1/b/{}/o?uploadType={}&name={}",
self.endpoint,
self.bucket,
if metadata.is_empty() {
if request_metadata.is_empty() {
"media"
} else {
"multipart"
Expand All @@ -269,37 +276,28 @@

req = req.header(CONTENT_LENGTH, size.unwrap_or_default());

if metadata.is_empty() {
if let Some(content_type) = op.content_type() {
req = req.header(CONTENT_TYPE, content_type);
}

if request_metadata.is_empty() {
jorgehermo9 marked this conversation as resolved.
Show resolved Hide resolved
// If the metadata is empty, we do not set any `Content-Type` header,
// since if we had it in the `op.content_type()`, it would be already set in the

Check warning on line 281 in core/src/services/gcs/core.rs

View workflow job for this annotation

GitHub Actions / typos

"alrady" should be "already".
// `multipart` metadata body and this branch won't be executed.
let req = req.body(body).map_err(new_request_build_error)?;
Ok(req)
} else {
let mut multipart = Multipart::new();

multipart = multipart.part(
FormDataPart::new("metadata")
.header(
CONTENT_TYPE,
"application/json; charset=UTF-8".parse().unwrap(),
)
.content(json!(metadata).to_string()),
);

let mut media_part = FormDataPart::new("media").content(body);

if let Some(content_type) = op.content_type() {
media_part = media_part.header(
let metadata_part = FormDataPart::new("metadata")
.header(
CONTENT_TYPE,
content_type
.parse()
.map_err(|_| Error::new(ErrorKind::Unexpected, "invalid header value"))?,
"application/json; charset=UTF-8".parse().unwrap(),
)
.content(
serde_json::to_string(&request_metadata)
Xuanwo marked this conversation as resolved.
Show resolved Hide resolved
.expect("metadata serialization should success"),
);
}
multipart = multipart.part(metadata_part);

let media_part = FormDataPart::new("media").content(body);
multipart = multipart.part(media_part);

let req = multipart.apply(Request::post(url))?;
Ok(req)
}
Expand All @@ -318,16 +316,22 @@

let mut req = Request::put(&url);

if let Some(user_metadata) = args.user_metadata() {
jorgehermo9 marked this conversation as resolved.
Show resolved Hide resolved
for (key, value) in user_metadata {
jorgehermo9 marked this conversation as resolved.
Show resolved Hide resolved
req = req.header(format!("{X_GOOG_META_PREFIX}{key}"), value)
}
}

if let Some(content_type) = args.content_type() {
req = req.header(CONTENT_TYPE, content_type);
}

if let Some(acl) = &self.predefined_acl {
req = req.header("x-goog-acl", acl);
req = req.header(X_GOOG_ACL, acl);
jorgehermo9 marked this conversation as resolved.
Show resolved Hide resolved
}

if let Some(storage_class) = &self.default_storage_class {
req = req.header("x-goog-storage-class", storage_class);
req = req.header(X_GOOG_STORAGE_CLASS, storage_class);
}

let req = req.body(body).map_err(new_request_build_error)?;
Expand Down Expand Up @@ -608,6 +612,38 @@
}
}

/// JSON body of the `metadata` part sent with a multipart object insert request.
///
/// Field names are serialized in camelCase (`contentType`, `storageClass`,
/// `cacheControl`, `metadata`), and any field left as `None` is omitted from
/// the serialized output entirely.
///
/// `Default` is derived, so a default value has every field set to `None`.
/// The type is serialize-only, which is why no deserialization-side serde
/// attributes are declared on it.
#[derive(Debug, Default, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct InsertRequestMetadata<'a> {
    /// Content type of the object being uploaded.
    #[serde(skip_serializing_if = "Option::is_none")]
    content_type: Option<&'a str>,
    /// Storage class to assign to the object.
    #[serde(skip_serializing_if = "Option::is_none")]
    storage_class: Option<&'a str>,
    /// `Cache-Control` value to store with the object.
    #[serde(skip_serializing_if = "Option::is_none")]
    cache_control: Option<&'a str>,
    /// User-defined key/value metadata to attach to the object.
    #[serde(skip_serializing_if = "Option::is_none")]
    metadata: Option<&'a HashMap<String, String>>,
}

impl InsertRequestMetadata<'_> {
    /// Returns `true` when none of the metadata fields is set.
    ///
    /// The struct is destructured exhaustively so that adding a new field
    /// without deciding how it affects emptiness fails to compile.
    pub fn is_empty(&self) -> bool {
        let Self {
            content_type,
            storage_class,
            cache_control,
            metadata,
        } = self;

        matches!(
            (content_type, storage_class, cache_control, metadata),
            (None, None, None, None)
        )
    }
}
/// Response JSON from GCS list objects API.
///
/// Refer to <https://cloud.google.com/storage/docs/json_api/v1/objects/list> for details.
Expand Down
Loading