From 961464d03471d4872936268ca017c51bb12f86a6 Mon Sep 17 00:00:00 2001 From: Aditya Manthramurthy Date: Tue, 31 Oct 2023 11:35:56 -0700 Subject: [PATCH] Parse and add tags to list objects output type --- Cargo.toml | 1 + src/s3/error.rs | 2 + src/s3/response/list_objects.rs | 9 +++- src/s3/types.rs | 1 + src/s3/utils.rs | 83 ++++++++++++++++++++++++++++++++- 5 files changed, 93 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a11279e..7f60acb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ lazy_static = "1.4.0" md5 = "0.7.0" multimap = "0.10.0" os_info = "3.7.0" +percent-encoding = "2.3.0" rand = "0.8.5" regex = "1.9.4" serde = { version = "1.0.188", features = ["derive"] } diff --git a/src/s3/error.rs b/src/s3/error.rs index 52d1490..531e00d 100644 --- a/src/s3/error.rs +++ b/src/s3/error.rs @@ -110,6 +110,7 @@ pub enum Error { PostPolicyError(String), InvalidObjectLockConfig(String), NoClientProvided, + TagDecodingError(String, String), } impl std::error::Error for Error {} @@ -214,6 +215,7 @@ impl fmt::Display for Error { Error::PostPolicyError(m) => write!(f, "{}", m), Error::InvalidObjectLockConfig(m) => write!(f, "{}", m), Error::NoClientProvided => write!(f, "no client provided"), + Error::TagDecodingError(input, error_message) => write!(f, "tag decoding failed: {} on input '{}'", error_message, input), } } } diff --git a/src/s3/response/list_objects.rs b/src/s3/response/list_objects.rs index 2b41572..28607d2 100644 --- a/src/s3/response/list_objects.rs +++ b/src/s3/response/list_objects.rs @@ -22,7 +22,7 @@ use crate::s3::{ error::Error, types::{FromS3Response, ListEntry, S3Request}, utils::{ - from_iso8601utc, urldecode, + from_iso8601utc, parse_tags, urldecode, xml::{Element, MergeXmlElements}, }, }; @@ -125,6 +125,11 @@ fn parse_list_objects_contents( }) .collect::>() }); + let user_tags = content + .get_child_text("UserTags") + .as_ref() + .map(|x| parse_tags(x)) + .transpose()?; let is_delete_marker = content.name() 
== "DeleteMarker"; contents.push(ListEntry { @@ -138,6 +143,7 @@ fn parse_list_objects_contents( is_latest, version_id, user_metadata, + user_tags, is_prefix: false, is_delete_marker, encoding_type: etype, @@ -168,6 +174,7 @@ fn parse_list_objects_common_prefixes( is_latest: false, version_id: None, user_metadata: None, + user_tags: None, is_prefix: true, is_delete_marker: false, encoding_type: encoding_type.as_ref().cloned(), diff --git a/src/s3/types.rs b/src/s3/types.rs index 6ab1ff2..56e15ac 100644 --- a/src/s3/types.rs +++ b/src/s3/types.rs @@ -159,6 +159,7 @@ pub struct ListEntry { pub is_latest: bool, // except ListObjects V1/V2 pub version_id: Option, // except ListObjects V1/V2 pub user_metadata: Option>, + pub user_tags: Option>, pub is_prefix: bool, pub is_delete_marker: bool, pub encoding_type: Option, diff --git a/src/s3/utils.rs b/src/s3/utils.rs index bcaf73c..76a498e 100644 --- a/src/s3/utils.rs +++ b/src/s3/utils.rs @@ -15,7 +15,8 @@ //! Various utility and helper functions -use crate::s3::error::Error; +use std::collections::{BTreeMap, HashMap}; + use base64::engine::general_purpose::STANDARD as BASE64; use base64::engine::Engine as _; use byteorder::{BigEndian, ReadBytesExt}; @@ -24,13 +25,15 @@ use crc::{Crc, CRC_32_ISO_HDLC}; use lazy_static::lazy_static; use md5::compute as md5compute; use multimap::MultiMap; +use percent_encoding::{percent_decode_str, utf8_percent_encode, AsciiSet, NON_ALPHANUMERIC}; use regex::Regex; use sha2::{Digest, Sha256}; -use std::collections::BTreeMap; pub use urlencoding::decode as urldecode; pub use urlencoding::encode as urlencode; use xmltree::Element; +use crate::s3::error::Error; + /// Date and time with UTC timezone pub type UtcTime = DateTime; @@ -392,11 +395,86 @@ pub fn copy_slice(dst: &mut [u8], src: &[u8]) -> usize { c } +// Characters to escape in query strings. Based on RFC 3986 and the golang +// net/url implementation used in the MinIO server. +// +// https://tools.ietf.org/html/rfc3986 +// +// 1. 
All non-ascii characters are escaped always. +// 2. All reserved characters are escaped. +// 3. Any other characters are not escaped. +// +// Unreserved characters in addition to alphanumeric characters are: '-', '_', +// '.', '~' (§2.3 Unreserved characters (mark)) +// +// Reserved characters for query strings: '$', '&', '+', ',', '/', ':', ';', +// '=', '?', '@' (§3.4) +// +// NON_ALPHANUMERIC already escapes everything non-alphanumeric (it includes all +// the reserved characters). So we only remove the unreserved characters from +// this set. +const QUERY_ESCAPE: &AsciiSet = &NON_ALPHANUMERIC + .remove(b'-') + .remove(b'_') + .remove(b'.') + .remove(b'~'); + +fn unescape(s: &str) -> Result { + percent_decode_str(s) + .decode_utf8() + .map_err(|e| Error::TagDecodingError(s.to_string(), e.to_string())) + .map(|s| s.to_string()) +} + +fn escape(s: &str) -> String { + utf8_percent_encode(s, QUERY_ESCAPE).collect() +} + +// TODO: use this while adding API to set tags. +// +// Handles escaping same as MinIO server - needed for ensuring compatibility. 
+/// Serializes a tag map into `key=value` pairs joined by `&`, escaping every
+/// key and value with `escape`.
+///
+/// Pairs are written in ascending key order. `HashMap` iteration order is
+/// unspecified, so without the sort the same tags could encode differently
+/// from one call to the next; Go's `url.Values.Encode` sorts by key as well.
+pub fn encode_tags(h: &HashMap<String, String>) -> String {
+    let mut pairs: Vec<(&String, &String)> = h.iter().collect();
+    // Keys are unique, so ordering the (key, value) tuples orders by key.
+    pairs.sort_unstable();
+    pairs
+        .into_iter()
+        .map(|(k, v)| format!("{}={}", escape(k), escape(v)))
+        .collect::<Vec<_>>()
+        .join("&")
+}
+
+/// Parses a `key=value&key=value` tag string into a map, decoding every key
+/// and value with `unescape`.
+///
+/// Empty segments (an empty input, or a leading, trailing or doubled `&`)
+/// carry no tag and are skipped, so `""` yields an empty map instead of a
+/// single empty-key entry. A key that is not followed by `=` maps to an empty
+/// value. If a key occurs more than once, the last occurrence wins.
+///
+/// # Errors
+///
+/// Returns `Error::TagDecodingError` when a key or value fails to decode, when
+/// a key is empty (for example `=value`), or when a segment contains more
+/// than one `=`.
+pub fn parse_tags(s: &str) -> Result<HashMap<String, String>, Error> {
+    let mut tags = HashMap::new();
+    for tag in s.split('&').filter(|segment| !segment.is_empty()) {
+        let mut kv = tag.split('=');
+        // `split` always yields at least one item, so a missing key can only
+        // show up as an empty string, never as `None`.
+        let raw_key = kv.next().unwrap_or_default();
+        if raw_key.is_empty() {
+            return Err(Error::TagDecodingError(
+                s.to_string(),
+                "tag key was empty".to_string(),
+            ));
+        }
+        let k = unescape(raw_key)?;
+        let v = match kv.next() {
+            Some(raw_value) => unescape(raw_value)?,
+            None => String::new(),
+        };
+        if kv.next().is_some() {
+            return Err(Error::TagDecodingError(
+                s.to_string(),
+                "tag had too many values for a key".to_string(),
+            ));
+        }
+        tags.insert(k, v);
+    }
+    Ok(tags)
+}
+
 pub mod xml { use std::collections::HashMap; use crate::s3::error::Error; + #[derive(Debug, Clone)] struct XmlElementIndex { children: HashMap>, } @@ -432,6 +510,7 @@ pub mod xml { } } + #[derive(Debug, Clone)] pub struct Element<'a> { inner: &'a xmltree::Element, child_element_index: XmlElementIndex,