From 99f252f8893c9cdb2b9866edb3c5dd1b103af661 Mon Sep 17 00:00:00 2001 From: "Bala.FA" Date: Sun, 6 Aug 2023 14:49:41 +0530 Subject: [PATCH] Add generic AWS S3 domain support Signed-off-by: Bala.FA --- .github/workflows/rust.yml | 3 +- src/s3/client.rs | 2 +- src/s3/http.rs | 313 ++++++++++++++++++++++++++++--------- src/s3/utils.rs | 41 ++++- 4 files changed, 277 insertions(+), 82 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b39fb7d..0900fd9 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -13,9 +13,10 @@ jobs: build: runs-on: ubuntu-latest + timeout-minutes: 5 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build run: | cargo fmt --all -- --check diff --git a/src/s3/client.rs b/src/s3/client.rs index d3449f4..0cdd63d 100644 --- a/src/s3/client.rs +++ b/src/s3/client.rs @@ -2373,7 +2373,7 @@ impl<'a> Client<'a> { &self, args: &ListenBucketNotificationArgs<'_>, ) -> Result { - if self.base_url.aws_host { + if self.base_url.is_aws_host() { return Err(Error::UnsupportedApi(String::from( "ListenBucketNotification", ))); diff --git a/src/s3/http.rs b/src/s3/http.rs index 2511dc3..56567b5 100644 --- a/src/s3/http.rs +++ b/src/s3/http.rs @@ -16,12 +16,23 @@ //! HTTP URL definitions use crate::s3::error::Error; +use crate::s3::utils::match_hostname; use crate::s3::utils::{to_query_string, Multimap}; use derivative::Derivative; use hyper::http::Method; use hyper::Uri; +use lazy_static::lazy_static; +use regex::Regex; use std::fmt; +const AWS_S3_PREFIX: &str = r"^(((bucket\.|accesspoint\.)vpce(-[a-z_\d]+)+\.s3\.)|([a-z_\d-]{1,63}\.)s3-control(-[a-z_\d]+)*\.|(s3(-[a-z_\d]+)*\.))"; + +lazy_static! { + static ref AWS_ELB_ENDPOINT_REGEX: Regex = + Regex::new(r"^[a-z_\d-]{1,63}\.[a-z_\d-]{1,63}\.elb\.amazonaws\.com$").unwrap(); + static ref AWS_S3_PREFIX_REGEX: Regex = Regex::new(AWS_S3_PREFIX).unwrap(); +} + #[derive(Derivative)] #[derivative(Clone, Debug, Default)] /// Represents HTTP URL @@ -75,20 +86,120 @@ impl fmt::Display for Url { } } -fn extract_region(host: &str) -> String { - let tokens: Vec<&str> = host.split('.').collect(); - let region = match tokens.get(1) { - Some(r) => match *r { - "dualstack" => match tokens.get(2) { - Some(t) => t, - _ => "", - }, - "amazonaws" => "", - _ => r, - }, - _ => "", - }; - region.to_string() +pub fn match_aws_endpoint(value: &str) -> bool { + lazy_static! { + static ref AWS_ENDPOINT_REGEX: Regex = Regex::new(r".*\.amazonaws\.com(|\.cn)$").unwrap(); + } + + AWS_ENDPOINT_REGEX.is_match(value.to_lowercase().as_str()) +} + +pub fn match_aws_s3_endpoint(value: &str) -> bool { + lazy_static! { + static ref AWS_S3_ENDPOINT_REGEX: Regex = Regex::new( + &(AWS_S3_PREFIX.to_string() + r"([a-z_\d-]{1,63}\.)*amazonaws\.com(|\.cn)$") + ) + .unwrap(); + } + + let binding = value.to_lowercase(); + let lvalue = binding.as_str(); + + if !AWS_S3_ENDPOINT_REGEX.is_match(lvalue) { + return false; + } + + for token in lvalue.split('.') { + if token.starts_with('-') + || token.starts_with('_') + || token.ends_with('-') + || token.ends_with('_') + || token.starts_with("vpce-_") + || token.starts_with("s3-control-_") + || token.starts_with("s3-_") + { + return false; + } + } + + true +} + +fn get_aws_info( + host: &String, + https: bool, + region: &mut String, + aws_s3_prefix: &mut String, + aws_domain_suffix: &mut String, + dualstack: &mut bool, +) -> Result<(), Error> { + if !match_hostname(host.as_str()) { + return Ok(()); + } + + if AWS_ELB_ENDPOINT_REGEX.is_match(host.as_str()) { + let token = host + .get(..host.rfind(".elb.amazonaws.com").unwrap() - 1) + .unwrap(); + *region = token + .get(token.rfind('.').unwrap() + 1..) + .unwrap() + .to_string(); + return Ok(()); + } + + if !match_aws_endpoint(host.as_str()) { + return Ok(()); + } + + if !match_aws_s3_endpoint(host.as_str()) { + return Err(Error::UrlBuildError( + String::from("invalid Amazon AWS host ") + host, + )); + } + + let matcher = AWS_S3_PREFIX_REGEX.find(host.as_str()).unwrap(); + let s3_prefix = host.get(..matcher.end()).unwrap(); + + if s3_prefix.contains("s3-accesspoint") && !https { + return Err(Error::UrlBuildError( + String::from("use HTTPS scheme for host ") + host, + )); + } + + let mut tokens: Vec<_> = host.get(matcher.len()..).unwrap().split('.').collect(); + *dualstack = tokens[0] == "dualstack"; + if *dualstack { + tokens.remove(0); + } + + let mut region_in_host = String::new(); + if tokens[0] != "vpce" && tokens[0] != "amazonaws" { + region_in_host = tokens[0].to_string(); + tokens.remove(0); + } + + let domain_suffix = tokens.join("."); + + if host == "s3-external-1.amazonaws.com" { + region_in_host = "us-east-1".to_string(); + } + if host == "s3-us-gov-west-1.amazonaws.com" || host == "s3-fips-us-gov-west-1.amazonaws.com" { + region_in_host = "us-gov-west-1".to_string(); + } + + if domain_suffix.ends_with(".cn") && !s3_prefix.ends_with("s3-accelerate.") && region.is_empty() + { + return Err(Error::UrlBuildError( + String::from("region missing in Amazon S3 China endpoint ") + host, + )); + } + + *region = region_in_host; + *aws_s3_prefix = s3_prefix.to_string(); + *aws_domain_suffix = domain_suffix; + + Ok(()) } #[derive(Derivative)] @@ -100,13 +211,89 @@ pub struct BaseUrl { host: String, port: u16, pub region: String, - pub aws_host: bool, - accelerate_host: bool, - dualstack_host: bool, - virtual_style: bool, + aws_s3_prefix: String, + aws_domain_suffix: String, + pub dualstack: bool, + pub virtual_style: bool, } impl BaseUrl { + /// Checks base URL is AWS host + pub fn is_aws_host(&self) -> bool { + !self.aws_domain_suffix.is_empty() + } + + fn build_aws_url( + &self, + url: &mut Url, + bucket_name: &str, + enforce_path_style: bool, + region: &str, + ) -> Result<(), Error> { + let mut host = String::from(&self.aws_s3_prefix); + host.push_str(&self.aws_domain_suffix); + if host == "s3-external-1.amazonaws.com" + || host == "s3-us-gov-west-1.amazonaws.com" + || host == "s3-fips-us-gov-west-1.amazonaws.com" + { + url.host = host; + return Ok(()); + } + + host = String::from(&self.aws_s3_prefix); + if self.aws_s3_prefix.contains("s3-accelerate") { + if bucket_name.contains('.') { + return Err(Error::UrlBuildError(String::from( + "bucket name with '.' is not allowed for accelerate endpoint", + ))); + } + + if enforce_path_style { + host = host.replacen("-accelerate", "", 1); + } + } + + if self.dualstack { + host.push_str("dualstack."); + } + if !self.aws_s3_prefix.contains("s3-accelerate") { + host.push_str(region); + host.push('.'); + } + host.push_str(&self.aws_domain_suffix); + + url.host = host; + + Ok(()) + } + + fn build_list_buckets_url(&self, url: &mut Url, region: &String) { + if self.aws_domain_suffix.is_empty() { + return; + } + + let mut host = String::from(&self.aws_s3_prefix); + host.push_str(&self.aws_domain_suffix); + if host == "s3-external-1.amazonaws.com" + || host == "s3-us-gov-west-1.amazonaws.com" + || host == "s3-fips-us-gov-west-1.amazonaws.com" + { + url.host = host; + return; + } + + let mut s3_prefix = String::from(&self.aws_s3_prefix); + let mut domain_suffix = String::from(&self.aws_domain_suffix); + if s3_prefix.starts_with("s3.") || s3_prefix.starts_with("s3-") { + s3_prefix = "s3.".to_string(); + domain_suffix = "amazonaws.com".to_string(); + if self.aws_domain_suffix.ends_with(".cn") { + domain_suffix.push_str(".cn"); + } + } + url.host = s3_prefix + region + "." + &domain_suffix; + } + /// Builds URL from base URL for given parameters for S3 operation pub fn build_url( &self, @@ -127,15 +314,13 @@ impl BaseUrl { https: self.https, host: self.host.clone(), port: self.port, + path: String::from("/"), query: query.clone(), ..Default::default() }; if bucket_name.is_none() { - url.path.push('/'); - if self.aws_host { - url.host = format!("s3.{}.{}", region, self.host); - } + self.build_list_buckets_url(&mut url, region); return Ok(url); } @@ -151,45 +336,31 @@ impl BaseUrl { // SSL certificate validation error. (bucket.contains('.') && self.https); - if self.aws_host { - let mut s3_domain = "s3.".to_string(); - if self.accelerate_host { - if bucket.contains('.') { - return Err(Error::UrlBuildError(String::from( - "bucket name with '.' is not allowed for accelerate endpoint", - ))); - } - - if !enforce_path_style { - s3_domain = "s3-accelerate.".to_string(); - } - } - - if self.dualstack_host { - s3_domain.push_str("dualstack."); - } - if enforce_path_style || !self.accelerate_host { - s3_domain.push_str(region); - s3_domain.push('.'); - } - url.host = s3_domain + &url.host; + if !self.aws_domain_suffix.is_empty() { + self.build_aws_url(&mut url, bucket, enforce_path_style, region)?; } + let mut host = String::from(&url.host); + let mut path = String::new(); + if enforce_path_style || !self.virtual_style { - url.path.push('/'); - url.path.push_str(bucket); + path.push('/'); + path.push_str(bucket); } else { - url.host = format!("{}.{}", bucket, url.host); + host = format!("{}.{}", bucket, url.host); } if let Some(v) = object_name { if !v.starts_with('/') { - url.path.push('/'); + path.push('/'); } // FIXME: urlencode path - url.path.push_str(v); + path.push_str(v); } + url.host = host; + url.path = path; + Ok(url) } @@ -259,42 +430,28 @@ impl BaseUrl { ))); } - let mut accelerate_host = host.starts_with("s3-accelerate."); - let aws_host = (host.starts_with("s3.") || accelerate_host) - && (host.ends_with(".amazonaws.com") || host.ends_with(".amazonaws.com.cn")); - let virtual_style = aws_host || host.ends_with("aliyuncs.com"); - let mut region = String::new(); - let mut dualstack_host = false; - - if aws_host { - let mut aws_domain = "amazonaws.com"; - region = extract_region(host); - - let is_aws_china_host = host.ends_with(".cn"); - if is_aws_china_host { - aws_domain = "amazonaws.com.cn"; - if region.is_empty() { - return Err(Error::InvalidBaseUrl(String::from( - "region must be provided in Amazon S3 China endpoint", - ))); - } - } - - dualstack_host = host.contains(".dualstack."); - host = aws_domain; - } else { - accelerate_host = false; - } + let mut aws_s3_prefix = String::new(); + let mut aws_domain_suffix = String::new(); + let mut dualstack: bool = false; + get_aws_info( + &host.to_string(), + https, + &mut region, + &mut aws_s3_prefix, + &mut aws_domain_suffix, + &mut dualstack, + )?; + let virtual_style = !aws_domain_suffix.is_empty() || host.ends_with("aliyuncs.com"); Ok(BaseUrl { https, host: host.to_string(), port, region, - aws_host, - accelerate_host, - dualstack_host, + aws_s3_prefix, + aws_domain_suffix, + dualstack, virtual_style, }) } diff --git a/src/s3/utils.rs b/src/s3/utils.rs index 7d49093..26bda77 100644 --- a/src/s3/utils.rs +++ b/src/s3/utils.rs @@ -244,6 +244,43 @@ pub fn get_canonical_headers(map: &Multimap) -> (String, String) { (signed_headers, canonical_headers) } +/// Checks if given hostname is valid or not +pub fn match_hostname(value: &str) -> bool { + lazy_static! { + static ref HOSTNAME_REGEX: Regex = + Regex::new(r"^([a-z_\d-]{1,63}\.)*([a-z_\d-]{1,63})$").unwrap(); + } + + if !HOSTNAME_REGEX.is_match(value.to_lowercase().as_str()) { + return false; + } + + for token in value.split('.') { + if token.starts_with('-') + || token.starts_with('_') + || token.ends_with('-') + || token.ends_with('_') + { + return false; + } + } + + true +} + +/// Checks if given region is valid or not +pub fn match_region(value: &str) -> bool { + lazy_static! { + static ref REGION_REGEX: Regex = Regex::new(r"^([a-z_\d-]{1,63})$").unwrap(); + } + + !REGION_REGEX.is_match(value.to_lowercase().as_str()) + || value.starts_with('-') + || value.starts_with('_') + || value.ends_with('-') + || value.ends_with('_') +} + /// Validates given bucket name pub fn check_bucket_name(bucket_name: &str, strict: bool) -> Result<(), Error> { if bucket_name.trim().is_empty() { @@ -265,14 +302,14 @@ pub fn check_bucket_name(bucket_name: &str, strict: bool) -> Result<(), Error> { } lazy_static! { - static ref VALID_IP_ADDR_REGEX: Regex = Regex::new("^(\\d+\\.){3}\\d+$").unwrap(); + static ref IPV4_REGEX: Regex = Regex::new(r"^((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$").unwrap(); static ref VALID_BUCKET_NAME_REGEX: Regex = Regex::new("^[A-Za-z0-9][A-Za-z0-9\\.\\-_:]{1,61}[A-Za-z0-9]$").unwrap(); static ref VALID_BUCKET_NAME_STRICT_REGEX: Regex = Regex::new("^[a-z0-9][a-z0-9\\.\\-]{1,61}[a-z0-9]$").unwrap(); } - if VALID_IP_ADDR_REGEX.is_match(bucket_name) { + if IPV4_REGEX.is_match(bucket_name) { return Err(Error::InvalidBucketName(String::from( "bucket name cannot be an IP address", )));