From 5601907d5c4ead6ae42ea191f485726fd6179f98 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 27 Aug 2024 17:37:49 -0400 Subject: [PATCH] Add Digest and SHA256Digest This addresses https://github.com/containers/oci-spec-rs/issues/201 add starts parsing digests in a stricter way. We now reject malformed digests (notably including ones with `/` which is my biggest concern) making them always safe to use as a file name. For example in some cases (e.g. ocidir-rs) I want to write a descriptor to the filesystem, and I don't want any possibility of path traversal attacks from someone including a `/` in a descriptor. We also add accessors to retrieve strictly validated SHA-256 as that's really the only important case (while still supporting other generic digests). Signed-off-by: Colin Walters --- README.md | 12 +- src/image/artifact.rs | 16 ++- src/image/descriptor.rs | 29 ++++- src/image/digest.rs | 264 ++++++++++++++++++++++++++++++++++++++++ src/image/index.rs | 17 ++- src/image/manifest.rs | 19 +-- src/image/mod.rs | 2 + 7 files changed, 334 insertions(+), 25 deletions(-) create mode 100644 src/image/digest.rs diff --git a/README.md b/README.md index e4b5bca759..b96fd23d50 100644 --- a/README.md +++ b/README.md @@ -37,34 +37,36 @@ assert_eq!(image_manifest.layers().len(), 5); - Create new image manifest using builder ```rust no_run +use std::str::FromStr; use oci_spec::image::{ Descriptor, DescriptorBuilder, ImageManifest, ImageManifestBuilder, MediaType, + Sha256Digest, SCHEMA_VERSION }; let config = DescriptorBuilder::default() .media_type(MediaType::ImageConfig) .size(7023u64) - .digest("sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7") + .digest(Sha256Digest::from_str("b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7").unwrap()) .build() .expect("build config descriptor"); let layers: Vec = [ ( 32654u64, - "sha256:9834876dcfb05cb167a5c24953eba58c4ac89b1adf57f28f2f9d09af107ee8f0", + "9834876dcfb05cb167a5c24953eba58c4ac89b1adf57f28f2f9d09af107ee8f0", ), ( 16724, - "sha256:3c3a4604a545cdc127456d94e421cd355bca5b528f4a9c1905b15da2eb4a4c6b", + "3c3a4604a545cdc127456d94e421cd355bca5b528f4a9c1905b15da2eb4a4c6b", ), ( 73109, - "sha256:ec4b8955958665577945c89419d1af06b5f7636b4ac3da7f12184802ad867736", + "ec4b8955958665577945c89419d1af06b5f7636b4ac3da7f12184802ad867736", ), ] .iter() @@ -72,7 +74,7 @@ let layers: Vec = [ DescriptorBuilder::default() .media_type(MediaType::ImageLayerGzip) .size(l.0) - .digest(l.1.to_owned()) + .digest(Sha256Digest::from_str(l.1).unwrap()) .build() .expect("build layer") }) diff --git a/src/image/artifact.rs b/src/image/artifact.rs index 6cf33ef700..9c2d2af4bf 100644 --- a/src/image/artifact.rs +++ b/src/image/artifact.rs @@ -216,8 +216,8 @@ impl ArtifactManifest { #[cfg(test)] mod tests { use super::*; - use crate::image::DescriptorBuilder; - use std::path::PathBuf; + use crate::image::{DescriptorBuilder, Sha256Digest}; + use std::{path::PathBuf, str::FromStr}; fn get_manifest_path() -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test/data/artifact_manifest.json") @@ -228,8 +228,10 @@ mod tests { .media_type(MediaType::Other("application/gzip".to_string())) .size(123u64) .digest( - "sha256:87923725d74f4bfb94c9e86d64170f7521aad8221a5de834851470ca142da630" - .to_string(), + Sha256Digest::from_str( + "87923725d74f4bfb94c9e86d64170f7521aad8221a5de834851470ca142da630", + ) + .unwrap(), ) .build() .unwrap(); @@ -237,8 +239,10 @@ mod tests { .media_type(MediaType::ImageManifest) .size(1234u64) .digest( - "sha256:cc06a2839488b8bd2a2b99dcdc03d5cfd818eed72ad08ef3cc197aac64c0d0a0" - .to_string(), + Sha256Digest::from_str( + "cc06a2839488b8bd2a2b99dcdc03d5cfd818eed72ad08ef3cc197aac64c0d0a0", + ) + .unwrap(), ) .build() .unwrap(); diff --git a/src/image/descriptor.rs b/src/image/descriptor.rs index e749ed52e7..78a790f65b 100644 --- a/src/image/descriptor.rs +++ b/src/image/descriptor.rs @@ -1,4 +1,4 @@ -use super::{Arch, MediaType, Os}; +use super::{Arch, Digest, MediaType, Os}; use crate::error::OciSpecError; use derive_builder::Builder; use getset::{CopyGetters, Getters, Setters}; @@ -30,7 +30,7 @@ pub struct Descriptor { /// content SHOULD be verified against this digest when consumed via /// untrusted sources. #[getset(get = "pub", set = "pub")] - digest: String, + digest: Digest, /// This REQUIRED property specifies the size, in bytes, of the raw /// content. This property exists so that a client will have an /// expected size for the content before processing. If the @@ -133,7 +133,7 @@ pub struct Platform { impl Descriptor { /// Construct a new descriptor with the required fields. - pub fn new(media_type: MediaType, size: u64, digest: impl Into) -> Self { + pub fn new(media_type: MediaType, size: u64, digest: impl Into) -> Self { Self { media_type, size, @@ -145,10 +145,17 @@ impl Descriptor { data: Default::default(), } } + + /// Return a view of [`Self::digest()`] that has been parsed as a valid SHA-256 digest. + pub fn digest_sha256(&self) -> crate::Result { + super::Sha256Digest::try_from(self.digest().clone()) + } } #[cfg(test)] mod tests { + use std::str::FromStr; + use super::*; #[test] @@ -163,7 +170,10 @@ mod tests { assert_eq!(descriptor.media_type, MediaType::ImageManifest); assert_eq!( descriptor.digest, - "sha256:c2b8beca588702777e5f35dafdbeae9ec16c2bab802331f81cacd2a92f1d5356" + Digest::from_str( + "sha256:c2b8beca588702777e5f35dafdbeae9ec16c2bab802331f81cacd2a92f1d5356" + ) + .unwrap() ); assert_eq!(descriptor.size, 769); assert_eq!( @@ -178,4 +188,15 @@ mod tests { MediaType::Other("application/spdx+json".to_string()) ); } + + #[test] + fn test_malformed_digest() { + let descriptor_str = r#"{ + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "digest":"../blah:this-is-an-attack", + "size":769, + "annotations":{"org.opencontainers.image.created": "2023-10-11T22:37:26Z"}, + "artifactType":"application/spdx+json"}"#; + assert!(serde_json::from_str::(descriptor_str).is_err()); + } } diff --git a/src/image/digest.rs b/src/image/digest.rs new file mode 100644 index 0000000000..b8ffa823aa --- /dev/null +++ b/src/image/digest.rs @@ -0,0 +1,264 @@ +//! Functionality corresponding to . + +use std::str::FromStr; + +/// The well-known identifier for a SHA-256 digest. +/// At this point, no one is really using alternative digests like sha512, so we +/// don't yet try to expose them here in a higher level way. +const ALG_SHA256: &str = "sha256"; + +fn char_is_lowercase_ascii_hex(c: char) -> bool { + matches!(c, '0'..='9' | 'a'..='f') +} + +/// algorithm-component ::= [a-z0-9]+ +fn char_is_algorithm_component(c: char) -> bool { + matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9') +} + +/// encoded ::= [a-zA-Z0-9=_-]+ +fn char_is_encoded(c: char) -> bool { + char_is_algorithm_component(c) || matches!(c, '=' | '_' | '-') +} + +/// A parsed pair of algorithm:digest as defined +/// by +/// +/// ``` +/// # fn main() -> Result<(), Box> { +/// use std::str::FromStr; +/// use oci_spec::image::Digest; +/// let d = Digest::from_str("sha256:6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b")?; +/// assert_eq!(d.algorithm(), "sha256"); +/// assert_eq!(d.value(), "6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b"); +/// let d = Digest::from_str("multihash+base58:QmRZxt2b1FVZPNqd8hsiykDL3TdBDeTSPX9Kv46HmX4Gx8")?; +/// assert_eq!(d.algorithm(), "multihash+base58"); +/// assert_eq!(d.value(), "QmRZxt2b1FVZPNqd8hsiykDL3TdBDeTSPX9Kv46HmX4Gx8"); +/// # Ok(()) +/// # } +/// ``` + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Digest { + /// The underlying buffer + buf: Box, + /// The byte offset of the `:` + split: usize, +} + +impl<'de> serde::Deserialize<'de> for Digest { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Self::from_str(&s).map_err(serde::de::Error::custom) + } +} + +impl serde::ser::Serialize for Digest { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_str(&self.buf) + } +} + +impl Digest { + const ALGORITHM_SEPARATOR: &'static [char] = &['+', '.', '_', '-']; + /// The algorithm name (e.g. sha256, sha512) + pub fn algorithm(&self) -> &str { + &self.buf[0..self.split] + } + + /// The algorithm component (lowercase hexadecimal) + pub fn value(&self) -> &str { + &self.buf[self.split + 1..] + } + + /// View this digest as a valid SHA-256 digest, or return an error. + pub fn to_sha256(&self) -> crate::Result { + Sha256Digest::try_from(self.clone()) + } +} + +impl FromStr for Digest { + type Err = crate::OciSpecError; + + fn from_str(s: &str) -> Result { + let Some(split) = s.find(':') else { + return Err(crate::OciSpecError::Other("missing ':' in digest".into())); + }; + let (algorithm, value) = s.split_at(split); + let value = &value[1..]; + + // algorithm ::= algorithm-component (algorithm-separator algorithm-component)* + let algorithm_parts = algorithm.split(Self::ALGORITHM_SEPARATOR); + for part in algorithm_parts { + if part.is_empty() { + return Err(crate::OciSpecError::Other( + "Empty algorithm component".into(), + )); + } + if !part.chars().all(char_is_algorithm_component) { + return Err(crate::OciSpecError::Other(format!( + "Invalid algorithm component: {part}" + ))); + } + } + + if value.is_empty() { + return Err(crate::OciSpecError::Other("Empty algorithm value".into())); + } + if !value.chars().all(char_is_encoded) { + return Err(crate::OciSpecError::Other(format!( + "Invalid encoded value {value}" + ))); + } + + Ok(Self { + buf: s.into(), + split, + }) + } +} + +/// A SHA-256 digest; this can only be constructed by first parsing a [`Digest`]. +/// +/// ``` +/// # fn main() -> Result<(), Box> { +/// use std::str::FromStr; +/// use oci_spec::image::{Digest, Sha256Digest}; +/// let d = Digest::from_str("sha256:6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b")?; +/// assert_eq!(d.algorithm(), "sha256"); +/// assert_eq!(d.value(), "6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b"); +/// let d = Sha256Digest::try_from(d)?; +/// assert_eq!(d.value(), "6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b"); +/// // But other digests will fail to be converted +/// let d = Digest::from_str("multihash+base58:QmRZxt2b1FVZPNqd8hsiykDL3TdBDeTSPX9Kv46HmX4Gx8")?; +/// assert!(Sha256Digest::try_from(d).is_err()); +/// # Ok(()) +/// # } +/// ``` + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Sha256Digest { + /// The underlying SHA-256 digest, guaranteed to be 64 lowercase hexadecimal ASCII characters. + value: Box, +} + +impl From for Digest { + fn from(value: Sha256Digest) -> Self { + Self { + buf: format!("sha256:{}", value.value).into_boxed_str(), + split: 6, + } + } +} + +impl TryFrom for Sha256Digest { + type Error = crate::OciSpecError; + + fn try_from(algdigest: Digest) -> Result { + match algdigest.algorithm() { + ALG_SHA256 => {} + o => { + return Err(crate::OciSpecError::Other(format!( + "Expected algorithm {ALG_SHA256} but found {o}" + ))) + } + } + + Self::from_str(algdigest.value()) + } +} + +impl FromStr for Sha256Digest { + type Err = crate::OciSpecError; + + fn from_str(digest: &str) -> Result { + let is_all_hex = digest.chars().all(char_is_lowercase_ascii_hex); + if !(digest.len() == 64 && is_all_hex) { + return Err(crate::OciSpecError::Other(format!( + "Invalid SHA-256 digest: {digest}" + ))); + } + Ok(Self { + value: digest.into(), + }) + } +} + +impl Sha256Digest { + /// The underlying SHA-256 digest, guaranteed to be 64 lowercase hexadecimal ASCII characters. + pub fn value(&self) -> &str { + &self.value + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_digest_invalid() { + let invalid = [ + "", + "foo", + ":", + "blah+", + "_digest:somevalue", + ":blah", + "blah:", + "^:foo", + "bar^baz:blah", + "sha256:123456*78", + ]; + for case in invalid { + assert!( + Digest::from_str(case).is_err(), + "Should have failed to parse: {case}" + ) + } + } + + const VALID_DIGEST_SHA256: &str = + "sha256:6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b"; + + #[test] + fn test_digest_valid() { + let cases = ["foo:bar", "sha256:blah", "sha512:12345"]; + for case in cases { + Digest::from_str(case).unwrap(); + } + + let d = Digest::from_str("multihash+base58:QmRZxt2b1FVZPNqd8hsiykDL3TdBDeTSPX9Kv46HmX4Gx8") + .unwrap(); + assert_eq!(d.algorithm(), "multihash+base58"); + assert_eq!(d.value(), "QmRZxt2b1FVZPNqd8hsiykDL3TdBDeTSPX9Kv46HmX4Gx8"); + } + + #[test] + fn test_sha256_invalid() { + let invalid = [ + "sha256:123456=78", + "foo:bar", + "sha256+blah:6c3c624b58dbbcd3c0dd82b4c53f04194d1247c6eebdaab7c610cf7d66709b3b", + ]; + for case in invalid { + let d = Digest::from_str(case).unwrap(); + assert!( + Sha256Digest::try_from(d).is_err(), + "Should have failed to parse: {case}" + ) + } + } + + #[test] + fn test_sha256_valid() { + let d = Digest::from_str(VALID_DIGEST_SHA256).unwrap(); + let d = d.to_sha256().unwrap(); + assert_eq!(d.value(), VALID_DIGEST_SHA256.split_once(':').unwrap().1); + } +} diff --git a/src/image/index.rs b/src/image/index.rs index 569c25faaa..fa4a0b7027 100644 --- a/src/image/index.rs +++ b/src/image/index.rs @@ -229,16 +229,22 @@ impl Display for ImageIndex { #[cfg(test)] mod tests { + use std::str::FromStr; use std::{fs, path::PathBuf}; use super::*; - use crate::image::{Arch, Os}; + use crate::image::{Arch, Os, Sha256Digest}; use crate::image::{DescriptorBuilder, PlatformBuilder}; fn create_index() -> ImageIndex { let ppc_manifest = DescriptorBuilder::default() .media_type(MediaType::ImageManifest) - .digest("sha256:e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f") + .digest( + Sha256Digest::from_str( + "e692418e4cbaf90ca69d05a66403747baa33ee08806650b51fab815ad7fc331f", + ) + .unwrap(), + ) .size(7143u64) .platform( PlatformBuilder::default() @@ -252,7 +258,12 @@ mod tests { let amd64_manifest = DescriptorBuilder::default() .media_type(MediaType::ImageManifest) - .digest("sha256:5b0bcabd1ed22e9fb1310cf6c2dec7cdef19f0ad69efa1f392e94a4333501270") + .digest( + Sha256Digest::from_str( + "5b0bcabd1ed22e9fb1310cf6c2dec7cdef19f0ad69efa1f392e94a4333501270", + ) + .unwrap(), + ) .size(7682u64) .platform( PlatformBuilder::default() diff --git a/src/image/manifest.rs b/src/image/manifest.rs index b47a1b3312..0813720780 100644 --- a/src/image/manifest.rs +++ b/src/image/manifest.rs @@ -241,10 +241,10 @@ impl Display for ImageManifest { #[cfg(test)] mod tests { - use std::{fs, path::PathBuf}; + use std::{fs, path::PathBuf, str::FromStr}; use super::*; - use crate::image::DescriptorBuilder; + use crate::image::{DescriptorBuilder, Sha256Digest}; fn create_manifest() -> ImageManifest { use crate::image::SCHEMA_VERSION; @@ -252,22 +252,27 @@ mod tests { let config = DescriptorBuilder::default() .media_type(MediaType::ImageConfig) .size(7023u64) - .digest("sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7") + .digest( + Sha256Digest::from_str( + "b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7", + ) + .unwrap(), + ) .build() .expect("build config descriptor"); let layers: Vec = [ ( 32654u64, - "sha256:9834876dcfb05cb167a5c24953eba58c4ac89b1adf57f28f2f9d09af107ee8f0", + "9834876dcfb05cb167a5c24953eba58c4ac89b1adf57f28f2f9d09af107ee8f0", ), ( 16724, - "sha256:3c3a4604a545cdc127456d94e421cd355bca5b528f4a9c1905b15da2eb4a4c6b", + "3c3a4604a545cdc127456d94e421cd355bca5b528f4a9c1905b15da2eb4a4c6b", ), ( 73109, - "sha256:ec4b8955958665577945c89419d1af06b5f7636b4ac3da7f12184802ad867736", + "ec4b8955958665577945c89419d1af06b5f7636b4ac3da7f12184802ad867736", ), ] .iter() @@ -275,7 +280,7 @@ mod tests { DescriptorBuilder::default() .media_type(MediaType::ImageLayerGzip) .size(l.0) - .digest(l.1.to_owned()) + .digest(Sha256Digest::from_str(l.1).unwrap()) .build() .expect("build layer") }) diff --git a/src/image/mod.rs b/src/image/mod.rs index 334dd99e25..f260cb1e4e 100644 --- a/src/image/mod.rs +++ b/src/image/mod.rs @@ -4,6 +4,7 @@ mod annotations; mod artifact; mod config; mod descriptor; +mod digest; mod index; mod manifest; mod oci_layout; @@ -17,6 +18,7 @@ pub use annotations::*; pub use artifact::*; pub use config::*; pub use descriptor::*; +pub use digest::*; pub use index::*; pub use manifest::*; pub use oci_layout::*;