Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial support for compressed data blobs with zstd compression #83

Conversation

ariel-miculas
Copy link
Collaborator

Fixes #14

@hallyn
Copy link
Contributor

hallyn commented Apr 4, 2023

Hmm — I'm not sure whether this is a result of this patch, but I see that when I create a new fs, the index.json has:

      "fs_verity_digest": [
        72,
        188,
        196,
        180,
        57,
        215,
        11,
        70,
        53,
        77,
        131,
        29,
        3,
        28,
        195,
        163,
        219,
        236,
        102,
        26,
        128,
        244,
        122,
        78,
        89,
        196,
        91,
        58,
        145,
        162,
        138,
        32
      ],

instead of a single hex string, as in:

      "annotations": {
        "io.stackeroci.stacker.squashfs_verity_root_hash": "6fb77b2e33699df89e7df475944aa5d2eeb356e8070e05fd4db273773ac460b4"
      }

@hallyn
Copy link
Contributor

hallyn commented Apr 4, 2023

Other than that, it looks good. It won't auto-merge, so I'll hit approve.

@hallyn hallyn self-requested a review April 4, 2023 02:19
hallyn
hallyn previously approved these changes Apr 4, 2023
@ariel-miculas
Copy link
Collaborator Author

That's definitely not intended behavior; the fs_verity digest shouldn't appear in index.json at all.

Default to no compression in puzzlefs builds and zstd compression in
tests.

This commit adds a new field in BlobRef which stores whether the blob is
compressed or not. Per blob information is useful if we want to skip
compressing the blob in cases where the compressed version has a larger
size than the uncompressed version (e.g. when the blob is already
compressed or it has a high enough entropy that it cannot be compressed
further).

Signed-off-by: Ariel Miculas <[email protected]>
The main reason for this change is to be able to use std::io::Cursor, an
in-memory buffer which can be used as a replacement for File. This is
useful because we want to compress blobs in-memory and only write them
to disk if they take up less space than the uncompressed blob.

Signed-off-by: Ariel Miculas <[email protected]>
@ariel-miculas
Copy link
Collaborator Author

Diff:

diff --git a/builder/src/lib.rs b/builder/src/lib.rs
index d3b6d11..70323a6 100644
--- a/builder/src/lib.rs
+++ b/builder/src/lib.rs
@@ -95,12 +95,13 @@ fn process_chunks<C: for<'a> Compression<'a> + Any>(
         let chunk = result.unwrap();
         let mut chunk_used: u64 = 0;
 
-        let desc = oci.put_blob::<C, media_types::Chunk>(&chunk.data)?;
+        let (desc, fs_verity_digest, compressed) =
+            oci.put_blob::<C, media_types::Chunk>(&chunk.data)?;
         let blob_kind = BlobRefKind::Other {
             digest: desc.digest.underlying(),
         };
 
-        let verity_hash = desc.fs_verity_digest;
+        let verity_hash = fs_verity_digest;
         verity_data.insert(desc.digest.underlying(), verity_hash);
 
         while chunk_used < chunk.length as u64 {
@@ -112,7 +113,7 @@ fn process_chunks<C: for<'a> Compression<'a> + Any>(
             let blob = BlobRef {
                 offset: chunk_used,
                 kind: blob_kind,
-                compressed: desc.compressed,
+                compressed,
             };
 
             file.as_mut()
@@ -455,7 +456,7 @@ fn build_delta<C: for<'a> Compression<'a> + Any>(
     md_buf.append(&mut files_buf);
     md_buf.append(&mut others_buf);
 
-    let desc = oci.put_blob::<compression::Noop, media_types::Inodes>(md_buf.as_slice())?;
+    let (desc, ..) = oci.put_blob::<compression::Noop, media_types::Inodes>(md_buf.as_slice())?;
     let verity_hash = get_fs_verity_digest(md_buf.as_slice())?;
     verity_data.insert(desc.digest.underlying(), verity_hash);
 
@@ -486,7 +487,9 @@ pub fn build_initial_rootfs<C: for<'a> Compression<'a> + Any>(
             manifest_version: PUZZLEFS_IMAGE_MANIFEST_VERSION,
         },
     )?;
-    oci.put_blob::<compression::Noop, media_types::Rootfs>(rootfs_buf.as_slice())
+    Ok(oci
+        .put_blob::<compression::Noop, media_types::Rootfs>(rootfs_buf.as_slice())?
+        .0)
 }
 
 // add_rootfs_delta adds whatever the delta between the current rootfs and the puzzlefs
@@ -524,7 +527,8 @@ pub fn add_rootfs_delta<C: for<'a> Compression<'a> + Any>(
     let mut rootfs_buf = Vec::new();
     serde_cbor::to_writer(&mut rootfs_buf, &rootfs)?;
     Ok((
-        oci.put_blob::<compression::Noop, media_types::Rootfs>(rootfs_buf.as_slice())?,
+        oci.put_blob::<compression::Noop, media_types::Rootfs>(rootfs_buf.as_slice())?
+            .0,
         oci,
     ))
 }
diff --git a/oci/src/descriptor.rs b/oci/src/descriptor.rs
index 7f80f42..91940b8 100644
--- a/oci/src/descriptor.rs
+++ b/oci/src/descriptor.rs
@@ -11,25 +11,15 @@ pub struct Descriptor {
     pub size: u64,
     pub media_type: String,
     pub annotations: HashMap<String, String>,
-    pub fs_verity_digest: [u8; SHA256_BLOCK_SIZE],
-    pub compressed: bool,
 }
 
 impl Descriptor {
-    pub fn new(
-        digest: [u8; 32],
-        size: u64,
-        media_type: String,
-        fs_verity_digest: [u8; SHA256_BLOCK_SIZE],
-        compressed: bool,
-    ) -> Descriptor {
+    pub fn new(digest: [u8; 32], size: u64, media_type: String) -> Descriptor {
         Descriptor {
             digest: Digest::new(&digest),
             size,
             media_type,
             annotations: HashMap::new(),
-            fs_verity_digest,
-            compressed,
         }
     }
 
diff --git a/oci/src/lib.rs b/oci/src/lib.rs
index 6b70b4a..01cb619 100644
--- a/oci/src/lib.rs
+++ b/oci/src/lib.rs
@@ -15,7 +15,7 @@ use sha2::{Digest as Sha2Digest, Sha256};
 use tempfile::NamedTempFile;
 
 use compression::{Compression, Decompressor};
-use format::{MetadataBlob, Result, Rootfs, VerityData, WireFormatError};
+use format::{MetadataBlob, Result, Rootfs, VerityData, WireFormatError, SHA256_BLOCK_SIZE};
 use openat::Dir;
 use std::io::{Error, ErrorKind};
 
@@ -89,7 +89,7 @@ impl Image {
     pub fn put_blob<C: for<'a> Compression<'a> + Any, MT: media_types::MediaType>(
         &self,
         buf: &[u8],
-    ) -> Result<Descriptor> {
+    ) -> Result<(Descriptor, [u8; SHA256_BLOCK_SIZE], bool)> {
         let mut compressed_data = Cursor::new(Vec::<u8>::new());
         let mut compressed = C::compress(&mut compressed_data)?;
         let mut hasher = Sha256::new();
@@ -117,13 +117,8 @@ impl Image {
         hasher.update(final_data);
         let digest = hasher.finalize();
         let media_type = C::append_extension(MT::name());
-        let descriptor = Descriptor::new(
-            digest.into(),
-            uncompressed_size,
-            media_type,
-            get_fs_verity_digest(final_data)?,
-            compressed_blob,
-        );
+        let descriptor = Descriptor::new(digest.into(), uncompressed_size, media_type);
+        let fs_verity_digest = get_fs_verity_digest(&compressed_data.get_ref()[..])?;
         let path = self.blob_path().join(descriptor.digest.to_string());
 
         // avoid replacing the data blob so we don't drop fsverity data
@@ -145,7 +140,7 @@ impl Image {
             tmp.write_all(final_data)?;
             tmp.persist(path).map_err(|e| e.error)?;
         }
-        Ok(descriptor)
+        Ok((descriptor, fs_verity_digest, compressed_blob))
     }
 
     fn open_raw_blob(&self, digest: &Digest, verity: Option<&[u8]>) -> io::Result<fs::File> {
@@ -269,7 +264,7 @@ mod tests {
     fn test_put_blob_correct_hash() {
         let dir = tempdir().unwrap();
         let image: Image = Image::new(dir.path()).unwrap();
-        let desc = image
+        let (desc, ..) = image
             .put_blob::<compression::Noop, media_types::Chunk>("meshuggah rocks".as_bytes())
             .unwrap();
 
@@ -291,7 +286,7 @@ mod tests {
     fn test_put_get_index() {
         let dir = tempdir().unwrap();
         let image = Image::new(dir.path()).unwrap();
-        let mut desc = image
+        let (mut desc, ..) = image
             .put_blob::<DefaultCompression, media_types::Chunk>("meshuggah rocks".as_bytes())
             .unwrap();
         desc.set_name("foo");

@hallyn hallyn self-requested a review April 6, 2023 14:13
@hallyn hallyn merged commit 60579df into project-machine:master Apr 6, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

implement compression
2 participants