Skip to content
This repository has been archived by the owner on Nov 7, 2024. It is now read-only.

feat: Add external input support for container encapsulation #652

Merged
merged 16 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ once_cell = "1.9"
libc = "0.2.92"
libsystemd = "0.7.0"
openssl = "0.10.33"
ocidir = "0.1.0"
ocidir = { version = "0.2.0", git = "https://github.com/hhd-dev/ocidir-rs" }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah...you have changes to that too!

The bump to 0.2 is in #653 at least.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you mind doing (again) at least a draft PR to the repo with hhd-dev/ocidir-rs@9f6095c and provide a bit of reproducer instructions around how skopeo is broken?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ohh I see it's in the PR text:

Violates the JSON standard by encoding the characters \n etc literally

Hmmmm...ok. I will look.

Copy link
Collaborator Author

@antheas antheas Aug 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://docs.rs/olpc-cjson/latest/olpc_cjson/

(specifically, ASCII control characters 0x00–0x1f are printed literally, which is not valid JSON). Therefore, serde_json cannot necessarily deserialize JSON produced by this formatter.

error: Querying manifest after push: Fetching manifest: Failed to invoke skopeo proxy method GetManifest: remote error: invalid character '\n' in string literal

Hopefully this helps

Only occurs if \n is included in a label. It would have been nice to generate fancy descriptions. Although the only place that reads them (ghcr) omits \n, so newlines are not shown.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, right thanks. This is probably then best tracked at containers/ocidir-rs#10 ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, since I do not think my fork is a proper fix it did not make sense to PR it.

pin-project = "1.0"
regex = "1.5.4"
rustix = { version = "0.38", features = ["fs", "process"] }
Expand All @@ -50,6 +50,7 @@ tokio-util = { features = ["io-util"], version = "0.7" }
tokio-stream = { features = ["sync"], version = "0.1.8" }
tracing = "0.1"
zstd = { version = "0.13.1", features = ["pkg-config"] }
indexmap = { version = "2.2.6", features = ["serde"] }

indoc = { version = "2", optional = true }
xshell = { version = "0.2", optional = true }
Expand Down
21 changes: 12 additions & 9 deletions lib/src/chunking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT

use std::borrow::{Borrow, Cow};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Write;
use std::hash::{Hash, Hasher};
use std::num::NonZeroU32;
Expand All @@ -19,6 +19,7 @@ use camino::Utf8PathBuf;
use containers_image_proxy::oci_spec;
use gvariant::aligned_bytes::TryAsAligned;
use gvariant::{Marker, Structure};
use indexmap::IndexMap;
use ostree::{gio, glib};
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -53,9 +54,9 @@ pub(crate) struct Chunk {
pub struct ObjectSourceMetaSized {
/// The original metadata
#[serde(flatten)]
meta: ObjectSourceMeta,
pub meta: ObjectSourceMeta,
/// Total size of associated objects
size: u64,
pub size: u64,
}

impl Hash for ObjectSourceMetaSized {
Expand Down Expand Up @@ -89,7 +90,7 @@ impl ObjectMetaSized {
let map = meta.map;
let mut set = meta.set;
// Maps content id -> total size of associated objects
let mut sizes = HashMap::<&str, u64>::new();
let mut sizes = BTreeMap::<&str, u64>::new();
// Populate two mappings above, iterating over the object -> contentid mapping
for (checksum, contentid) in map.iter() {
let finfo = repo.query_file(checksum, cancellable)?.0;
Expand Down Expand Up @@ -308,7 +309,7 @@ impl Chunking {
}

// Reverses `contentmeta.map` i.e. contentid -> Vec<checksum>
let mut rmap = HashMap::<ContentID, Vec<&String>>::new();
let mut rmap = IndexMap::<ContentID, Vec<&String>>::new();
for (checksum, contentid) in meta.map.iter() {
rmap.entry(Rc::clone(contentid)).or_default().push(checksum);
}
Expand Down Expand Up @@ -577,12 +578,12 @@ fn basic_packing_with_prior_build<'a>(
let mut curr_build = curr_build?;

// View the packages as unordered sets for lookups and differencing
let prev_pkgs_set: HashSet<String> = curr_build
let prev_pkgs_set: BTreeSet<String> = curr_build
.iter()
.flat_map(|v| v.iter().cloned())
.filter(|name| !name.is_empty())
.collect();
let curr_pkgs_set: HashSet<String> = components
let curr_pkgs_set: BTreeSet<String> = components
.iter()
.map(|pkg| pkg.meta.name.to_string())
.collect();
Expand All @@ -597,13 +598,13 @@ fn basic_packing_with_prior_build<'a>(
}

// Handle removed packages
let removed: HashSet<&String> = prev_pkgs_set.difference(&curr_pkgs_set).collect();
let removed: BTreeSet<&String> = prev_pkgs_set.difference(&curr_pkgs_set).collect();
for bin in curr_build.iter_mut() {
bin.retain(|pkg| !removed.contains(pkg));
}

// Handle updated packages
let mut name_to_component: HashMap<String, &ObjectSourceMetaSized> = HashMap::new();
let mut name_to_component: BTreeMap<String, &ObjectSourceMetaSized> = BTreeMap::new();
for component in components.iter() {
name_to_component
.entry(component.meta.name.to_string())
Expand Down Expand Up @@ -821,6 +822,8 @@ mod test {
}

fn create_manifest(prev_expected_structure: Vec<Vec<&str>>) -> oci_spec::image::ImageManifest {
use std::collections::HashMap;

let mut p = prev_expected_structure
.iter()
.map(|b| {
Expand Down
85 changes: 81 additions & 4 deletions lib/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,28 @@ use cap_std_ext::cap_std;
use cap_std_ext::prelude::CapStdExtDirExt;
use clap::{Parser, Subcommand};
use fn_error_context::context;
use indexmap::IndexMap;
use io_lifetimes::AsFd;
use ostree::{gio, glib};
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::ffi::OsString;
use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::num::NonZeroU32;
use std::path::PathBuf;
use std::process::Command;
use tokio::sync::mpsc::Receiver;

use crate::chunking::{ObjectMetaSized, ObjectSourceMetaSized};
use crate::commit::container_commit;
use crate::container::store::{ExportToOCIOpts, ImportProgress, LayerProgress, PreparedImport};
use crate::container::{self as ostree_container, ManifestDiff};
use crate::container::{Config, ImageReference, OstreeImageReference};
use crate::objectsource::ObjectSourceMeta;
use crate::sysroot::SysrootLock;
use ostree_container::store::{ImageImporter, PrepareResult};
use serde::{Deserialize, Serialize};

/// Parse an [`OstreeImageReference`] from a CLI argument.
pub fn parse_imgref(s: &str) -> Result<OstreeImageReference> {
Expand Down Expand Up @@ -165,6 +170,10 @@ pub(crate) enum ContainerOpts {
/// Compress at the fastest level (e.g. gzip level 1)
#[clap(long)]
compression_fast: bool,

/// Path to a JSON-formatted content meta object.
#[clap(long)]
contentmeta: Option<Utf8PathBuf>,
},

/// Perform build-time checking and canonicalization.
Expand Down Expand Up @@ -699,6 +708,19 @@ async fn container_import(
Ok(())
}

/// Grouping of metadata about an object.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct RawMeta {
/// When the image was created. Sync it with the io.container.image.created label.
pub created: Option<String>,
/// Top level labels, to be prefixed to the ones with --label
pub labels: Option<BTreeMap<String, String>>,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm OK having these here, but it seems like it'd make more sense for them to be separate CLI arguments?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's valuable to have two sources for this, as currently rechunk will also generate the labels and the created tag and harmonize them. Since it pulls the rpm database it can do fancy stuff such as variable substitution. So this acts as a way of passing them through the two commands cleanly.

Having to refeed them into arguments would be hell.

I expect if anyone else tried to extend this they would agree.

Here is how the action example for the layers looks right now:
https://github.com/hhd-dev/rechunk/blob/496f4b84aced656b9c2c0f176f24323fe13129ad/.github/workflows/online_test_deck.yml#L51-L93

CLI may override the file.

/// ContentId to layer annotation
pub layers: IndexMap<String, String>,
/// OSTree hash to layer ContentId
pub mapping: IndexMap<String, String>,
antheas marked this conversation as resolved.
Show resolved Hide resolved
}

/// Export a container image with an encapsulated ostree commit.
#[allow(clippy::too_many_arguments)]
async fn container_export(
Expand All @@ -712,22 +734,75 @@ async fn container_export(
container_config: Option<Utf8PathBuf>,
cmd: Option<Vec<String>>,
compression_fast: bool,
contentmeta: Option<Utf8PathBuf>,
) -> Result<()> {
let config = Config {
labels: Some(labels),
cmd,
};
let container_config = if let Some(container_config) = container_config {
serde_json::from_reader(File::open(container_config).map(BufReader::new)?)?
} else {
None
};

let mut contentmeta_data = None;
let mut created = None;
let mut labels = labels.clone();
if let Some(contentmeta) = contentmeta {
let raw: Option<RawMeta> =
antheas marked this conversation as resolved.
Show resolved Hide resolved
serde_json::from_reader(File::open(contentmeta).map(BufReader::new)?)?;
if let Some(raw) = raw {
created = raw.created;

contentmeta_data = Some(ObjectMetaSized {
map: raw
.mapping
.into_iter()
.map(|(k, v)| (k.into(), v.into()))
.collect(),
sizes: raw
.layers
.into_iter()
.map(|(k, v)| ObjectSourceMetaSized {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't we just directly parse this data from the input JSON?

Copy link
Collaborator Author

@antheas antheas Aug 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The use of this fork only reuses ostree-rs-ext as an exporter. If we wanted to use it with its grouping algorithm, it would make sense to do it that way.

Right now this section is fighting with the existing code to make it work as an exporter and by exposing a cleaner API for the json file.

meta: ObjectSourceMeta {
identifier: k.clone().into(),
name: v.into(),
srcid: k.clone().into(),
change_frequency: if k == "unpackaged" { std::u32::MAX } else { 1 },
change_time_offset: 1,
},
size: 1,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Size of 1 seems odd here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This section is what was required to make it work without restructuring the code.

A proper implementation would remove this, incl. the dangling "Reserved for New Packages" layer at the end.

})
.collect(),
});
// Allow --label to override labels from the content metadata
if let Some(raw_labels) = raw.labels {
labels = raw_labels.into_iter().chain(labels.into_iter()).collect();
};
antheas marked this conversation as resolved.
Show resolved Hide resolved
} else {
anyhow::bail!("Content metadata must be a JSON object")
}
}

// Use enough layers so that each package ends in its own layer
// while respecting the layer ordering.
let max_layers = if let Some(contentmeta_data) = &contentmeta_data {
NonZeroU32::new((contentmeta_data.sizes.len() + 1).try_into().unwrap())
} else {
None
};

let config = Config {
labels: Some(labels),
cmd,
};

let opts = crate::container::ExportOpts {
copy_meta_keys,
copy_meta_opt_keys,
container_config,
authfile,
skip_compression: compression_fast, // TODO rename this in the struct at the next semver break
contentmeta: contentmeta_data.as_ref(),
max_layers,
created,
..Default::default()
};
let pushed = crate::container::encapsulate(repo, rev, &config, Some(opts), imgref).await?;
Expand Down Expand Up @@ -958,6 +1033,7 @@ async fn run_from_opt(opt: Opt) -> Result<()> {
config,
cmd,
compression_fast,
contentmeta,
} => {
let labels: Result<BTreeMap<_, _>> = labels
.into_iter()
Expand All @@ -980,6 +1056,7 @@ async fn run_from_opt(opt: Opt) -> Result<()> {
config,
cmd,
compression_fast,
contentmeta,
)
.await
}
Expand Down
22 changes: 16 additions & 6 deletions lib/src/container/encapsulate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,16 +186,19 @@ fn build_oci(

let mut ctrcfg = opts.container_config.clone().unwrap_or_default();
let mut imgcfg = oci_image::ImageConfiguration::default();
imgcfg.set_created(Some(
commit_timestamp.format("%Y-%m-%dT%H:%M:%SZ").to_string(),
));
let labels = ctrcfg.labels_mut().get_or_insert_with(Default::default);

let created_at = opts
.created
.clone()
.unwrap_or_else(|| commit_timestamp.format("%Y-%m-%dT%H:%M:%SZ").to_string());
imgcfg.set_created(Some(created_at));
let mut labels = HashMap::new();

commit_meta_to_labels(
&commit_meta,
opts.copy_meta_keys.iter().map(|k| k.as_str()),
opts.copy_meta_opt_keys.iter().map(|k| k.as_str()),
labels,
&mut labels,
)?;

let mut manifest = ocidir::new_empty_manifest().build().unwrap();
Expand Down Expand Up @@ -244,7 +247,7 @@ fn build_oci(
writer,
&mut manifest,
&mut imgcfg,
labels,
&mut labels,
chunking,
&opts,
&description,
Expand All @@ -261,9 +264,14 @@ fn build_oci(
ctrcfg.set_cmd(Some(cmd.clone()));
}

ctrcfg
.labels_mut()
.get_or_insert_with(Default::default)
.extend(labels.clone());
imgcfg.set_config(Some(ctrcfg));
let ctrcfg = writer.write_config(imgcfg)?;
manifest.set_config(ctrcfg);
manifest.set_annotations(Some(labels));
let platform = oci_image::Platform::default();
if let Some(tag) = tag {
writer.insert_manifest(manifest, Some(tag), platform)?;
Expand Down Expand Up @@ -375,6 +383,8 @@ pub struct ExportOpts<'m, 'o> {
/// Metadata mapping between objects and their owning component/package;
/// used to optimize packing.
pub contentmeta: Option<&'o ObjectMetaSized>,
/// Sets the created tag in the image manifest.
pub created: Option<String>,
}

impl<'m, 'o> ExportOpts<'m, 'o> {
Expand Down
2 changes: 1 addition & 1 deletion lib/src/container/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1310,7 +1310,7 @@ pub(crate) fn export_to_oci(
let compression = opts.skip_compression.then_some(Compression::none());
for (i, layer) in remaining_layers.iter().enumerate() {
let layer_ref = &ref_for_layer(layer)?;
let mut target_blob = dest_oci.create_raw_layer(compression)?;
let mut target_blob = dest_oci.create_gzip_layer(compression)?;
// Sadly the libarchive stuff isn't exposed via Rust due to type unsafety,
// so we'll just fork off the CLI.
let repo_dfd = repo.dfd_borrow();
Expand Down
2 changes: 1 addition & 1 deletion lib/src/container/update_detachedmeta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ pub async fn update_detached_metadata(
// Create tar streams for source and destination
let src_layer = BufReader::new(tempsrc.read_blob(commit_layer)?);
let mut src_layer = flate2::read::GzDecoder::new(src_layer);
let mut out_layer = BufWriter::new(tempsrc.create_raw_layer(None)?);
let mut out_layer = BufWriter::new(tempsrc.create_gzip_layer(None)?);

// Process the tar stream and inject our new detached metadata
crate::tar::update_detached_metadata(
Expand Down
6 changes: 3 additions & 3 deletions lib/src/integrationtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use containers_image_proxy::oci_spec;
use fn_error_context::context;
use gio::prelude::*;
use oci_spec::image as oci_image;
use ocidir::RawLayerWriter;
use ocidir::GzipLayerWriter;
use ostree::gio;
use xshell::cmd;

Expand Down Expand Up @@ -58,7 +58,7 @@ pub fn generate_derived_oci_from_tar<F>(
tag: Option<&str>,
) -> Result<()>
where
F: FnOnce(&mut RawLayerWriter) -> Result<()>,
F: FnOnce(&mut GzipLayerWriter) -> Result<()>,
{
let src = src.as_ref();
let src = Dir::open_ambient_dir(src, cap_std::ambient_authority())?;
Expand All @@ -67,7 +67,7 @@ where
let mut manifest = src.read_manifest()?;
let mut config: oci_spec::image::ImageConfiguration = src.read_json_blob(manifest.config())?;

let mut bw = src.create_raw_layer(None)?;
let mut bw = src.create_gzip_layer(None)?;
f(&mut bw)?;
let new_layer = bw.complete()?;

Expand Down
5 changes: 3 additions & 2 deletions lib/src/objectsource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
//! This is used to help split up containers into distinct layers.

use std::borrow::Borrow;
use std::collections::{BTreeMap, HashSet};
use std::collections::HashSet;
use indexmap::IndexMap;
use std::hash::Hash;
use std::rc::Rc;

Expand Down Expand Up @@ -78,7 +79,7 @@ impl Borrow<str> for ObjectSourceMeta {
pub type ObjectMetaSet = HashSet<ObjectSourceMeta>;

/// Maps from an ostree content object digest to the `ContentSet` key.
pub type ObjectMetaMap = BTreeMap<String, ContentID>;
pub type ObjectMetaMap = IndexMap<String, ContentID>;

/// Grouping of metadata about an object.
#[derive(Debug, Default)]
Expand Down
Loading