Skip to content

Commit

Permalink
Merge pull request #456 from RishabhSaini/issue/4012
Browse files Browse the repository at this point in the history
Improving the encapsulation (chunking) algorithm
  • Loading branch information
cgwalters committed May 15, 2023
2 parents 1b463a9 + 5159164 commit f7473b0
Show file tree
Hide file tree
Showing 11 changed files with 745 additions and 88 deletions.
630 changes: 564 additions & 66 deletions lib/src/chunking.rs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lib/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ async fn container_export(
..Default::default()
};
let pushed =
crate::container::encapsulate(repo, rev, &config, Some(opts), None, imgref).await?;
crate::container::encapsulate(repo, rev, &config, None, Some(opts), None, imgref).await?;
println!("{}", pushed);
Ok(())
}
Expand Down
48 changes: 40 additions & 8 deletions lib/src/container/encapsulate.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! APIs for creating container images from OSTree commits

use super::ocidir::{Layer, OciDir};
use super::{ocidir, OstreeImageReference, Transport};
use super::{ocidir, OstreeImageReference, Transport, CONTENT_ANNOTATION};
use super::{ImageReference, SignatureSource, OSTREE_COMMIT_LABEL};
use crate::chunking::{Chunk, Chunking, ObjectMetaSized};
use crate::container::skopeo;
Expand Down Expand Up @@ -104,7 +104,7 @@ fn export_chunks(
ociw: &mut OciDir,
chunks: Vec<Chunk>,
opts: &ExportOpts,
) -> Result<Vec<(Layer, String)>> {
) -> Result<Vec<(Layer, String, Vec<String>)>> {
chunks
.into_iter()
.enumerate()
Expand All @@ -113,7 +113,7 @@ fn export_chunks(
ostree_tar::export_chunk(repo, commit, chunk.content, &mut w)
.with_context(|| format!("Exporting chunk {i}"))?;
let w = w.into_inner()?;
Ok((w.complete()?, chunk.name))
Ok((w.complete()?, chunk.name, chunk.packages))
})
.collect()
}
Expand Down Expand Up @@ -151,11 +151,20 @@ fn export_chunked(
.clone();

// Add the ostree layer
ociw.push_layer(manifest, imgcfg, ostree_layer, description);
ociw.push_layer(manifest, imgcfg, ostree_layer, description, None);
// Add the component/content layers
for (layer, name) in layers {
ociw.push_layer(manifest, imgcfg, layer, name.as_str());
for (layer, name, packages) in layers {
let mut annotation_component_layer = HashMap::new();
annotation_component_layer.insert(CONTENT_ANNOTATION.to_string(), packages.join(","));
ociw.push_layer(
manifest,
imgcfg,
layer,
name.as_str(),
Some(annotation_component_layer),
);
}

// This label (mentioned above) points to the last layer that is part of
// the ostree commit.
labels.insert(
Expand All @@ -167,13 +176,15 @@ fn export_chunked(

/// Generate an OCI image from a given ostree root
#[context("Building oci")]
#[allow(clippy::too_many_arguments)]
fn build_oci(
repo: &ostree::Repo,
rev: &str,
ocidir_path: &Path,
tag: Option<&str>,
config: &Config,
opts: ExportOpts,
prior_build: Option<&oci_image::ImageManifest>,
contentmeta: Option<crate::chunking::ObjectMetaSized>,
) -> Result<ImageReference> {
if !ocidir_path.exists() {
Expand Down Expand Up @@ -209,7 +220,15 @@ fn build_oci(
let mut manifest = ocidir::new_empty_manifest().build().unwrap();

let chunking = contentmeta
.map(|meta| crate::chunking::Chunking::from_mapping(repo, commit, meta, opts.max_layers))
.map(|meta| {
crate::chunking::Chunking::from_mapping(
repo,
commit,
meta,
&opts.max_layers,
prior_build,
)
})
.transpose()?;
// If no chunking was provided, create a logical single chunk.
let chunking = chunking
Expand Down Expand Up @@ -291,6 +310,7 @@ async fn build_impl(
repo: &ostree::Repo,
ostree_ref: &str,
config: &Config,
prior_build: Option<&oci_image::ImageManifest>,
opts: Option<ExportOpts>,
contentmeta: Option<ObjectMetaSized>,
dest: &ImageReference,
Expand All @@ -308,6 +328,7 @@ async fn build_impl(
tag,
config,
opts,
prior_build,
contentmeta,
)?;
None
Expand All @@ -323,6 +344,7 @@ async fn build_impl(
None,
config,
opts,
prior_build,
contentmeta,
)?;

Expand Down Expand Up @@ -377,9 +399,19 @@ pub async fn encapsulate<S: AsRef<str>>(
repo: &ostree::Repo,
ostree_ref: S,
config: &Config,
prior_build: Option<&oci_image::ImageManifest>,
opts: Option<ExportOpts>,
contentmeta: Option<ObjectMetaSized>,
dest: &ImageReference,
) -> Result<String> {
build_impl(repo, ostree_ref.as_ref(), config, opts, contentmeta, dest).await
build_impl(
repo,
ostree_ref.as_ref(),
config,
prior_build,
opts,
contentmeta,
dest,
)
.await
}
4 changes: 4 additions & 0 deletions lib/src/container/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ use std::str::FromStr;
/// The label injected into a container image that contains the ostree commit SHA-256.
pub const OSTREE_COMMIT_LABEL: &str = "ostree.commit";

/// The name of an annotation attached to a layer which names the packages/components
/// which are part of it.
pub(crate) const CONTENT_ANNOTATION: &str = "ostree.components";

/// Our generic catchall fatal error, expected to be converted
/// to a string to output to a terminal or logs.
type Result<T> = anyhow::Result<T>;
Expand Down
5 changes: 3 additions & 2 deletions lib/src/container/ocidir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ impl OciDir {
config: &mut oci_image::ImageConfiguration,
layer: Layer,
description: &str,
annotations: Option<HashMap<String, String>>,
) {
let annotations: Option<HashMap<String, String>> = None;
self.push_layer_annotated(manifest, config, layer, annotations, description);
}

Expand Down Expand Up @@ -531,7 +531,8 @@ mod tests {
let mut config = oci_image::ImageConfigurationBuilder::default()
.build()
.unwrap();
w.push_layer(&mut manifest, &mut config, root_layer, "root");
let annotations: Option<HashMap<String, String>> = None;
w.push_layer(&mut manifest, &mut config, root_layer, "root", annotations);
let config = w.write_config(config)?;
manifest.set_config(config);
w.replace_with_single_manifest(manifest.clone(), oci_image::Platform::default())?;
Expand Down
11 changes: 9 additions & 2 deletions lib/src/fixture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,9 @@ d tmp
"## };
pub const CONTENTS_CHECKSUM_V0: &str =
"5e41de82f9f861fa51e53ce6dd640a260e4fb29b7657f5a3f14157e93d2c0659";
pub static CONTENTS_V0_LEN: Lazy<usize> = Lazy::new(|| OWNERS.len().checked_sub(1).unwrap());
// Layer 1 is the ostree commit, layer 2 holds the max-frequency packages, and layer 3 is an empty layer
pub const LAYERS_V0_LEN: usize = 3usize;
pub const PKGS_V0_LEN: usize = 7usize;

#[derive(Debug, PartialEq, Eq)]
enum SeLabel {
Expand Down Expand Up @@ -317,6 +319,7 @@ fn build_mapping_recurse(
name: Rc::clone(&owner),
srcid: Rc::clone(&owner),
change_time_offset: u32::MAX,
change_frequency: u32::MAX,
});
}

Expand Down Expand Up @@ -661,11 +664,15 @@ impl Fixture {
let contentmeta = self.get_object_meta().context("Computing object meta")?;
let contentmeta = ObjectMetaSized::compute_sizes(self.srcrepo(), contentmeta)
.context("Computing sizes")?;
let opts = ExportOpts::default();
let opts = ExportOpts {
max_layers: std::num::NonZeroU32::new(PKGS_V0_LEN as u32),
..Default::default()
};
let digest = crate::container::encapsulate(
self.srcrepo(),
self.testref(),
&config,
None,
Some(opts),
Some(contentmeta),
&imgref,
Expand Down
Binary file modified lib/src/fixtures/fedora-coreos-contentmeta.json.gz
Binary file not shown.
1 change: 1 addition & 0 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub mod objectsource;
pub(crate) mod objgv;
#[cfg(feature = "internal-testing-api")]
pub mod ostree_manual;
pub(crate) mod statistics;

mod utils;

Expand Down
6 changes: 3 additions & 3 deletions lib/src/objectsource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ pub struct ObjectSourceMeta {
/// Unique identifier, does not need to be human readable, but can be.
#[serde(with = "rcstr_serialize")]
pub identifier: ContentID,
/// Identifier for this source (e.g. package name-version, git repo).
/// Unlike the [`ContentID`], this should be human readable. It likely comes from an external source,
/// and may be re-serialized.
/// Just the name of the package (no version), needs to be human readable.
#[serde(with = "rcstr_serialize")]
pub name: Rc<str>,
/// Identifier for the *source* of this content; for example, if multiple binary
Expand All @@ -54,6 +52,8 @@ pub struct ObjectSourceMeta {
/// One suggested way to generate this number is to have it be in units of hours or days
/// since the earliest changed item.
pub change_time_offset: u32,
/// Change frequency
pub change_frequency: u32,
}

impl PartialEq for ObjectSourceMeta {
Expand Down
109 changes: 109 additions & 0 deletions lib/src/statistics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
//! This module implements some basic statistical measures: mean, standard deviation, and median absolute deviation.

/// Computes the arithmetic mean of `data`.
///
/// Returns `None` when `data` is empty, because the mean of zero samples is
/// undefined.
///
/// The samples are accumulated in a `u128`, so the running total cannot
/// overflow even when individual values are close to `u64::MAX`. For inputs
/// whose sum fits in a `u64` the result is unchanged.
pub(crate) fn mean(data: &[u64]) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    // Widen before summing: a plain `u64` sum panics (debug) or wraps (release)
    // once the total exceeds `u64::MAX`.
    let total: u128 = data.iter().map(|&value| u128::from(value)).sum();
    Some(total as f64 / data.len() as f64)
}

/// Computes the population standard deviation of `data`, i.e. the square root
/// of the average squared distance from the mean (dividing by the number of
/// samples, not by one less).
///
/// Returns `None` when `data` is empty.
pub(crate) fn std_deviation(data: &[u64]) -> Option<f64> {
    // `mean` yields `None` exactly when there are no samples, so `?` covers
    // the empty case.
    let centre = mean(data)?;
    let squared_distances: f64 = data
        .iter()
        .map(|&sample| {
            let delta = centre - sample as f64;
            delta * delta
        })
        .sum();
    let variance = squared_distances / data.len() as f64;
    Some(variance.sqrt())
}

/// Returns the median of `values`, which must already be sorted in ascending
/// order.
///
/// For an even number of elements this is the average of the two middle
/// values. Returns `None` for an empty slice.
fn median_of_sorted(values: &[f64]) -> Option<f64> {
    let mid = values.len() / 2;
    match values.len() {
        0 => None,
        len if len % 2 == 1 => Some(values[mid]),
        _ => Some(0.5 * (values[mid - 1] + values[mid])),
    }
}

/// Computes the median of `data` and its median absolute deviation (MAD).
///
/// The MAD is the median of the absolute distances between each sample and
/// the median of the samples.
///
/// Returns `Some((median, mad))`, or `None` when `data` is empty.
///
/// The input does not need to be sorted: the computation runs on a sorted
/// private copy and the caller's slice is left unmodified. Samples are
/// converted to `f64` *before* the two middle values are averaged, so the
/// even-length case cannot overflow `u64` arithmetic.
pub(crate) fn median_absolute_deviation(data: &mut [u64]) -> Option<(f64, f64)> {
    // `u64 -> f64` is monotonic, so sorting after the conversion yields the
    // same order as sorting the integers.
    let mut samples: Vec<f64> = data.iter().map(|&value| value as f64).collect();
    samples.sort_by(|a, b| {
        a.partial_cmp(b)
            .expect("values converted from u64 are never NaN")
    });
    let median = median_of_sorted(&samples)?;

    // Reuse the buffer: replace every sample by its distance to the median.
    let mut absolute_deviations = samples;
    for deviation in &mut absolute_deviations {
        *deviation = (*deviation - median).abs();
    }
    absolute_deviations.sort_by(|a, b| {
        a.partial_cmp(b)
            .expect("differences of finite values are never NaN")
    });
    let mad = median_of_sorted(&absolute_deviations)?;

    Some((median, mad))
}

/// Exercises `mean` on empty, single-element and multi-element inputs.
#[test]
fn test_mean() {
    // No samples: the mean is undefined.
    assert_eq!(mean(&[]), None);

    // A single sample is its own mean.
    let singles: [u64; 4] = [0, 1, 5, 100];
    for single in singles.iter().copied() {
        assert_eq!(mean(&[single]), Some(single as f64));
    }

    // Several samples, paired with their expected mean.
    let cases: &[(&[u64], f64)] = &[
        (&[0, 1], 0.5),
        (&[0, 5, 100], 35.0),
        (&[7, 4, 30, 14], 13.75),
    ];
    for &(input, expected) in cases {
        assert_eq!(mean(input), Some(expected));
    }
}

/// Exercises `std_deviation` on empty, single-element, constant and widely
/// spread inputs.
#[test]
fn test_std_deviation() {
    // No samples: the deviation is undefined.
    assert_eq!(std_deviation(&[]), None);

    // A single sample never deviates from its own mean.
    let singles: [u64; 4] = [0, 1, 5, 100];
    for single in singles.iter().copied() {
        assert_eq!(std_deviation(&[single]), Some(0.0));
    }

    // Several samples, paired with their expected deviation.
    let cases: &[(&[u64], f64)] = &[
        (&[1, 4], 1.5),
        (&[2, 2, 2, 2], 0.0),
        (
            &[1, 20, 300, 4000, 50000, 600000, 7000000, 80000000],
            26193874.56387471,
        ),
    ];
    for &(input, expected) in cases {
        assert_eq!(std_deviation(input), Some(expected));
    }
}

/// Exercises `median_absolute_deviation` on empty, single-element and
/// multi-element inputs; each expectation is a `(median, mad)` pair.
#[test]
fn test_median_absolute_deviation() {
    // No samples: neither the median nor the MAD is defined.
    assert_eq!(median_absolute_deviation(&mut []), None);

    // A single sample is its own median and has zero deviation.
    let singles: [u64; 4] = [0, 1, 5, 100];
    for single in singles.iter().copied() {
        let mut input = [single];
        assert_eq!(
            median_absolute_deviation(&mut input),
            Some((single as f64, 0.0))
        );
    }

    let cases: Vec<(Vec<u64>, (f64, f64))> = vec![
        (vec![1, 4], (2.5, 1.5)),
        (vec![2, 2, 2, 2], (2.0, 0.0)),
        (
            vec![
                1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 9, 12, 52, 90,
            ],
            (6.0, 2.0),
        ),
        // When more than half of the data shares one value the MAD is 0, so
        // any value different from the residual median is classified as an
        // outlier.
        (vec![0, 1, 1, 1, 1, 1, 1, 1, 0], (1.0, 0.0)),
    ];
    for (mut input, expected) in cases {
        assert_eq!(median_absolute_deviation(&mut input), Some(expected));
    }
}
Loading

0 comments on commit f7473b0

Please sign in to comment.