From 3a7fe037a4093531063fd2d32743ed2c7ea75418 Mon Sep 17 00:00:00 2001 From: Benno Rice Date: Fri, 1 Oct 2021 13:28:07 +1000 Subject: [PATCH] compose: normalise underlying BDB files in RPM database Berkeley DB has several issues that cause unreproducible builds: 1) Upon creation, each file is assigned a unique ID generated using a mixture of process ID, current time, and some randomness. 2) Pages used to hold data to be written out to disk are not zeroed prior to use. This leads to arbitrary data from the current process being written out to disk. 3) Unused fields in structures are not zeroed, leading to arbitrary stack data being written out to disk. Replacing the unique file ID is broadly harmless, but to ensure "sufficient" uniqueness each ID is replaced with a value generated by feeding the current time or the current value of SOURCE_DATE_EPOCH, along with a partial file path, into sha256 and using the first 20 bytes of the digest as the ID. For the other problems, areas known to be unused are found and zeroed out. To ensure the data itself does not change, the `db_dump` utility is run prior to any changes and its output is hashed using sha256. After the changes, the `db_verify` utility is run and, assuming this is successful, `db_dump` is re-run and the hash of its output is compared with the original hash. Any difference is considered a failure. This change does not look at any potential reproducibility issues in the ndb or sqlite backends. 
--- Cargo.lock | 92 ++++++++++++ Cargo.toml | 2 + rust/src/composepost.rs | 2 + rust/src/normalization.rs | 291 +++++++++++++++++++++++++++++++++++++- 4 files changed, 386 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 79298685f6..28b8e15c28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -76,12 +76,43 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "binread" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16598dfc8e6578e9b597d9910ba2e73618385dc9f4b1d43dd92c349d6be6418f" +dependencies = [ + "binread_derive", + "rustversion", +] + +[[package]] +name = "binread_derive" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d9672209df1714ee804b1f4d4f68c8eb2a90b1f7a07acf472f88ce198ef1fed" +dependencies = [ + "either", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array", +] + [[package]] name = "build-env" version = "0.3.1" @@ -240,6 +271,15 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" +[[package]] +name = "cpufeatures" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.2.1" @@ -377,6 +417,15 @@ dependencies = 
[ "syn", ] +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + [[package]] name = "dlv-list" version = "0.2.3" @@ -634,6 +683,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" +dependencies = [ + "typenum", + "version_check 0.9.3", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -1246,6 +1305,12 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + [[package]] name = "openat" version = "0.1.21" @@ -1826,6 +1891,7 @@ name = "rpmostree-rust" version = "0.1.0" dependencies = [ "anyhow", + "binread", "c_utf8", "camino", "chrono", @@ -1860,6 +1926,7 @@ dependencies = [ "serde_derive", "serde_json", "serde_yaml", + "sha2", "structopt", "subprocess", "system-deps 5.0.0", @@ -1882,6 +1949,12 @@ dependencies = [ "ordered-multimap", ] +[[package]] +name = "rustversion" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61b3909d758bb75c79f23d4736fac9433868679d3ad2ea7a61e3c25cfda9a088" + [[package]] name = "ryu" version = "1.0.5" @@ -1988,6 +2061,19 @@ dependencies = [ "yaml-rust", ] +[[package]] +name = "sha2" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b69f9a4c9740d74c5baa3fd2e547f9525fa8088a8a958e0ca2409a514e33f5fa" +dependencies = [ + "block-buffer", + "cfg-if 1.0.0", + "cpufeatures", + "digest", 
+ "opaque-debug", +] + [[package]] name = "sharded-slab" version = "0.1.1" @@ -2405,6 +2491,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" +[[package]] +name = "typenum" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" + [[package]] name = "unicode-bidi" version = "0.3.5" diff --git a/Cargo.toml b/Cargo.toml index 1410af1495..f5243a77e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ rpm = "4" [dependencies] anyhow = "1.0.44" +binread = "2.2.0" c_utf8 = "0.1.0" camino = "1.0.5" chrono = { version = "0.4.19", features = ["serde"] } @@ -63,6 +64,7 @@ serde = { version = "1.0.130", features = ["derive"] } serde_derive = "1.0.118" serde_json = "1.0.68" serde_yaml = "0.8.21" +sha2 = "0.9.8" structopt = "0.3.25" subprocess = "0.2.8" systemd = "0.9.0" diff --git a/rust/src/composepost.rs b/rust/src/composepost.rs index 39169be9f7..abcd23fc63 100644 --- a/rust/src/composepost.rs +++ b/rust/src/composepost.rs @@ -994,6 +994,8 @@ fn rewrite_rpmdb_for_target_inner(rootfs_dfd: &openat::Dir) -> Result<()> { .run(cancellable.gobj_rewrap()) .context("Failed to run rpmdb --importdb")?; + normalization::normalize_rpmdb(rootfs_dfd, RPMOSTREE_RPMDB_LOCATION)?; + tempetc.undo()?; Ok(()) diff --git a/rust/src/normalization.rs b/rust/src/normalization.rs index 62d3b0dccd..ded8dedf13 100644 --- a/rust/src/normalization.rs +++ b/rust/src/normalization.rs @@ -10,7 +10,8 @@ use anyhow::{anyhow, Result}; use fn_error_context::context; use lazy_static::lazy_static; use std::convert::TryInto; -use std::io::{BufReader, Read, Seek, SeekFrom, Write}; +use std::io::{BufRead, BufReader, Read, Seek, SeekFrom, Write}; +use std::path::Path; lazy_static! 
{ static ref SOURCE_DATE_EPOCH_RAW: Option = std::env::var("SOURCE_DATE_EPOCH").ok(); @@ -120,6 +121,294 @@ pub(crate) fn rewrite_rpmdb_timestamps(rpmdb: &mut F) -> Ok(()) } +#[context("Rewriting rpmdb database files for build stability")] +pub(crate) fn normalize_rpmdb(rootfs: &openat::Dir, rpmdb_path: impl AsRef) -> Result<()> { + let source_date_epoch = if let Some(source_date_epoch) = *SOURCE_DATE_EPOCH { + source_date_epoch as u32 + } else { + return Ok(()); + }; + + let mut db_backend = None; + + let macros = BufReader::new(rootfs.open_file("usr/lib/rpm/macros")?); + for line in macros.lines() { + let line = line?; + if line.is_empty() { + continue; + } + + let mut bits = line.split(char::is_whitespace).filter(|s| !s.is_empty()); + if let Some(m) = bits.next() { + if m == "%_db_backend" { + db_backend = bits.last().map(|s| s.to_string()); + break; + } + }; + } + + let db_backend = db_backend.ok_or_else(|| anyhow!("Unable to determine rpmdb backend"))?; + + match db_backend.as_str() { + "bdb" | "b-d-b" => rpmdb_bdb::normalize_rpmdb_bdb(rootfs, rpmdb_path, source_date_epoch), + "ndb" => Ok(()), + "sqlite" => Ok(()), + _ => Err(anyhow!("Unknown rpmdb backend: {}", db_backend)), + } +} + +mod rpmdb_bdb { + use anyhow::{anyhow, Context, Result}; + use binread::{BinRead, BinReaderExt}; + use lazy_static::lazy_static; + use openat::SimpleType; + use sha2::Digest; + use std::ffi::OsStr; + use std::io::{Read, Seek, SeekFrom, Write}; + use std::os::unix::io::AsRawFd; + use std::path::{Path, PathBuf}; + use subprocess::{Exec, Redirection}; + + #[derive(BinRead, Debug, Clone, Copy, PartialEq, Eq)] + #[repr(u8)] + #[br(repr=u8)] + enum PageType { + IBTree = 3, + LBTree = 5, + Overflow = 7, + HashMeta = 8, + BTreeMeta = 9, + Hash = 13, + } + + #[derive(BinRead, Debug)] + #[br(little)] + struct MetaHeader { + lsn: u64, + pgno: u32, + magic: u32, + version: u32, + pagesize: u32, + encrypt_alg: u8, + page_type: PageType, + metaflags: u8, + unused1: u8, + free: u32, + 
last_pgno: u32, + nparts: u32, + key_count: u32, + record_count: u32, + flags: u32, + uid: [u8; 20], + } + + const BTREE_MAGIC: u32 = 0x00053162; + const HASH_MAGIC: u32 = 0x00061561; + const PAGE_HEADER_SIZE: u16 = 26; + const PAGE_HEADER_MAGIC_OFFSET: u64 = 0x34; + + #[derive(BinRead, Debug)] + #[br(little)] + struct PageHeader { + lsn: u64, + pgno: u32, + prev_pgno: u32, + next_pgno: u32, + entries: u16, + hf_offset: u16, + level: u8, + page_type: PageType, + } + + #[derive(BinRead, Debug)] + #[br(repr=u8)] + #[repr(u8)] + enum BTreeItemType { + KeyData = 1, + Duplicate = 2, + Overflow = 3, + } + + #[derive(BinRead, Debug)] + #[br(little)] + struct BTreeItem { + len: u16, + item_type: BTreeItemType, + } + + #[derive(BinRead, Debug)] + #[br(repr=u8)] + #[repr(u8)] + enum HashItemType { + KeyData = 1, + Duplicate = 2, + Offpage = 3, + OffDup = 4, + } + + lazy_static! { + static ref PROC_SELF_CWD: PathBuf = PathBuf::from("/proc/self/cwd"); + static ref PROC_SELF_FD: PathBuf = PathBuf::from("/proc/self/fd"); + } + + pub(super) fn normalize_rpmdb_bdb>( + rootfs: &openat::Dir, + rpmdb_path: P, + timestamp: u32, + ) -> Result<()> { + let rpmdb_path = rpmdb_path.as_ref(); + + for entry in rootfs.list_dir(rpmdb_path)? { + let entry = entry?; + + match entry.simple_type() { + Some(SimpleType::File) => (), + _ => continue, + }; + + if let Some(filename) = entry.file_name().to_str() { + if filename.starts_with('.') || filename.starts_with("__db") { + continue; + } + } else { + continue; + } + + let path = rpmdb_path.join(entry.file_name()); + + let old_digest = sha2::Sha256::digest( + Exec::cmd("db_dump") + .args(&[PROC_SELF_CWD.join(&path)]) + .cwd(PROC_SELF_FD.join(rootfs.as_raw_fd().to_string())) + .stdout(Redirection::Pipe) + .capture() + .context("pre-normalization data dump")? 
+ .stdout + .as_slice(), + ); + + { + let mut file_id = sha2::Sha256::new(); + file_id.update(timestamp.to_be_bytes()); + file_id.update(format!("rpm/{}", entry.file_name().to_str().unwrap()).as_bytes()); + let file_id = &file_id.finalize()[..20]; + + let mut db = rootfs.update_file(&path, 0o644)?; + let meta_header: MetaHeader = db.read_le()?; + + match (meta_header.magic, meta_header.page_type) { + (BTREE_MAGIC, PageType::BTreeMeta) => (), + (HASH_MAGIC, PageType::HashMeta) => (), + _ => continue, + }; + + db.seek(SeekFrom::Start(PAGE_HEADER_MAGIC_OFFSET))?; + db.write_all(file_id)?; + + for pageno in 1..meta_header.last_pgno + 1 { + db.seek(SeekFrom::Start((pageno * meta_header.pagesize) as u64))?; + + let header: PageHeader = db.read_le()?; + + if header.page_type == PageType::Overflow { + db.seek(SeekFrom::Current(header.hf_offset as i64))?; + let fill_length = + meta_header.pagesize - (PAGE_HEADER_SIZE + header.hf_offset) as u32; + std::io::copy( + &mut std::io::repeat(b'\x00').take(fill_length as u64), + &mut db, + )?; + continue; + } + + let mut offsets: Vec = Vec::new(); + + for _ in 0..header.entries { + offsets.push(db.read_le()?); + } + + offsets.sort_unstable(); + + let empty = if offsets.is_empty() { + meta_header.pagesize - PAGE_HEADER_SIZE as u32 + } else { + *offsets.first().unwrap() as u32 + - (PAGE_HEADER_SIZE + header.entries * 2) as u32 + }; + + std::io::copy(&mut std::io::repeat(b'\x00').take(empty as u64), &mut db)?; + + let mut offset_iter = offsets.into_iter().peekable(); + while let Some(offset) = offset_iter.next() { + db.seek(SeekFrom::Start( + (pageno * meta_header.pagesize + offset as u32) as u64, + ))?; + + if matches!(header.page_type, PageType::IBTree | PageType::LBTree) { + let item: BTreeItem = db.read_le()?; + if header.page_type == PageType::IBTree { + db.write_all(b"\x00")?; + } else if header.page_type == PageType::LBTree { + if let BTreeItemType::Overflow = item.item_type { + db.seek(SeekFrom::Current(-3))?; + 
db.write_all(b"\x00\x00")?; + } else if let BTreeItemType::KeyData = item.item_type { + let next_offset = if let Some(next) = offset_iter.peek() { + *next + } else { + meta_header.pagesize as u16 + }; + let remainder = next_offset - (offset + 3 + item.len); + if remainder != 0 { + db.seek(SeekFrom::Current(item.len as i64))?; + std::io::copy( + &mut std::io::repeat(b'\x00').take(remainder as u64), + &mut db, + )?; + } + } + } + } else if header.page_type == PageType::Hash { + let item_type: HashItemType = db.read_le()?; + + if let HashItemType::Offpage = item_type { + db.write_all(b"\x00\x00\x00")?; + } + } + } + } + + db.flush()?; + } + + let r = Exec::cmd("db_verify") + .args(&[OsStr::new("-q"), PROC_SELF_CWD.join(&path).as_os_str()]) + .cwd(PROC_SELF_FD.join(rootfs.as_raw_fd().to_string())) + .join()?; + if !r.success() { + return Err(anyhow!("post-normalization verification failed")); + } + + let new_digest = sha2::Sha256::digest( + Exec::cmd("db_dump") + .args(&[PROC_SELF_CWD.join(&path)]) + .cwd(PROC_SELF_FD.join(rootfs.as_raw_fd().to_string())) + .stdout(Redirection::Pipe) + .capture() + .context("post-normalization data dump")? + .stdout + .as_slice(), + ); + + if new_digest != old_digest { + return Err(anyhow!("file data changed during normalization")); + } + } + + Ok(()) + } +} + #[cfg(test)] mod tests { use super::*;