diff --git a/Cargo.lock b/Cargo.lock index 79298685f6..28b8e15c28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -76,12 +76,43 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "binread" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16598dfc8e6578e9b597d9910ba2e73618385dc9f4b1d43dd92c349d6be6418f" +dependencies = [ + "binread_derive", + "rustversion", +] + +[[package]] +name = "binread_derive" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d9672209df1714ee804b1f4d4f68c8eb2a90b1f7a07acf472f88ce198ef1fed" +dependencies = [ + "either", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array", +] + [[package]] name = "build-env" version = "0.3.1" @@ -240,6 +271,15 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" +[[package]] +name = "cpufeatures" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.2.1" @@ -377,6 +417,15 @@ dependencies = [ "syn", ] +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + [[package]] name = "dlv-list" version = "0.2.3" @@ -634,6 +683,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" +dependencies = [ + "typenum", + "version_check 0.9.3", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -1246,6 +1305,12 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + [[package]] name = "openat" version = "0.1.21" @@ -1826,6 +1891,7 @@ name = "rpmostree-rust" version = "0.1.0" dependencies = [ "anyhow", + "binread", "c_utf8", "camino", "chrono", @@ -1860,6 +1926,7 @@ dependencies = [ "serde_derive", "serde_json", "serde_yaml", + "sha2", "structopt", "subprocess", "system-deps 5.0.0", @@ -1882,6 +1949,12 @@ dependencies = [ "ordered-multimap", ] +[[package]] +name = "rustversion" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61b3909d758bb75c79f23d4736fac9433868679d3ad2ea7a61e3c25cfda9a088" + [[package]] name = "ryu" version = "1.0.5" @@ -1988,6 +2061,19 @@ dependencies = [ "yaml-rust", ] +[[package]] +name = "sha2" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b69f9a4c9740d74c5baa3fd2e547f9525fa8088a8a958e0ca2409a514e33f5fa" +dependencies = [ + "block-buffer", + "cfg-if 1.0.0", + "cpufeatures", + "digest", + "opaque-debug", +] + [[package]] name = "sharded-slab" version = "0.1.1" @@ -2405,6 +2491,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" +[[package]] +name = "typenum" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" + [[package]] name = "unicode-bidi" version = "0.3.5" diff --git a/Cargo.toml b/Cargo.toml index 1410af1495..f5243a77e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ rpm = "4" [dependencies] anyhow = "1.0.44" +binread = "2.2.0" c_utf8 = "0.1.0" camino = "1.0.5" chrono = { version = "0.4.19", features = ["serde"] } @@ -63,6 +64,7 @@ serde = { version = "1.0.130", features = ["derive"] } serde_derive = "1.0.118" serde_json = "1.0.68" serde_yaml = "0.8.21" +sha2 = "0.9.8" structopt = "0.3.25" subprocess = "0.2.8" systemd = "0.9.0" diff --git a/rust/src/composepost.rs b/rust/src/composepost.rs index 39169be9f7..abcd23fc63 100644 --- a/rust/src/composepost.rs +++ b/rust/src/composepost.rs @@ -994,6 +994,8 @@ fn rewrite_rpmdb_for_target_inner(rootfs_dfd: &openat::Dir) -> Result<()> { .run(cancellable.gobj_rewrap()) .context("Failed to run rpmdb --importdb")?; + normalization::normalize_rpmdb(rootfs_dfd, RPMOSTREE_RPMDB_LOCATION)?; + tempetc.undo()?; Ok(()) diff --git a/rust/src/normalization.rs b/rust/src/normalization.rs index 62d3b0dccd..ded8dedf13 100644 --- a/rust/src/normalization.rs +++ b/rust/src/normalization.rs @@ -10,7 +10,8 @@ use anyhow::{anyhow, Result}; use fn_error_context::context; use lazy_static::lazy_static; use std::convert::TryInto; -use std::io::{BufReader, Read, Seek, SeekFrom, Write}; +use std::io::{BufRead, BufReader, Read, Seek, SeekFrom, Write}; +use std::path::Path; lazy_static! { static ref SOURCE_DATE_EPOCH_RAW: Option = std::env::var("SOURCE_DATE_EPOCH").ok(); @@ -120,6 +121,294 @@ pub(crate) fn rewrite_rpmdb_timestamps(rpmdb: &mut F) -> Ok(()) } +#[context("Rewriting rpmdb database files for build stability")] +pub(crate) fn normalize_rpmdb(rootfs: &openat::Dir, rpmdb_path: impl AsRef) -> Result<()> { + let source_date_epoch = if let Some(source_date_epoch) = *SOURCE_DATE_EPOCH { + source_date_epoch as u32 + } else { + return Ok(()); + }; + + let mut db_backend = None; + + let macros = BufReader::new(rootfs.open_file("usr/lib/rpm/macros")?); + for line in macros.lines() { + let line = line?; + if line.is_empty() { + continue; + } + + let mut bits = line.split(char::is_whitespace).filter(|s| !s.is_empty()); + if let Some(m) = bits.next() { + if m == "%_db_backend" { + db_backend = bits.last().map(|s| s.to_string()); + break; + } + }; + } + + let db_backend = db_backend.ok_or_else(|| anyhow!("Unable to determine rpmdb backend"))?; + + match db_backend.as_str() { + "bdb" | "b-d-b" => rpmdb_bdb::normalize_rpmdb_bdb(rootfs, rpmdb_path, source_date_epoch), + "ndb" => Ok(()), + "sqlite" => Ok(()), + _ => Err(anyhow!("Unknown rpmdb backend: {}", db_backend)), + } +} + +mod rpmdb_bdb { + use anyhow::{anyhow, Context, Result}; + use binread::{BinRead, BinReaderExt}; + use lazy_static::lazy_static; + use openat::SimpleType; + use sha2::Digest; + use std::ffi::OsStr; + use std::io::{Read, Seek, SeekFrom, Write}; + use std::os::unix::io::AsRawFd; + use std::path::{Path, PathBuf}; + use subprocess::{Exec, Redirection}; + + #[derive(BinRead, Debug, Clone, Copy, PartialEq, Eq)] + #[repr(u8)] + #[br(repr=u8)] + enum PageType { + IBTree = 3, + LBTree = 5, + Overflow = 7, + HashMeta = 8, + BTreeMeta = 9, + Hash = 13, + } + + #[derive(BinRead, Debug)] + #[br(little)] + struct MetaHeader { + lsn: u64, + pgno: u32, + magic: u32, + version: u32, + pagesize: u32, + encrypt_alg: u8, + page_type: PageType, + metaflags: u8, + unused1: u8, + free: u32, + last_pgno: u32, + nparts: u32, + key_count: u32, + record_count: u32, + flags: u32, + uid: [u8; 20], + } + + const BTREE_MAGIC: u32 = 0x00053162; + const HASH_MAGIC: u32 = 0x00061561; + const PAGE_HEADER_SIZE: u16 = 26; + const PAGE_HEADER_MAGIC_OFFSET: u64 = 0x34; + + #[derive(BinRead, Debug)] + #[br(little)] + struct PageHeader { + lsn: u64, + pgno: u32, + prev_pgno: u32, + next_pgno: u32, + entries: u16, + hf_offset: u16, + level: u8, + page_type: PageType, + } + + #[derive(BinRead, Debug)] + #[br(repr=u8)] + #[repr(u8)] + enum BTreeItemType { + KeyData = 1, + Duplicate = 2, + Overflow = 3, + } + + #[derive(BinRead, Debug)] + #[br(little)] + struct BTreeItem { + len: u16, + item_type: BTreeItemType, + } + + #[derive(BinRead, Debug)] + #[br(repr=u8)] + #[repr(u8)] + enum HashItemType { + KeyData = 1, + Duplicate = 2, + Offpage = 3, + OffDup = 4, + } + + lazy_static! { + static ref PROC_SELF_CWD: PathBuf = PathBuf::from("/proc/self/cwd"); + static ref PROC_SELF_FD: PathBuf = PathBuf::from("/proc/self/fd"); + } + + pub(super) fn normalize_rpmdb_bdb>( + rootfs: &openat::Dir, + rpmdb_path: P, + timestamp: u32, + ) -> Result<()> { + let rpmdb_path = rpmdb_path.as_ref(); + + for entry in rootfs.list_dir(rpmdb_path)? { + let entry = entry?; + + match entry.simple_type() { + Some(SimpleType::File) => (), + _ => continue, + }; + + if let Some(filename) = entry.file_name().to_str() { + if filename.starts_with('.') || filename.starts_with("__db") { + continue; + } + } else { + continue; + } + + let path = rpmdb_path.join(entry.file_name()); + + let old_digest = sha2::Sha256::digest( + Exec::cmd("db_dump") + .args(&[PROC_SELF_CWD.join(&path)]) + .cwd(PROC_SELF_FD.join(rootfs.as_raw_fd().to_string())) + .stdout(Redirection::Pipe) + .capture() + .context("pre-normalization data dump")? + .stdout + .as_slice(), + ); + + { + let mut file_id = sha2::Sha256::new(); + file_id.update(timestamp.to_be_bytes()); + file_id.update(format!("rpm/{}", entry.file_name().to_str().unwrap()).as_bytes()); + let file_id = &file_id.finalize()[..20]; + + let mut db = rootfs.update_file(&path, 0o644)?; + let meta_header: MetaHeader = db.read_le()?; + + match (meta_header.magic, meta_header.page_type) { + (BTREE_MAGIC, PageType::BTreeMeta) => (), + (HASH_MAGIC, PageType::HashMeta) => (), + _ => continue, + }; + + db.seek(SeekFrom::Start(PAGE_HEADER_MAGIC_OFFSET))?; + db.write_all(file_id)?; + + for pageno in 1..meta_header.last_pgno + 1 { + db.seek(SeekFrom::Start((pageno * meta_header.pagesize) as u64))?; + + let header: PageHeader = db.read_le()?; + + if header.page_type == PageType::Overflow { + db.seek(SeekFrom::Current(header.hf_offset as i64))?; + let fill_length = + meta_header.pagesize - (PAGE_HEADER_SIZE + header.hf_offset) as u32; + std::io::copy( + &mut std::io::repeat(b'\x00').take(fill_length as u64), + &mut db, + )?; + continue; + } + + let mut offsets: Vec = Vec::new(); + + for _ in 0..header.entries { + offsets.push(db.read_le()?); + } + + offsets.sort_unstable(); + + let empty = if offsets.is_empty() { + meta_header.pagesize - PAGE_HEADER_SIZE as u32 + } else { + *offsets.first().unwrap() as u32 + - (PAGE_HEADER_SIZE + header.entries * 2) as u32 + }; + + std::io::copy(&mut std::io::repeat(b'\x00').take(empty as u64), &mut db)?; + + let mut offset_iter = offsets.into_iter().peekable(); + while let Some(offset) = offset_iter.next() { + db.seek(SeekFrom::Start( + (pageno * meta_header.pagesize + offset as u32) as u64, + ))?; + + if matches!(header.page_type, PageType::IBTree | PageType::LBTree) { + let item: BTreeItem = db.read_le()?; + if header.page_type == PageType::IBTree { + db.write_all(b"\x00")?; + } else if header.page_type == PageType::LBTree { + if let BTreeItemType::Overflow = item.item_type { + db.seek(SeekFrom::Current(-3))?; + db.write_all(b"\x00\x00")?; + } else if let BTreeItemType::KeyData = item.item_type { + let next_offset = if let Some(next) = offset_iter.peek() { + *next + } else { + meta_header.pagesize as u16 + }; + let remainder = next_offset - (offset + 3 + item.len); + if remainder != 0 { + db.seek(SeekFrom::Current(item.len as i64))?; + std::io::copy( + &mut std::io::repeat(b'\x00').take(remainder as u64), + &mut db, + )?; + } + } + } + } else if header.page_type == PageType::Hash { + let item_type: HashItemType = db.read_le()?; + + if let HashItemType::Offpage = item_type { + db.write_all(b"\x00\x00\x00")?; + } + } + } + } + + db.flush()?; + } + + let r = Exec::cmd("db_verify") + .args(&[OsStr::new("-q"), PROC_SELF_CWD.join(&path).as_os_str()]) + .cwd(PROC_SELF_FD.join(rootfs.as_raw_fd().to_string())) + .join()?; + if !r.success() { + return Err(anyhow!("post-normalization verification failed")); + } + + let new_digest = sha2::Sha256::digest( + Exec::cmd("db_dump") + .args(&[PROC_SELF_CWD.join(&path)]) + .cwd(PROC_SELF_FD.join(rootfs.as_raw_fd().to_string())) + .stdout(Redirection::Pipe) + .capture() + .context("post-normalization data dump")? + .stdout + .as_slice(), + ); + + if new_digest != old_digest { + return Err(anyhow!("file data changed during normalization")); + } + } + + Ok(()) + } +} + #[cfg(test)] mod tests { use super::*;