From ac2d0bef9e3110d3f7fbe5a13bdcbaac6b86fc08 Mon Sep 17 00:00:00 2001 From: brooks Date: Mon, 17 Jun 2024 16:54:02 -0400 Subject: [PATCH] Adds `diff` to accounts-hash-cache-tool --- .../accounts-hash-cache-tool/Cargo.toml | 2 +- .../accounts-hash-cache-tool/src/main.rs | 292 ++++++++++++++---- 2 files changed, 241 insertions(+), 53 deletions(-) diff --git a/accounts-db/accounts-hash-cache-tool/Cargo.toml b/accounts-db/accounts-hash-cache-tool/Cargo.toml index 501e0dfdb8b71d..e4803261ef6995 100644 --- a/accounts-db/accounts-hash-cache-tool/Cargo.toml +++ b/accounts-db/accounts-hash-cache-tool/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "agave-accounts-hash-cache-tool" -description = "Tool to inspect accounts hash cache files" +description = "Tool for accounts hash cache files" publish = false version = { workspace = true } authors = { workspace = true } diff --git a/accounts-db/accounts-hash-cache-tool/src/main.rs b/accounts-db/accounts-hash-cache-tool/src/main.rs index 98778049504216..c56369391c8077 100644 --- a/accounts-db/accounts-hash-cache-tool/src/main.rs +++ b/accounts-db/accounts-hash-cache-tool/src/main.rs @@ -1,12 +1,17 @@ use { bytemuck::Zeroable as _, - clap::{crate_description, crate_name, value_t_or_exit, App, Arg}, + clap::{ + crate_description, crate_name, value_t_or_exit, App, AppSettings, Arg, ArgMatches, + SubCommand, + }, solana_accounts_db::{CacheHashDataFileEntry, CacheHashDataFileHeader}, std::{ + collections::HashMap, fs::File, io::{self, BufReader, Read as _}, mem::size_of, num::Saturating, + path::Path, }, }; @@ -14,62 +19,73 @@ fn main() { let matches = App::new(crate_name!()) .about(crate_description!()) .version(solana_version::version!()) - .arg( - Arg::with_name("path") - .index(1) - .takes_value(true) - .value_name("PATH") - .help("Accounts hash cache file to inspect"), + .global_setting(AppSettings::ArgRequiredElseHelp) + .global_setting(AppSettings::ColoredHelp) + .global_setting(AppSettings::InferSubcommands) + 
.global_setting(AppSettings::UnifiedHelpMessage) + .global_setting(AppSettings::VersionlessSubcommands) + .subcommand( + SubCommand::with_name("inspect") + .about( + "Inspect an accounts hash cache file and display \ + each account's address, hash, and balance", + ) + .arg( + Arg::with_name("force") + .long("force") + .takes_value(false) + .help("Continue even if sanity checks fail"), + ) + .arg( + Arg::with_name("path") + .index(1) + .takes_value(true) + .value_name("PATH") + .help("Accounts hash cache file to inspect"), + ), ) - .arg( - Arg::with_name("force") - .long("force") - .takes_value(false) - .help("Continue even if sanity checks fail"), + .subcommand( + SubCommand::with_name("diff") + .about("Diff two accounts hash cache files") + .arg( + Arg::with_name("path1") + .index(1) + .takes_value(true) + .value_name("PATH1") + .help("Accounts hash cache file 1 to diff"), + ) + .arg( + Arg::with_name("path2") + .index(2) + .takes_value(true) + .value_name("PATH2") + .help("Accounts hash cache file 2 to diff"), + ), ) .get_matches(); - let force = matches.is_present("force"); - let path = value_t_or_exit!(matches, "path", String); - - let file = File::open(&path).unwrap_or_else(|err| { - eprintln!("Failed to open accounts hash cache file '{path}': {err}"); - std::process::exit(1); - }); - let actual_file_size = file - .metadata() - .unwrap_or_else(|err| { - eprintln!("Failed to query file metadata: {err}"); - std::process::exit(1); - }) - .len(); - let mut reader = BufReader::new(file); - - let header = { - let mut header = CacheHashDataFileHeader::zeroed(); - reader - .read_exact(bytemuck::bytes_of_mut(&mut header)) - .unwrap_or_else(|err| { - eprintln!("Failed to read header: {err}"); - std::process::exit(1); - }); - header - }; - - // Sanity checks -- ensure the actual file size matches the expected file size - let expected_file_size = size_of::<CacheHashDataFileHeader>() - .saturating_add(size_of::<CacheHashDataFileEntry>().saturating_mul(header.count)); - if actual_file_size != expected_file_size as u64 { - 
eprintln!( - "Failed sanitization: actual file size does not match expected file size! \ - actual: {actual_file_size}, expected: {expected_file_size}", - ); - if !force { - std::process::exit(1); + match matches.subcommand() { + ("inspect", Some(subcommand_matches)) => do_inspect(&matches, subcommand_matches) + .map_err(|err| format!("inspection failed: {err}")), + ("diff", Some(subcommand_matches)) => { + do_diff(&matches, subcommand_matches).map_err(|err| format!("diff failed: {err}")) } - eprintln!("Forced. Continuing... Results may be incorrect."); + _ => unreachable!(), } + .unwrap_or_else(|err| { + eprintln!("Error: {err}"); + std::process::exit(1); + }); +} +fn do_inspect( + _app_matches: &ArgMatches<'_>, + subcommand_matches: &ArgMatches<'_>, +) -> Result<(), String> { + let force = subcommand_matches.is_present("force"); + let path = value_t_or_exit!(subcommand_matches, "path", String); + let (mut reader, header) = open_file(&path, force) + .map_err(|err| format!("failed to open accounts hash cache file '{path}': {err}"))?; let count_width = (header.count as f64).log10().ceil() as usize; let mut count = Saturating(0usize); loop { @@ -80,10 +96,13 @@ fn main() { Err(err) => { if err.kind() == io::ErrorKind::UnexpectedEof && count.0 == header.count { // we've hit the expected end of the file + break; } else { - eprintln!("Failed to read entry {count}: {err}"); + return Err(format!( + "failed to read entry {count}, expected {}: {err}", + header.count, + )); } - break; } }; println!( @@ -96,4 +115,173 @@ fn main() { } println!("actual entries: {count}, expected: {}", header.count); + Ok(()) +} + +fn do_diff( + _app_matches: &ArgMatches<'_>, + subcommand_matches: &ArgMatches<'_>, +) -> Result<(), String> { + let force = false; // skipping sanity checks is not supported when diffing + let path1 = value_t_or_exit!(subcommand_matches, "path1", String); + let path2 = value_t_or_exit!(subcommand_matches, "path2", String); + let (mut reader1, header1) = 
open_file(&path1, force) + .map_err(|err| format!("failed to open accounts hash cache file 1 '{path1}': {err}"))?; + let (mut reader2, header2) = open_file(&path2, force) + .map_err(|err| format!("failed to open accounts hash cache file 2 '{path2}': {err}"))?; + // Note: Purposely open both files before reading either one. This way, if there's an error + // opening file 2, we can bail early without having to wait for file 1 to be read completely. + + // extract the entries from both files + let do_extract = |num, reader: &mut BufReader<_>, header: &CacheHashDataFileHeader| { + let mut entries = HashMap::<_, _>::default(); + loop { + let mut entry = CacheHashDataFileEntry::zeroed(); + let result = reader.read_exact(bytemuck::bytes_of_mut(&mut entry)); + match result { + Ok(()) => {} + Err(err) => { + if err.kind() == io::ErrorKind::UnexpectedEof && entries.len() == header.count { + // we've hit the expected end of the file + break; + } else { + return Err(format!( + "failed to read entry {}, expected {}: {err}", + entries.len(), + header.count, + )); + } + } + }; + let CacheHashDataFileEntry { + hash, + lamports, + pubkey, + } = entry; + let old_value = entries.insert(pubkey, (hash, lamports)); + if let Some(old_value) = old_value { + let new_value = entries.get(&pubkey); + return Err(format!("found duplicate pubkey in file {num}: {pubkey}, old value: {old_value:?}, new value: {new_value:?}")); + } + } + Ok(entries) + }; + let entries1 = do_extract(1, &mut reader1, &header1)?; + let entries2 = do_extract(2, &mut reader2, &header2)?; + + // compute the differences between the files + let do_compute = |lhs: &HashMap<_, (_, _)>, rhs: &HashMap<_, (_, _)>| { + let mut unique_entries = Vec::new(); + let mut mismatch_entries = Vec::new(); + for (lhs_key, lhs_value) in lhs.iter() { + if let Some(rhs_value) = rhs.get(lhs_key) { + if lhs_value != rhs_value { + mismatch_entries.push(( + CacheHashDataFileEntry { + hash: lhs_value.0, + lamports: lhs_value.1, + pubkey: *lhs_key, 
+ }, + CacheHashDataFileEntry { + hash: rhs_value.0, + lamports: rhs_value.1, + pubkey: *lhs_key, + }, + )); + } + } else { + unique_entries.push(CacheHashDataFileEntry { + hash: lhs_value.0, + lamports: lhs_value.1, + pubkey: *lhs_key, + }); + } + } + unique_entries.sort_unstable_by(|a, b| a.pubkey.cmp(&b.pubkey)); + mismatch_entries.sort_unstable_by(|a, b| a.0.pubkey.cmp(&b.0.pubkey)); + (unique_entries, mismatch_entries) + }; + let (unique_entries1, mismatch_entries) = do_compute(&entries1, &entries2); + let (unique_entries2, _) = do_compute(&entries2, &entries1); + + // display the unique entries in each file + let do_print = |entries: &[CacheHashDataFileEntry]| { + let count_width = (entries.len() as f64).log10().ceil() as usize; + if entries.is_empty() { + println!("(none)"); + } else { + for (i, entry) in entries.iter().enumerate() { + println!( + "{i:count_width$}: pubkey: {:44}, hash: {:44}, lamports: {}", + entry.pubkey.to_string(), + entry.hash.0.to_string(), + entry.lamports, + ); + } + } + }; + println!("Unique entries in file 1:"); + do_print(&unique_entries1); + println!("Unique entries in file 2:"); + do_print(&unique_entries2); + + println!("Mismatch values:"); + let count_width = (mismatch_entries.len() as f64).log10().ceil() as usize; + if mismatch_entries.is_empty() { + println!("(none)"); + } else { + for (i, (lhs, rhs)) in mismatch_entries.iter().enumerate() { + println!( + "{i:count_width$}: pubkey: {:44}, hash: {:44}, lamports: {}", + lhs.pubkey.to_string(), + lhs.hash.0.to_string(), + lhs.lamports, + ); + println!( + "{i:count_width$}: file 2: {:44}, hash: {:44}, lamports: {}", + "(same)".to_string(), + rhs.hash.0.to_string(), + rhs.lamports, + ); + } + } + + Ok(()) +} + +fn open_file( + path: impl AsRef<Path>, + force: bool, +) -> Result<(BufReader<File>, CacheHashDataFileHeader), String> { + let file = File::open(path).map_err(|err| format!("{err}"))?; + let actual_file_size = file + .metadata() + .map_err(|err| format!("failed to query file 
metadata: {err}"))? + .len(); + let mut reader = BufReader::new(file); + + let header = { + let mut header = CacheHashDataFileHeader::zeroed(); + reader + .read_exact(bytemuck::bytes_of_mut(&mut header)) + .map_err(|err| format!("failed to read header: {err}"))?; + header + }; + + // Sanity checks -- ensure the actual file size matches the expected file size + let expected_file_size = size_of::<CacheHashDataFileHeader>() + .saturating_add(size_of::<CacheHashDataFileEntry>().saturating_mul(header.count)); + if actual_file_size != expected_file_size as u64 { + let err_msg = format!( + "failed sanitization: actual file size does not match expected file size! \ + actual: {actual_file_size}, expected: {expected_file_size}", + ); + if force { + eprintln!("Warning: {err_msg}\nForced. Continuing... Results may be incorrect."); + } else { + return Err(err_msg); + } + } + + Ok((reader, header)) }