From 4e390a6f0a59fd33d024604bfe6aa584075344a3 Mon Sep 17 00:00:00 2001 From: jeff washington Date: Thu, 13 Jun 2024 10:35:51 -0500 Subject: [PATCH 1/4] always pack a few newest ancient slots --- accounts-db/src/ancient_append_vecs.rs | 50 +++++++++++++++++++++----- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/accounts-db/src/ancient_append_vecs.rs b/accounts-db/src/ancient_append_vecs.rs index 9c1ea421ec39d6..686cdc45e768b4 100644 --- a/accounts-db/src/ancient_append_vecs.rs +++ b/accounts-db/src/ancient_append_vecs.rs @@ -57,6 +57,9 @@ struct SlotInfo { alive_bytes: u64, /// true if this should be shrunk due to ratio should_shrink: bool, + /// this slot is a high slot # + /// It is important to include some high slot #s so that we have new slots to try each time pack runs. + high_slot: bool, } /// info for all storages in ancient slots @@ -83,6 +86,7 @@ impl AncientSlotInfos { storage: Arc, can_randomly_shrink: bool, ideal_size: NonZeroU64, + high_slot: bool, ) -> bool { let mut was_randomly_shrunk = false; let alive_bytes = storage.alive_bytes() as u64; @@ -122,6 +126,7 @@ impl AncientSlotInfos { storage, alive_bytes, should_shrink, + high_slot, }); self.total_alive_bytes += alive_bytes; } @@ -198,13 +203,16 @@ impl AncientSlotInfos { let storages_remaining = total_storages - i - 1; // if the remaining uncombined storages and the # of resulting - // combined ancient storages is less than the threshold, then + // combined ancient storages are less than the threshold, then // we've gone too far, so get rid of this entry and all after it. - // Every storage after this one is larger. + // Every storage after this one is larger than the ones we've chosen. // if we ever get to more than `max_resulting_storages` required ancient storages, that is enough to stop for now. - // It will take a while to create that many. This should be a limit that only affects - // extreme testing environments. - if storages_remaining + ancient_storages_required < low_threshold + // It will take a lot of time for the pack algorithm to create that many, and that is bad for system performance. + // This should be a limit that only affects extreme testing environments. + // We do not stop including entries until we have dealt with all the high slot #s. This allows the algorithm to continue + // to make progress each time it is called. There are exceptions that can cause the pack to fail, such as accounts with multiple + // refs. + if !info.high_slot && storages_remaining + ancient_storages_required < low_threshold || ancient_storages_required as u64 > u64::from(tuning.max_resulting_storages) { self.all_infos.truncate(i); @@ -229,10 +237,15 @@ impl AncientSlotInfos { return; } - // sort by 'should_shrink' then smallest capacity to largest + // sort by: + // 1. `high_slot`: we want to include new, high slots each time so that we try new slots + // each time alg runs and have several high target slots for packed storages. + // 2. 'should_shrink' so we make progress on shrinking ancient storages + // 3. smallest capacity to largest so that we remove the most slots possible self.all_infos.sort_unstable_by(|l, r| { - r.should_shrink - .cmp(&l.should_shrink) + r.high_slot + .cmp(&l.high_slot) + .then_with(|| (r.should_shrink.cmp(&l.should_shrink))) .then_with(|| l.capacity.cmp(&r.capacity)) }); @@ -498,10 +511,20 @@ impl AccountsDb { ..AncientSlotInfos::default() }; let mut randoms = 0; + let max_slot = slots.iter().cloned().max().unwrap_or_default(); + // heuristic to include some # of newly eligible ancient slots so that the pack algorithm always makes progress + let high_slot_boundary = max_slot.saturating_sub(100); + let is_high_slot = |slot| slot >= high_slot_boundary; for slot in &slots { if let Some(storage) = self.storage.get_slot_storage_entry(*slot) { - if infos.add(*slot, storage, can_randomly_shrink, ideal_size) { + if infos.add( + *slot, + storage, + can_randomly_shrink, + ideal_size, + is_high_slot(*slot), + ) { randoms += 1; } } @@ -1107,6 +1130,7 @@ pub mod tests { let original_stores = (0..slots) .filter_map(|slot| db.storage.get_slot_storage_entry((slot as Slot) + slot1)) .collect::>(); + let high_slot = false; let slot_infos = original_stores .iter() .map(|storage| SlotInfo { @@ -1115,6 +1139,7 @@ pub mod tests { capacity: 0, alive_bytes: 0, should_shrink: false, + high_slot, }) .collect(); ( @@ -2381,6 +2406,7 @@ pub mod tests { let mut infos = AncientSlotInfos::default(); let storage = db.storage.get_slot_storage_entry(slot1).unwrap(); let alive_bytes_expected = storage.alive_bytes(); + let high_slot = false; match method { TestCollectInfo::Add => { // test lower level 'add' @@ -2389,6 +2415,7 @@ pub mod tests { Arc::clone(&storage), can_randomly_shrink, NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(), + high_slot, ); } TestCollectInfo::CalcAncientSlotInfo => { @@ -2447,12 +2474,14 @@ pub mod tests { let (db, slot1) = create_db_with_storages_and_index(alive, slots, None); let mut infos = AncientSlotInfos::default(); let storage = db.storage.get_slot_storage_entry(slot1).unwrap(); + let high_slot = false; if call_add { infos.add( slot1, Arc::clone(&storage), can_randomly_shrink, NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(), + high_slot, ); } else { infos = db.calc_ancient_slot_info( @@ -2630,6 +2659,7 @@ pub mod tests { capacity: 1, alive_bytes: 1, should_shrink: false, + high_slot: false, }) .collect(), shrink_indexes: (0..count).collect(), @@ -3214,6 +3244,7 @@ pub mod tests { capacity: info1_capacity, alive_bytes: 0, should_shrink: false, + high_slot: false, }; let info2 = SlotInfo { storage: storage.clone(), @@ -3221,6 +3252,7 @@ pub mod tests { capacity: 2, alive_bytes: 1, should_shrink: false, + high_slot: false, }; let mut infos = AncientSlotInfos { all_infos: vec![info1, info2], From 8ef608933bcf02f73ed636f628e252f4640cdd36 Mon Sep 17 00:00:00 2001 From: jeff washington Date: Thu, 13 Jun 2024 15:20:25 -0500 Subject: [PATCH 2/4] pr feedback --- accounts-db/src/ancient_append_vecs.rs | 33 +++++++++++++++----------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/accounts-db/src/ancient_append_vecs.rs b/accounts-db/src/ancient_append_vecs.rs index 686cdc45e768b4..c60332a806dde0 100644 --- a/accounts-db/src/ancient_append_vecs.rs +++ b/accounts-db/src/ancient_append_vecs.rs @@ -27,6 +27,10 @@ use { }, }; +/// this many # of highest slot values should be treated as desirable to pack. +/// This gives us high slots to move packed accounts into. +const HIGH_SLOT_OFFSET: u64 = 100; + /// ancient packing algorithm tuning per pass #[derive(Debug)] struct PackedAncientStorageTuning { @@ -59,7 +63,7 @@ struct SlotInfo { should_shrink: bool, /// this slot is a high slot # /// It is important to include some high slot #s so that we have new slots to try each time pack runs. - high_slot: bool, + is_high_slot: bool, } /// info for all storages in ancient slots @@ -86,7 +90,7 @@ impl AncientSlotInfos { storage: Arc, can_randomly_shrink: bool, ideal_size: NonZeroU64, - high_slot: bool, + is_high_slot: bool, ) -> bool { let mut was_randomly_shrunk = false; let alive_bytes = storage.alive_bytes() as u64; @@ -126,7 +130,7 @@ impl AncientSlotInfos { storage, alive_bytes, should_shrink, - high_slot, + is_high_slot, }); self.total_alive_bytes += alive_bytes; } @@ -212,8 +216,9 @@ impl AncientSlotInfos { // We do not stop including entries until we have dealt with all the high slot #s. This allows the algorithm to continue // to make progress each time it is called. There are exceptions that can cause the pack to fail, such as accounts with multiple // refs. - if !info.high_slot && storages_remaining + ancient_storages_required < low_threshold - || ancient_storages_required as u64 > u64::from(tuning.max_resulting_storages) + if !info.is_high_slot + && (storages_remaining + ancient_storages_required < low_threshold + || ancient_storages_required as u64 > u64::from(tuning.max_resulting_storages)) { self.all_infos.truncate(i); break; @@ -243,8 +248,8 @@ impl AncientSlotInfos { // 2. 'should_shrink' so we make progress on shrinking ancient storages // 3. smallest capacity to largest so that we remove the most slots possible self.all_infos.sort_unstable_by(|l, r| { - r.high_slot - .cmp(&l.high_slot) + r.is_high_slot + .cmp(&l.is_high_slot) .then_with(|| (r.should_shrink.cmp(&l.should_shrink))) .then_with(|| l.capacity.cmp(&r.capacity)) }); @@ -511,9 +516,9 @@ impl AccountsDb { ..AncientSlotInfos::default() }; let mut randoms = 0; - let max_slot = slots.iter().cloned().max().unwrap_or_default(); + let max_slot = slots.iter().max().cloned().unwrap_or_default(); // heuristic to include some # of newly eligible ancient slots so that the pack algorithm always makes progress - let high_slot_boundary = max_slot.saturating_sub(100); + let high_slot_boundary = max_slot.saturating_sub(HIGH_SLOT_OFFSET); let is_high_slot = |slot| slot >= high_slot_boundary; for slot in &slots { @@ -1130,7 +1135,7 @@ pub mod tests { let original_stores = (0..slots) .filter_map(|slot| db.storage.get_slot_storage_entry((slot as Slot) + slot1)) .collect::>(); - let high_slot = false; + let is_high_slot = false; let slot_infos = original_stores .iter() .map(|storage| SlotInfo { @@ -1139,7 +1144,7 @@ pub mod tests { capacity: 0, alive_bytes: 0, should_shrink: false, - high_slot, + is_high_slot, }) .collect(); ( @@ -2659,7 +2664,7 @@ pub mod tests { capacity: 1, alive_bytes: 1, should_shrink: false, - high_slot: false, + is_high_slot: false, }) .collect(), shrink_indexes: (0..count).collect(), @@ -3244,7 +3249,7 @@ pub mod tests { capacity: info1_capacity, alive_bytes: 0, should_shrink: false, - high_slot: false, + is_high_slot: false, }; let info2 = SlotInfo { storage: storage.clone(), @@ -3252,7 +3257,7 @@ pub mod tests { capacity: 2, alive_bytes: 1, should_shrink: false, - high_slot: false, + is_high_slot: false, }; let mut infos = AncientSlotInfos { all_infos: vec![info1, info2], From 88eb9f194af1005bd2a668c105d21903f48373ed Mon Sep 17 00:00:00 2001 From: jeff washington Date: Fri, 14 Jun 2024 12:48:20 -0500 Subject: [PATCH 3/4] remove extra () --- accounts-db/src/ancient_append_vecs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accounts-db/src/ancient_append_vecs.rs b/accounts-db/src/ancient_append_vecs.rs index c60332a806dde0..99c0457c9b858f 100644 --- a/accounts-db/src/ancient_append_vecs.rs +++ b/accounts-db/src/ancient_append_vecs.rs @@ -250,7 +250,7 @@ impl AncientSlotInfos { self.all_infos.sort_unstable_by(|l, r| { r.is_high_slot .cmp(&l.is_high_slot) - .then_with(|| (r.should_shrink.cmp(&l.should_shrink))) + .then_with(|| r.should_shrink.cmp(&l.should_shrink)) .then_with(|| l.capacity.cmp(&r.capacity)) }); From c577e5d876bc1d1544f544bcd4029269bd0077bc Mon Sep 17 00:00:00 2001 From: brooks Date: Fri, 14 Jun 2024 12:22:43 -0400 Subject: [PATCH 4/4] adds high slot tests --- accounts-db/src/ancient_append_vecs.rs | 100 ++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/accounts-db/src/ancient_append_vecs.rs b/accounts-db/src/ancient_append_vecs.rs index 99c0457c9b858f..adf6cabfc06c70 100644 --- a/accounts-db/src/ancient_append_vecs.rs +++ b/accounts-db/src/ancient_append_vecs.rs @@ -1111,12 +1111,13 @@ pub mod tests { append_vec::{aligned_stored_size, AppendVec, AppendVecStoredAccountMeta}, storable_accounts::{tests::build_accounts_from_storage, StorableAccountsBySlot}, }, + rand::seq::SliceRandom as _, solana_sdk::{ account::{AccountSharedData, ReadableAccount, WritableAccount}, hash::Hash, pubkey::Pubkey, }, - std::ops::Range, + std::{collections::HashSet, ops::Range}, strum::IntoEnumIterator, strum_macros::EnumIter, }; @@ -2708,7 +2709,7 @@ pub mod tests { } #[test] - fn test_filter_by_smaller_capacity_sort() { + fn test_filter_by_smallest_capacity_sort() { // max is 6 // 7 storages // storage[last] is big enough to cause us to need another storage @@ -2766,6 +2767,101 @@ pub mod tests { } } + /// Test that we always include the high slots when filtering which ancient infos to pack + /// + /// If we have *more* high slots than max resulting storages set in the tuning parameters, + /// we should still have all the high slots after calling `filter_by_smallest_capacity(). + #[test] + fn test_filter_by_smallest_capacity_high_slot_more() { + let tuning = default_tuning(); + + // Ensure we have more storages with high slots than the 'max resulting storages'. + let num_high_slots = tuning.max_resulting_storages.get() * 2; + let num_ancient_storages = num_high_slots * 3; + let mut infos = create_test_infos(num_ancient_storages as usize); + infos + .all_infos + .sort_unstable_by_key(|slot_info| slot_info.slot); + infos + .all_infos + .iter_mut() + .rev() + .take(num_high_slots as usize) + .for_each(|slot_info| { + slot_info.is_high_slot = true; + }); + let slots_expected: Vec<_> = infos + .all_infos + .iter() + .filter_map(|slot_info| slot_info.is_high_slot.then_some(slot_info.slot)) + .collect(); + + // shuffle the infos so they actually need to be sorted + infos.all_infos.shuffle(&mut thread_rng()); + infos.filter_by_smallest_capacity(&tuning); + + infos + .all_infos + .sort_unstable_by_key(|slot_info| slot_info.slot); + let slots_actual: Vec<_> = infos + .all_infos + .iter() + .map(|slot_info| slot_info.slot) + .collect(); + assert_eq!(infos.all_infos.len() as u64, num_high_slots); + assert_eq!(slots_actual, slots_expected); + } + + /// Test that we always include the high slots when filtering which ancient infos to pack + /// + /// If we have *less* high slots than max resulting storages set in the tuning parameters, + /// we should still have all the high slots after calling `filter_by_smallest_capacity(). + #[test] + fn test_filter_by_smallest_capacity_high_slot_less() { + let tuning = default_tuning(); + + // Ensure we have less storages with high slots than the 'max resulting storages'. + let num_high_slots = tuning.max_resulting_storages.get() / 2; + let num_ancient_storages = num_high_slots * 5; + let mut infos = create_test_infos(num_ancient_storages as usize); + infos + .all_infos + .sort_unstable_by_key(|slot_info| slot_info.slot); + infos + .all_infos + .iter_mut() + .rev() + .take(num_high_slots as usize) + .for_each(|slot_info| { + slot_info.is_high_slot = true; + }); + let high_slots: Vec<_> = infos + .all_infos + .iter() + .filter_map(|slot_info| slot_info.is_high_slot.then_some(slot_info.slot)) + .collect(); + + // shuffle the infos so they actually need to be sorted + infos.all_infos.shuffle(&mut thread_rng()); + infos.filter_by_smallest_capacity(&tuning); + + infos + .all_infos + .sort_unstable_by_key(|slot_info| slot_info.slot); + let slots_actual: HashSet<_> = infos + .all_infos + .iter() + .map(|slot_info| slot_info.slot) + .collect(); + assert_eq!( + infos.all_infos.len() as u64, + tuning.max_resulting_storages.get() - 1, + ); + assert!(high_slots + .iter() + .all(|high_slot| slots_actual.contains(high_slot))); + } + fn test(filter: bool, infos: &mut AncientSlotInfos, tuning: &PackedAncientStorageTuning) { if filter { infos.filter_by_smallest_capacity(tuning);