Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

always pack a few newest ancient slots #1730

Merged
merged 4 commits into from
Jun 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 145 additions & 12 deletions accounts-db/src/ancient_append_vecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ use {
},
};

/// This many of the highest slot values should be treated as desirable to pack.
/// Candidate slots within this offset of the maximum candidate slot are marked
/// `is_high_slot` and are always kept when filtering which storages to pack.
/// This gives us high slots to move packed accounts into.
const HIGH_SLOT_OFFSET: u64 = 100;

/// ancient packing algorithm tuning per pass
#[derive(Debug)]
struct PackedAncientStorageTuning {
Expand Down Expand Up @@ -57,6 +61,9 @@ struct SlotInfo {
alive_bytes: u64,
/// true if this should be shrunk due to ratio
should_shrink: bool,
/// this slot is a high slot #
/// It is important to include some high slot #s so that we have new slots to try each time pack runs.
is_high_slot: bool,
}

/// info for all storages in ancient slots
Expand All @@ -83,6 +90,7 @@ impl AncientSlotInfos {
storage: Arc<AccountStorageEntry>,
can_randomly_shrink: bool,
ideal_size: NonZeroU64,
is_high_slot: bool,
) -> bool {
let mut was_randomly_shrunk = false;
let alive_bytes = storage.alive_bytes() as u64;
Expand Down Expand Up @@ -122,6 +130,7 @@ impl AncientSlotInfos {
storage,
alive_bytes,
should_shrink,
is_high_slot,
});
self.total_alive_bytes += alive_bytes;
}
Expand Down Expand Up @@ -198,14 +207,18 @@ impl AncientSlotInfos {
let storages_remaining = total_storages - i - 1;

// if the remaining uncombined storages and the # of resulting
// combined ancient storages is less than the threshold, then
// combined ancient storages are less than the threshold, then
// we've gone too far, so get rid of this entry and all after it.
// Every storage after this one is larger.
// Every storage after this one is larger than the ones we've chosen.
// if we ever get to more than `max_resulting_storages` required ancient storages, that is enough to stop for now.
// It will take a while to create that many. This should be a limit that only affects
// extreme testing environments.
if storages_remaining + ancient_storages_required < low_threshold
|| ancient_storages_required as u64 > u64::from(tuning.max_resulting_storages)
// It will take a lot of time for the pack algorithm to create that many, and that is bad for system performance.
// This should be a limit that only affects extreme testing environments.
// We do not stop including entries until we have dealt with all the high slot #s. This allows the algorithm to continue
// to make progress each time it is called. There are exceptions that can cause the pack to fail, such as accounts with multiple
// refs.
if !info.is_high_slot
&& (storages_remaining + ancient_storages_required < low_threshold
|| ancient_storages_required as u64 > u64::from(tuning.max_resulting_storages))
{
self.all_infos.truncate(i);
break;
Expand All @@ -229,10 +242,15 @@ impl AncientSlotInfos {
return;
}

// sort by 'should_shrink' then smallest capacity to largest
// sort by:
// 1. `high_slot`: we want to include new, high slots each time so that we try new slots
// each time alg runs and have several high target slots for packed storages.
// 2. 'should_shrink' so we make progress on shrinking ancient storages
// 3. smallest capacity to largest so that we remove the most slots possible
self.all_infos.sort_unstable_by(|l, r| {
r.should_shrink
.cmp(&l.should_shrink)
r.is_high_slot
.cmp(&l.is_high_slot)
.then_with(|| r.should_shrink.cmp(&l.should_shrink))
.then_with(|| l.capacity.cmp(&r.capacity))
});

Expand Down Expand Up @@ -498,10 +516,20 @@ impl AccountsDb {
..AncientSlotInfos::default()
};
let mut randoms = 0;
let max_slot = slots.iter().max().cloned().unwrap_or_default();
// heuristic to include some # of newly eligible ancient slots so that the pack algorithm always makes progress
let high_slot_boundary = max_slot.saturating_sub(HIGH_SLOT_OFFSET);
let is_high_slot = |slot| slot >= high_slot_boundary;

for slot in &slots {
if let Some(storage) = self.storage.get_slot_storage_entry(*slot) {
if infos.add(*slot, storage, can_randomly_shrink, ideal_size) {
if infos.add(
*slot,
storage,
can_randomly_shrink,
ideal_size,
is_high_slot(*slot),
) {
randoms += 1;
}
}
Expand Down Expand Up @@ -1083,12 +1111,13 @@ pub mod tests {
append_vec::{aligned_stored_size, AppendVec, AppendVecStoredAccountMeta},
storable_accounts::{tests::build_accounts_from_storage, StorableAccountsBySlot},
},
rand::seq::SliceRandom as _,
solana_sdk::{
account::{AccountSharedData, ReadableAccount, WritableAccount},
hash::Hash,
pubkey::Pubkey,
},
std::ops::Range,
std::{collections::HashSet, ops::Range},
strum::IntoEnumIterator,
strum_macros::EnumIter,
};
Expand All @@ -1107,6 +1136,7 @@ pub mod tests {
let original_stores = (0..slots)
.filter_map(|slot| db.storage.get_slot_storage_entry((slot as Slot) + slot1))
.collect::<Vec<_>>();
let is_high_slot = false;
let slot_infos = original_stores
.iter()
.map(|storage| SlotInfo {
Expand All @@ -1115,6 +1145,7 @@ pub mod tests {
capacity: 0,
alive_bytes: 0,
should_shrink: false,
is_high_slot,
})
.collect();
(
Expand Down Expand Up @@ -2381,6 +2412,7 @@ pub mod tests {
let mut infos = AncientSlotInfos::default();
let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
let alive_bytes_expected = storage.alive_bytes();
let high_slot = false;
match method {
TestCollectInfo::Add => {
// test lower level 'add'
Expand All @@ -2389,6 +2421,7 @@ pub mod tests {
Arc::clone(&storage),
can_randomly_shrink,
NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(),
high_slot,
);
}
TestCollectInfo::CalcAncientSlotInfo => {
Expand Down Expand Up @@ -2447,12 +2480,14 @@ pub mod tests {
let (db, slot1) = create_db_with_storages_and_index(alive, slots, None);
let mut infos = AncientSlotInfos::default();
let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
let high_slot = false;
if call_add {
infos.add(
slot1,
Arc::clone(&storage),
can_randomly_shrink,
NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(),
high_slot,
);
} else {
infos = db.calc_ancient_slot_info(
Expand Down Expand Up @@ -2630,6 +2665,7 @@ pub mod tests {
capacity: 1,
alive_bytes: 1,
should_shrink: false,
is_high_slot: false,
})
.collect(),
shrink_indexes: (0..count).collect(),
Expand Down Expand Up @@ -2673,7 +2709,7 @@ pub mod tests {
}

#[test]
fn test_filter_by_smaller_capacity_sort() {
fn test_filter_by_smallest_capacity_sort() {
// max is 6
// 7 storages
// storage[last] is big enough to cause us to need another storage
Expand Down Expand Up @@ -2731,6 +2767,101 @@ pub mod tests {
}
}

/// Verify that high slots are always retained when filtering which ancient infos to pack.
///
/// With *more* high slots than the tuning's `max_resulting_storages`, every high slot
/// must still be present after calling `filter_by_smallest_capacity()`.
#[test]
fn test_filter_by_smallest_capacity_high_slot_more() {
    let tuning = default_tuning();

    // Build more high-slot storages than 'max resulting storages' allows.
    let high_slot_count = tuning.max_resulting_storages.get() * 2;
    let storage_count = high_slot_count * 3;
    let mut infos = create_test_infos(storage_count as usize);
    infos.all_infos.sort_unstable_by_key(|info| info.slot);
    for info in infos
        .all_infos
        .iter_mut()
        .rev()
        .take(high_slot_count as usize)
    {
        info.is_high_slot = true;
    }
    let expected_slots: Vec<_> = infos
        .all_infos
        .iter()
        .filter(|info| info.is_high_slot)
        .map(|info| info.slot)
        .collect();

    // Randomize the ordering so the filter's internal sort is actually exercised.
    infos.all_infos.shuffle(&mut thread_rng());
    infos.filter_by_smallest_capacity(&tuning);

    infos.all_infos.sort_unstable_by_key(|info| info.slot);
    let actual_slots: Vec<_> = infos.all_infos.iter().map(|info| info.slot).collect();
    assert_eq!(infos.all_infos.len() as u64, high_slot_count);
    assert_eq!(actual_slots, expected_slots);
}

/// Verify that high slots are always retained when filtering which ancient infos to pack.
///
/// With *fewer* high slots than the tuning's `max_resulting_storages`, every high slot
/// must still be present after calling `filter_by_smallest_capacity()`.
#[test]
fn test_filter_by_smallest_capacity_high_slot_less() {
    let tuning = default_tuning();

    // Build fewer high-slot storages than 'max resulting storages' allows.
    let high_slot_count = tuning.max_resulting_storages.get() / 2;
    let storage_count = high_slot_count * 5;
    let mut infos = create_test_infos(storage_count as usize);
    infos.all_infos.sort_unstable_by_key(|info| info.slot);
    for info in infos
        .all_infos
        .iter_mut()
        .rev()
        .take(high_slot_count as usize)
    {
        info.is_high_slot = true;
    }
    let high_slots: Vec<_> = infos
        .all_infos
        .iter()
        .filter(|info| info.is_high_slot)
        .map(|info| info.slot)
        .collect();

    // Randomize the ordering so the filter's internal sort is actually exercised.
    infos.all_infos.shuffle(&mut thread_rng());
    infos.filter_by_smallest_capacity(&tuning);

    infos.all_infos.sort_unstable_by_key(|info| info.slot);
    let retained_slots: HashSet<_> = infos.all_infos.iter().map(|info| info.slot).collect();
    assert_eq!(
        infos.all_infos.len() as u64,
        tuning.max_resulting_storages.get() - 1,
    );
    assert!(high_slots
        .iter()
        .all(|high_slot| retained_slots.contains(high_slot)));
}

fn test(filter: bool, infos: &mut AncientSlotInfos, tuning: &PackedAncientStorageTuning) {
if filter {
infos.filter_by_smallest_capacity(tuning);
Expand Down Expand Up @@ -3214,13 +3345,15 @@ pub mod tests {
capacity: info1_capacity,
alive_bytes: 0,
should_shrink: false,
is_high_slot: false,
};
let info2 = SlotInfo {
storage: storage.clone(),
slot,
capacity: 2,
alive_bytes: 1,
should_shrink: false,
is_high_slot: false,
};
let mut infos = AncientSlotInfos {
all_infos: vec![info1, info2],
Expand Down