Skip to content

Commit

Permalink
repair coin index (#19142)
Browse files Browse the repository at this point in the history
temporary PR.
This PR introduces a background task to repair the `coin_index` and
remove any dangling entries. The PR will be active for one release and
will be reverted afterward.

The background task works by iterating over a snapshot of the
`coin_index`, identifying coins that no longer belong to their
respective owners, and populating a list of candidates for removal
(some of these entries may turn out to be benign).

Once the candidate list is populated, the task makes a second pass over
the candidate list. This time it locks the corresponding entries to
prevent potential races with concurrent writes. The task then re-verifies
the eligibility criteria and removes the dangling entries.
  • Loading branch information
phoenix-o authored Sep 5, 2024
1 parent 22844ae commit bb77882
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 4 deletions.
4 changes: 3 additions & 1 deletion crates/sui-core/src/authority.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::execution_cache::ExecutionCacheTraitPointers;
use crate::execution_cache::TransactionCacheRead;
use crate::rest_index::RestIndexStore;
use crate::transaction_outputs::TransactionOutputs;
use crate::verify_indexes::verify_indexes;
use crate::verify_indexes::{fix_indexes, verify_indexes};
use anyhow::anyhow;
use arc_swap::{ArcSwap, Guard};
use async_trait::async_trait;
Expand Down Expand Up @@ -2709,6 +2709,8 @@ impl AuthorityState {
validator_tx_finalizer,
});

let state_clone = Arc::downgrade(&state);
spawn_monitored_task!(fix_indexes(state_clone));
// Start a task to execute ready certificates.
let authority_state = Arc::downgrade(&state);
spawn_monitored_task!(execution_process(
Expand Down
60 changes: 60 additions & 0 deletions crates/sui-core/src/verify_indexes.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
// Copyright (c) Mysten Labs, Inc.
// SPDX-License-Identifier: Apache-2.0

use std::sync::Weak;
use std::{collections::BTreeMap, sync::Arc};

use anyhow::{anyhow, bail, Result};
use sui_storage::indexes::CoinIndexKey;
use sui_storage::{indexes::CoinInfo, IndexStore};
use sui_types::{base_types::ObjectInfo, object::Owner};
use tracing::info;
use typed_store::traits::Map;

use crate::authority::AuthorityState;
use crate::{authority::authority_store_tables::LiveObject, state_accumulator::AccumulatorStore};

/// This is a very expensive function that verifies some of the secondary indexes. This is done by
Expand Down Expand Up @@ -88,3 +91,60 @@ pub fn verify_indexes(store: &dyn AccumulatorStore, indexes: Arc<IndexStore>) ->

Ok(())
}

// temporary code to repair the coin index. This should be removed in the next release
pub async fn fix_indexes(authority_state: Weak<AuthorityState>) -> Result<()> {
let is_violation = |coin_index_key: &CoinIndexKey,
state: &Arc<AuthorityState>|
-> anyhow::Result<bool> {
if let Some(object) = state.get_object_store().get_object(&coin_index_key.2)? {
if matches!(object.owner, Owner::AddressOwner(real_owner_id) | Owner::ObjectOwner(real_owner_id) if coin_index_key.0 == real_owner_id)
{
return Ok(false);
}
}
Ok(true)
};

tracing::info!("Starting fixing coin index");
// populate candidate list without locking. Some entries are benign
let authority_state_clone = authority_state.clone();
let candidates = tokio::task::spawn_blocking(move || {
if let Some(authority) = authority_state_clone.upgrade() {
let mut batch = vec![];
if let Some(indexes) = &authority.indexes {
for (coin_index_key, _) in indexes.tables().coin_index().unbounded_iter() {
if is_violation(&coin_index_key, &authority)? {
batch.push(coin_index_key);
}
}
}
return Ok::<Vec<_>, anyhow::Error>(batch);
}
Ok(vec![])
})
.await??;

if let Some(authority) = authority_state.upgrade() {
if let Some(indexes) = &authority.indexes {
for chunk in candidates.chunks(100) {
let _locks = indexes
.caches
.locks
.acquire_locks(chunk.iter().map(|key| key.0))
.await;
let mut batch = vec![];
for key in chunk {
if is_violation(key, &authority)? {
batch.push(key);
}
}
let mut wb = indexes.tables().coin_index().batch();
wb.delete_batch(indexes.tables().coin_index(), batch)?;
wb.write()?;
}
}
}
tracing::info!("Finished fix for the coin index");
Ok(())
}
6 changes: 3 additions & 3 deletions crates/sui-storage/src/indexes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ use typed_store::traits::{TableSummary, TypedStoreDebug};
use typed_store::DBMapUtils;

type OwnerIndexKey = (SuiAddress, ObjectID);
type CoinIndexKey = (SuiAddress, String, ObjectID);
pub type CoinIndexKey = (SuiAddress, String, ObjectID);
type DynamicFieldKey = (ObjectID, ObjectID);
type EventId = (TxSequenceNumber, usize);
type EventIndex = (TransactionEventsDigest, TransactionDigest, u64);
Expand Down Expand Up @@ -129,7 +129,7 @@ impl IndexStoreMetrics {
pub struct IndexStoreCaches {
per_coin_type_balance: ShardedLruCache<(SuiAddress, TypeTag), SuiResult<TotalBalance>>,
all_balances: ShardedLruCache<SuiAddress, SuiResult<Arc<HashMap<TypeTag, TotalBalance>>>>,
locks: MutexTable<SuiAddress>,
pub locks: MutexTable<SuiAddress>,
}

#[derive(Default)]
Expand Down Expand Up @@ -229,7 +229,7 @@ impl IndexStoreTables {
pub struct IndexStore {
next_sequence_number: AtomicU64,
tables: IndexStoreTables,
caches: IndexStoreCaches,
pub caches: IndexStoreCaches,
metrics: Arc<IndexStoreMetrics>,
max_type_length: u64,
remove_deprecated_tables: bool,
Expand Down

0 comments on commit bb77882

Please sign in to comment.