diff --git a/storage_controller/src/id_lock_map.rs b/storage_controller/src/id_lock_map.rs
index dff793289f1b..fcd3eb57e25f 100644
--- a/storage_controller/src/id_lock_map.rs
+++ b/storage_controller/src/id_lock_map.rs
@@ -8,14 +8,15 @@ use crate::service::RECONCILE_TIMEOUT;
 
 const LOCK_TIMEOUT_ALERT_THRESHOLD: Duration = RECONCILE_TIMEOUT;
 
-/// A wrapper around `OwnedRwLockWriteGuard` that when dropped changes the
-/// current holding operation in lock.
-pub struct WrappedWriteGuard<T: Display> {
+/// A wrapper around `OwnedRwLockWriteGuard` used for tracking the
+/// operation that holds the lock, printing a warning if it is held
+/// for longer than LOCK_TIMEOUT_ALERT_THRESHOLD.
+pub struct TracingExclusiveGuard<T: Display> {
     guard: tokio::sync::OwnedRwLockWriteGuard<Option<T>>,
     start: Instant,
 }
 
-impl<T: Display> WrappedWriteGuard<T> {
+impl<T: Display> TracingExclusiveGuard<T> {
     pub fn new(guard: tokio::sync::OwnedRwLockWriteGuard<Option<T>>) -> Self {
         Self {
             guard,
@@ -24,12 +25,12 @@ impl<T: Display> WrappedWriteGuard<T> {
     }
 }
 
-impl<T: Display> Drop for WrappedWriteGuard<T> {
+impl<T: Display> Drop for TracingExclusiveGuard<T> {
     fn drop(&mut self) {
         let duration = self.start.elapsed();
         if duration > LOCK_TIMEOUT_ALERT_THRESHOLD {
             tracing::warn!(
-                "Lock on {} was held for {:?}",
+                "Exclusive lock by {} was held for {:?}",
                 self.guard.as_ref().unwrap(),
                 duration
             );
@@ -38,6 +39,38 @@
     }
 }
 
+/// A wrapper around `OwnedRwLockReadGuard` used for tracking the
+/// operation that holds the lock, printing a warning if it is held
+/// for longer than LOCK_TIMEOUT_ALERT_THRESHOLD.
+pub struct TracingSharedGuard<T: Display> {
+    _guard: tokio::sync::OwnedRwLockReadGuard<Option<T>>,
+    operation: T,
+    start: Instant,
+}
+
+impl<T: Display> TracingSharedGuard<T> {
+    pub fn new(guard: tokio::sync::OwnedRwLockReadGuard<Option<T>>, operation: T) -> Self {
+        Self {
+            _guard: guard,
+            operation,
+            start: Instant::now(),
+        }
+    }
+}
+
+impl<T: Display> Drop for TracingSharedGuard<T> {
+    fn drop(&mut self) {
+        let duration = self.start.elapsed();
+        if duration > LOCK_TIMEOUT_ALERT_THRESHOLD {
+            tracing::warn!(
+                "Shared lock by {} was held for {:?}",
+                self.operation,
+                duration
+            );
+        }
+    }
+}
+
 /// A map of locks covering some arbitrary identifiers. Useful if you have a collection of objects but don't
 /// want to embed a lock in each one, or if your locking granularity is different to your object granularity.
 /// For example, used in the storage controller where the objects are tenant shards, but sometimes locking
@@ -58,21 +91,22 @@ where
     pub(crate) fn shared(
         &self,
         key: T,
-    ) -> impl std::future::Future<Output = tokio::sync::OwnedRwLockReadGuard<Option<I>>> {
+        operation: I,
+    ) -> impl std::future::Future<Output = TracingSharedGuard<I>> {
         let mut locked = self.entities.lock().unwrap();
-        let entry = locked.entry(key).or_default();
-        entry.clone().read_owned()
+        let entry = locked.entry(key).or_default().clone();
+        async move { TracingSharedGuard::new(entry.read_owned().await, operation) }
     }
 
     pub(crate) fn exclusive(
         &self,
         key: T,
         operation: I,
-    ) -> impl std::future::Future<Output = WrappedWriteGuard<I>> {
+    ) -> impl std::future::Future<Output = TracingExclusiveGuard<I>> {
         let mut locked = self.entities.lock().unwrap();
         let entry = locked.entry(key).or_default().clone();
         async move {
-            let mut guard = WrappedWriteGuard::new(entry.clone().write_owned().await);
+            let mut guard = TracingExclusiveGuard::new(entry.write_owned().await);
             *guard.guard = Some(operation);
             guard
         }
@@ -99,12 +133,12 @@ where
 
 pub async fn trace_exclusive_lock<
     T: Clone + Display + Eq + PartialEq + std::hash::Hash,
-    I: Display + Clone,
+    I: Clone + Display,
>(
     op_locks: &IdLockMap<T, I>,
     key: T,
     operation: I,
-) -> WrappedWriteGuard<I> {
+) -> TracingExclusiveGuard<I> {
     let start = Instant::now();
 
     let guard = op_locks.exclusive(key.clone(), operation.clone()).await;
@@ -123,14 +157,14 @@
 
 pub async fn trace_shared_lock<
     T: Clone + Display + Eq + PartialEq + std::hash::Hash,
-    I: Display,
+    I: Clone + Display,
>(
     op_locks: &IdLockMap<T, I>,
     key: T,
     operation: I,
-) -> tokio::sync::OwnedRwLockReadGuard<Option<I>> {
+) -> TracingSharedGuard<I> {
     let start = Instant::now();
-    let guard = op_locks.shared(key.clone()).await;
+    let guard = op_locks.shared(key.clone(), operation.clone()).await;
 
     let duration = start.elapsed();
     if duration > LOCK_TIMEOUT_ALERT_THRESHOLD {
@@ -159,11 +193,11 @@ mod tests {
     async fn multiple_shared_locks() {
         let id_lock_map: IdLockMap<i32, Operations> = IdLockMap::default();
 
-        let shared_lock_1 = id_lock_map.shared(1).await;
-        let shared_lock_2 = id_lock_map.shared(1).await;
+        let shared_lock_1 = id_lock_map.shared(1, Operations::Op1).await;
+        let shared_lock_2 = id_lock_map.shared(1, Operations::Op2).await;
 
-        assert!(shared_lock_1.is_none());
-        assert!(shared_lock_2.is_none());
+        assert_eq!(shared_lock_1.operation, Operations::Op1);
+        assert_eq!(shared_lock_2.operation, Operations::Op2);
     }
 
     #[tokio::test]
@@ -183,7 +217,7 @@ mod tests {
            assert!(_ex_lock_2.is_err());
         }
 
-        let shared_lock_1 = id_lock_map.shared(resource_id).await;
-        assert!(shared_lock_1.is_none());
+        let shared_lock_1 = id_lock_map.shared(resource_id, Operations::Op1).await;
+        assert_eq!(shared_lock_1.operation, Operations::Op1);
     }
 }
diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index 1e81b5c5a2ca..3451ffb1f489 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -9,7 +9,7 @@ use std::{
 
 use crate::{
     compute_hook::NotifyError,
-    id_lock_map::{trace_exclusive_lock, trace_shared_lock, IdLockMap, WrappedWriteGuard},
+    id_lock_map::{trace_exclusive_lock, trace_shared_lock, IdLockMap, TracingExclusiveGuard},
     persistence::{AbortShardSplitStatus, TenantFilter},
     reconciler::{ReconcileError, ReconcileUnits},
     scheduler::{ScheduleContext, ScheduleMode},
@@ -334,7 +334,7 @@ struct TenantShardSplitAbort {
     new_shard_count: ShardCount,
     new_stripe_size: Option<ShardStripeSize>,
     /// Until this abort op is complete, no other operations may be done on the tenant
-    _tenant_lock: WrappedWriteGuard<TenantOperations>,
+    _tenant_lock: TracingExclusiveGuard<TenantOperations>,
 }
 
 #[derive(thiserror::Error, Debug)]
@@ -1371,7 +1371,7 @@ impl Service {
     async fn node_activate_reconcile(
         &self,
         mut node: Node,
-        _lock: &WrappedWriteGuard<NodeOperations>,
+        _lock: &TracingExclusiveGuard<NodeOperations>,
     ) -> Result<(), ApiError> {
         // This Node is a mutable local copy: we will set it active so that we can use its
         // API client to reconcile with the node. The Node in [`Self::nodes`] will get updated
@@ -2560,6 +2560,7 @@ impl Service {
             TenantOperations::TimelineCreate,
         )
         .await;
+        failpoint_support::sleep_millis_async!("tenant-create-timeline-shared-lock");
 
         self.ensure_attached_wait(tenant_id).await?;
 
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index 2031feaa8307..d8cee7388103 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -1359,7 +1359,8 @@ def test_lock_time_tracing(neon_env_builder: NeonEnvBuilder):
     tenant_id = env.initial_tenant
     env.storage_controller.allowed_errors.extend(
         [
-            ".*Lock on.*",
+            ".*Exclusive lock by.*",
+            ".*Shared lock by.*",
             ".*Scheduling is disabled by policy.*",
             f".*Operation TimelineCreate on key {tenant_id} has waited.*",
         ]
@@ -1391,11 +1392,25 @@ def update_tenent_policy():
     )
     thread_update_tenant_policy.join()
 
-    env.storage_controller.assert_log_contains("Lock on UpdatePolicy was held for")
-    env.storage_controller.assert_log_contains(
+    env.storage_controller.assert_log_contains("Exclusive lock by UpdatePolicy was held for")
+    _, last_log_cursor = env.storage_controller.assert_log_contains(
         f"Operation TimelineCreate on key {tenant_id} has waited"
     )
 
+    # Test out the shared lock
+    env.storage_controller.configure_failpoints(
+        ("tenant-create-timeline-shared-lock", "return(31000)")
+    )
+
+    timeline_id = TimelineId.generate()
+    # This will hold the shared lock for long enough to cause a warning
+    env.storage_controller.pageserver_api().timeline_create(
+        pg_version=PgVersion.NOT_SET, tenant_id=tenant_id, new_timeline_id=timeline_id
+    )
+    env.storage_controller.assert_log_contains(
+        "Shared lock by TimelineCreate was held for", offset=last_log_cursor
+    )
+
 
 @pytest.mark.parametrize("remote_storage", [RemoteStorageKind.LOCAL_FS, s3_storage()])
 @pytest.mark.parametrize("shard_count", [None, 4])
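
Below is a minimal, self-contained sketch of the drop-time warning pattern this patch applies to both guard types: the guard records Instant::now() when the lock is acquired, and its Drop impl emits a tracing::warn! if the elapsed hold time exceeded a threshold. The THRESHOLD value, the main scaffolding, and the "ExampleOp" operation name are invented for illustration, and the example assumes the tokio, tracing, and tracing-subscriber crates; only TracingSharedGuard mirrors the shape of the type added in id_lock_map.rs.

// Standalone sketch of the guard-with-drop-warning pattern (illustrative only).
use std::fmt::Display;
use std::sync::Arc;
use std::time::{Duration, Instant};

// Invented for this sketch; the patch uses LOCK_TIMEOUT_ALERT_THRESHOLD = RECONCILE_TIMEOUT.
const THRESHOLD: Duration = Duration::from_millis(10);

// Shared-lock guard that logs on drop if it was held for too long.
pub struct TracingSharedGuard<T: Display> {
    _guard: tokio::sync::OwnedRwLockReadGuard<Option<T>>,
    operation: T,
    start: Instant,
}

impl<T: Display> Drop for TracingSharedGuard<T> {
    fn drop(&mut self) {
        let held = self.start.elapsed();
        if held > THRESHOLD {
            tracing::warn!("Shared lock by {} was held for {:?}", self.operation, held);
        }
    }
}

#[tokio::main]
async fn main() {
    // A subscriber must be installed for the warn! output to be visible.
    tracing_subscriber::fmt().init();

    let lock = Arc::new(tokio::sync::RwLock::new(None::<String>));
    let guard = TracingSharedGuard {
        _guard: lock.clone().read_owned().await,
        operation: "ExampleOp".to_string(),
        start: Instant::now(),
    };

    // Simulate work that outlives the threshold; the Drop impl then fires the warning.
    tokio::time::sleep(Duration::from_millis(25)).await;
    drop(guard);
}

Reporting at drop keeps the bookkeeping down to a single Instant per guard, at the cost of only learning about a long hold after the lock is released; the sleep_millis_async failpoint added in service.rs exists precisely to force that slow path so the regression test can assert on the "Shared lock by ... was held for" log line.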