From ef3533344c815ff12a92e5f8e34b0bd00fb8c1ba Mon Sep 17 00:00:00 2001 From: Jason Kim Date: Mon, 18 Oct 2021 18:41:21 -0700 Subject: [PATCH] [BACKPORT 2.8][#10304] docdb: fix deadlock in ProcessTabletReportBatch Summary: Since commit b14485a5fdd8220a23e4f685d84b44e501b3bb15 ([#8229] backup: repartition table if needed on YSQL restore), there is a rare deadlock issue between ProcessTabletReportBatch and RepartitionTable. It can be hit when - thread 1 (RepartitionTable): import a YSQL snapshot where the number of tablets for a table mismatch between the cluster and the external snapshot - thread 2 (ProcessTabletReportBatch): process tablets of that table from heartbeat A more in-depth sequence of steps follows: 1. t1: table->LockForWrite (WriteLock) 1. t2: table->LockForRead (ReadLock) 1. t1: tablet->StartMutation (WriteLock) 2. t1: table_lock.Commit (UpgradeToCommitLock; blocks on t2 ReadLock) 3. t2: tablet->LockForWrite (WriteLock; blocks on t1 WriteLock) To fix, for ProcessTabletReportBatch, take table write lock instead of read lock. The table metadata isn't mutated, so this is purely for deadlock avoidance reasons (since only one writer is allowed at a time). Bogdan thinks we should expect table write lock to be taken whenever tablet write lock is taken. Original Commit: https://github.com/yugabyte/yugabyte-db/commit/cc90f018e246ee16a6eb95f46cc6a4fcc92ff26c Original Differential Revision: https://phabricator.dev.yugabyte.com/D13459 Test Plan: ./yb_build.sh \ --cxx-test tools_yb-backup-test_ent \ --gtest_filter YBBackupTest.TestYSQLChangeDefaultNumTablets \ -n 1000 \ --tp 1 \ fastdebug Reviewers: nicolas Reviewed By: nicolas Subscribers: bogdan Differential Revision: https://phabricator.dev.yugabyte.com/D13730 --- src/yb/master/catalog_manager.cc | 18 ++++++++++-------- src/yb/master/catalog_manager.h | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/yb/master/catalog_manager.cc b/src/yb/master/catalog_manager.cc index 95f2412f77f1..70314503b3a0 100644 --- a/src/yb/master/catalog_manager.cc +++ b/src/yb/master/catalog_manager.cc @@ -5554,7 +5554,7 @@ bool CatalogManager::ProcessCommittedConsensusState( TSDescriptor* ts_desc, bool is_incremental, const ReportedTabletPB& report, - const TableInfo::ReadLock& table_lock, + const TableInfo::WriteLock& table_lock, const TabletInfoPtr& tablet, const TabletInfo::WriteLock& tablet_lock, std::vector* rpcs) { @@ -5753,13 +5753,15 @@ Status CatalogManager::ProcessTabletReportBatch( ReportedTablets::const_iterator end, TabletReportUpdatesPB* full_report_update, std::vector* rpcs) { - // 1. First Pass. Iterate in TabletId Order to discover all Table locks we'll need. + // 1. First Pass. Iterate in TabletId Order to discover all Table locks we'll need. Even though + // read locks are sufficient here, take write locks since we'll be writing to the tablet while + // holding this. // Need to acquire both types of locks in Id order to prevent deadlock. - std::map table_read_locks; + std::map table_write_locks; for (auto it = begin; it != end; ++it) { - auto& lock = table_read_locks[it->info->table()->id()]; + auto& lock = table_write_locks[it->info->table()->id()]; if (!lock.locked()) { - lock = it->info->table()->LockForRead(); + lock = it->info->table()->LockForWrite(); } } @@ -5780,7 +5782,7 @@ Status CatalogManager::ProcessTabletReportBatch( // Get tablet lock on demand. This works in the batch case because the loop is ordered. tablet_write_locks[tablet_id] = tablet->LockForWrite(); - auto& table_lock = table_read_locks[table->id()]; + auto& table_lock = table_write_locks[table->id()]; auto& tablet_lock = tablet_write_locks[tablet_id]; TRACE_EVENT1("master", "HandleReportedTablet", "tablet_id", report.tablet_id()); @@ -5887,10 +5889,10 @@ Status CatalogManager::ProcessTabletReportBatch( } // Finished one round of batch processing. // 7. Unlock the tables; we no longer need to access their state. - for (auto& l : table_read_locks) { + for (auto& l : table_write_locks) { l.second.Unlock(); } - table_read_locks.clear(); + table_write_locks.clear(); // 8. Write all tablet mutations to the catalog table. // diff --git a/src/yb/master/catalog_manager.h b/src/yb/master/catalog_manager.h index 9906374c5b6e..f0802116c452 100644 --- a/src/yb/master/catalog_manager.h +++ b/src/yb/master/catalog_manager.h @@ -1551,7 +1551,7 @@ class CatalogManager : TSDescriptor* ts_desc, bool is_incremental, const ReportedTabletPB& report, - const TableInfo::ReadLock& table_lock, + const TableInfo::WriteLock& table_lock, const TabletInfoPtr& tablet, const TabletInfo::WriteLock& tablet_lock, std::vector* rpcs);