From 804ddb82ce86ef4fdc1a929fc3ada6b83bb36bff Mon Sep 17 00:00:00 2001 From: Timur Yusupov Date: Wed, 26 Jun 2019 18:51:57 +0300 Subject: [PATCH] #1631: Fixed concurrent access to primary table info in RaftGroupMetadata Summary: Originally issue has been discovered as `RaftConsensusITest.TestAddRemoveVoter` random test failure in TSAN mode due to a data race. ``` WARNING: ThreadSanitizer: data race (pid=11050) 1663 [ts-2] Write of size 8 at 0x7b4c000603a8 by thread T51 (mutexes: write M3613): ... 1674 [ts-2] #10 yb::tablet::KvStoreInfo::LoadTablesFromPB(google::protobuf::RepeatedPtrField, string) src/yb/tablet/tablet_metadata.cc:170 1675 [ts-2] #11 yb::tablet::KvStoreInfo::LoadFromPB(yb::tablet::KvStoreInfoPB const&, string) src/yb/tablet/tablet_metadata.cc:189:10 1676 [ts-2] #12 yb::tablet::RaftGroupMetadata::LoadFromSuperBlock(yb::tablet::RaftGroupReplicaSuperBlockPB const&) src/yb/tablet/tablet_metadata.cc:508:5 1677 [ts-2] #13 yb::tablet::RaftGroupMetadata::ReplaceSuperBlock(yb::tablet::RaftGroupReplicaSuperBlockPB const&) src/yb/tablet/tablet_metadata.cc:545:3 1678 [ts-2] #14 yb::tserver::RemoteBootstrapClient::Finish() src/yb/tserver/remote_bootstrap_client.cc:486:3 ... Previous read of size 4 at 0x7b4c000603a8 by thread T16: 1697 [ts-2] #0 yb::tablet::RaftGroupMetadata::schema_version() const src/yb/tablet/tablet_metadata.h:251:34 1698 [ts-2] #1 yb::tserver::TSTabletManager::CreateReportedTabletPB(std::__1::shared_ptr const&, yb::master::ReportedTabletPB*) src/yb/tserver/ts_tablet_manager.cc:1323:71 1699 [ts-2] #2 yb::tserver::TSTabletManager::GenerateIncrementalTabletReport(yb::master::TabletReportPB*) src/yb/tserver/ts_tablet_manager.cc:1359:5 1700 [ts-2] #3 yb::tserver::Heartbeater::Thread::TryHeartbeat() src/yb/tserver/heartbeater.cc:371:32 1701 [ts-2] #4 yb::tserver::Heartbeater::Thread::DoHeartbeat() src/yb/tserver/heartbeater.cc:531:19 ``` The reason is that although `RaftGroupMetadata::schema_version()` is getting `TableInfo` pointer from `primary_table_info()` under mutex lock, but then it accesses its field without lock. Added `RaftGroupMetadata::primary_table_info_guarded()` private method which returns a pair of `TableInfo*` and `std::unique_lock` and used it in `RaftGroupMetadata::schema_version()` and other `RaftGroupMetadata` functions accessing primary table info fields. Test Plan: `ybd tsan --sj --cxx-test integration-tests_raft_consensus-itest --gtest_filter RaftConsensusITest.TestAddRemoveVoter -n 1000` Reviewers: bogdan, sergei Reviewed By: sergei Subscribers: ybase Differential Revision: https://phabricator.dev.yugabyte.com/D6813 --- src/yb/tablet/tablet_metadata.cc | 32 +++++++++++------------ src/yb/tablet/tablet_metadata.h | 45 ++++++++++++++++++++------------ 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/src/yb/tablet/tablet_metadata.cc b/src/yb/tablet/tablet_metadata.cc index 797a1208d542..bdf8b903c03d 100644 --- a/src/yb/tablet/tablet_metadata.cc +++ b/src/yb/tablet/tablet_metadata.cc @@ -321,7 +321,7 @@ CHECKED_STATUS MakeTableNotFound(const TableId& table_id, const RaftGroupId& raf } Result RaftGroupMetadata::GetTableInfo(const std::string& table_id) const { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); const auto& tables = kv_store_.tables; const auto id = !table_id.empty() ? table_id : primary_table_id_; const auto iter = tables.find(id); @@ -332,7 +332,7 @@ Result RaftGroupMetadata::GetTableInfo(const std::string& tabl } Result RaftGroupMetadata::GetTableInfo(const std::string& table_id) { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); const auto& tables = kv_store_.tables; const auto id = !table_id.empty() ? table_id : primary_table_id_; const auto iter = tables.find(id); @@ -356,7 +356,7 @@ Status RaftGroupMetadata::DeleteTabletData(TabletDataState delete_type, // We also set the state in our persisted metadata to indicate that // we have been deleted. { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); tablet_data_state_ = delete_type; if (last_logged_opid) { tombstone_last_logged_opid_ = last_logged_opid; @@ -403,7 +403,7 @@ Status RaftGroupMetadata::DeleteTabletData(TabletDataState delete_type, } Status RaftGroupMetadata::DeleteSuperBlock() { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); if (tablet_data_state_ != TABLET_DATA_DELETED) { return STATUS(IllegalState, Substitute("Tablet $0 is not in TABLET_DATA_DELETED state. " @@ -494,7 +494,7 @@ Status RaftGroupMetadata::LoadFromSuperBlock(const RaftGroupReplicaSuperBlockPB& << superblock.DebugString(); { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); // Verify that the Raft group id matches with the one in the protobuf. if (superblock.raft_group_id() != raft_group_id_) { @@ -527,7 +527,7 @@ Status RaftGroupMetadata::Flush() { MutexLock l_flush(flush_lock_); RaftGroupReplicaSuperBlockPB pb; { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); ToSuperBlockUnlocked(&pb); } RETURN_NOT_OK(ReplaceSuperBlockUnlocked(pb)); @@ -576,12 +576,12 @@ Status RaftGroupMetadata::ReadSuperBlockFromDisk(RaftGroupReplicaSuperBlockPB* s void RaftGroupMetadata::ToSuperBlock(RaftGroupReplicaSuperBlockPB* superblock) const { // acquire the lock so that rowsets_ doesn't get changed until we're finished. - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); ToSuperBlockUnlocked(superblock); } void RaftGroupMetadata::ToSuperBlockUnlocked(RaftGroupReplicaSuperBlockPB* superblock) const { - DCHECK(data_lock_.is_locked()); + DCHECK(data_mutex_.is_locked()); // Convert to protobuf. RaftGroupReplicaSuperBlockPB pb; pb.set_raft_group_id(raft_group_id_); @@ -605,12 +605,12 @@ void RaftGroupMetadata::SetSchema(const Schema& schema, const std::vector& deleted_cols, const uint32_t version) { DCHECK(schema.has_column_ids()); - std::unique_ptr new_table_info(new TableInfo(*primary_table_info(), + std::lock_guard lock(data_mutex_); + std::unique_ptr new_table_info(new TableInfo(*primary_table_info_unlocked(), schema, index_map, deleted_cols, version)); - std::lock_guard l(data_lock_); kv_store_.tables[primary_table_id_].swap(new_table_info); if (new_table_info) { kv_store_.old_tables.push_back(std::move(new_table_info)); @@ -618,14 +618,14 @@ void RaftGroupMetadata::SetSchema(const Schema& schema, } void RaftGroupMetadata::SetPartitionSchema(const PartitionSchema& partition_schema) { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); auto& tables = kv_store_.tables; DCHECK(tables.find(primary_table_id_) != tables.end()); tables[primary_table_id_]->partition_schema = partition_schema; } void RaftGroupMetadata::SetTableName(const string& table_name) { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); auto& tables = kv_store_.tables; DCHECK(tables.find(primary_table_id_) != tables.end()); tables[primary_table_id_]->table_name = table_name; @@ -653,14 +653,14 @@ void RaftGroupMetadata::AddTable(const std::string& table_id, CHECK_OK(cotable_id.FromHexString(table_id)); new_table_info->schema.set_cotable_id(cotable_id); } - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); auto& tables = kv_store_.tables; tables[table_id].swap(new_table_info); DCHECK(!new_table_info) << "table " << table_id << " already exists"; } void RaftGroupMetadata::RemoveTable(const std::string& table_id) { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); auto& tables = kv_store_.tables; tables.erase(table_id); } @@ -691,7 +691,7 @@ string RaftGroupMetadata::wal_root_dir() const { } void RaftGroupMetadata::set_tablet_data_state(TabletDataState state) { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); tablet_data_state_ = state; } @@ -700,7 +700,7 @@ string RaftGroupMetadata::LogPrefix() const { } TabletDataState RaftGroupMetadata::tablet_data_state() const { - std::lock_guard l(data_lock_); + std::lock_guard lock(data_mutex_); return tablet_data_state_; } diff --git a/src/yb/tablet/tablet_metadata.h b/src/yb/tablet/tablet_metadata.h index dc001a7978d7..2467d1a62a9e 100644 --- a/src/yb/tablet/tablet_metadata.h +++ b/src/yb/tablet/tablet_metadata.h @@ -228,63 +228,67 @@ class RaftGroupMetadata : public RefCountedThreadSafe { // Returns the name, type, schema, index map, schema, etc of the primary table. std::string table_name() const { DCHECK_NE(state_, kNotLoadedYet); - return primary_table_info()->table_name; + return primary_table_info_guarded().first->table_name; } TableType table_type() const { DCHECK_NE(state_, kNotLoadedYet); - return primary_table_info()->table_type; + return primary_table_info_guarded().first->table_type; } const Schema& schema() const { DCHECK_NE(state_, kNotLoadedYet); - return primary_table_info()->schema; + return primary_table_info_guarded().first->schema; } const IndexMap& index_map() const { DCHECK_NE(state_, kNotLoadedYet); - return primary_table_info()->index_map; + return primary_table_info_guarded().first->index_map; } uint32_t schema_version() const { DCHECK_NE(state_, kNotLoadedYet); - return primary_table_info()->schema_version; + return primary_table_info_guarded().first->schema_version; } const std::string& indexed_tablet_id() const { DCHECK_NE(state_, kNotLoadedYet); static const std::string kEmptyString = ""; - const auto* index_info = primary_table_info()->index_info.get(); + std::lock_guard lock(data_mutex_); + const auto* index_info = primary_table_info_unlocked()->index_info.get(); return index_info ? index_info->indexed_table_id() : kEmptyString; } bool is_local_index() const { DCHECK_NE(state_, kNotLoadedYet); - const auto* index_info = primary_table_info()->index_info.get(); + std::lock_guard lock(data_mutex_); + const auto* index_info = primary_table_info_unlocked()->index_info.get(); return index_info && index_info->is_local(); } bool is_unique_index() const { DCHECK_NE(state_, kNotLoadedYet); - const auto* index_info = primary_table_info()->index_info.get(); + std::lock_guard lock(data_mutex_); + const auto* index_info = primary_table_info_unlocked()->index_info.get(); return index_info && index_info->is_unique(); } std::vector index_key_column_ids() const { DCHECK_NE(state_, kNotLoadedYet); - const auto* index_info = primary_table_info()->index_info.get(); + std::lock_guard lock(data_mutex_); + const auto* index_info = primary_table_info_unlocked()->index_info.get(); return index_info ? index_info->index_key_column_ids() : std::vector(); } // Returns the partition schema of the Raft group's tables. const PartitionSchema& partition_schema() const { DCHECK_NE(state_, kNotLoadedYet); - return primary_table_info()->partition_schema; + return primary_table_info_guarded().first->partition_schema; } const std::vector& deleted_cols() const { DCHECK_NE(state_, kNotLoadedYet); - return primary_table_info()->deleted_cols; + return primary_table_info_guarded().first->deleted_cols; } std::string rocksdb_dir() const { return kv_store_.rocksdb_dir; } @@ -364,6 +368,8 @@ class RaftGroupMetadata : public RefCountedThreadSafe { CHECKED_STATUS ReplaceSuperBlock(const RaftGroupReplicaSuperBlockPB &pb); private: + typedef simple_spinlock MutexType; + friend class RefCountedThreadSafe; friend class MetadataTest; @@ -403,7 +409,7 @@ class RaftGroupMetadata : public RefCountedThreadSafe { // Requires 'flush_lock_'. CHECKED_STATUS ReplaceSuperBlockUnlocked(const RaftGroupReplicaSuperBlockPB &pb); - // Requires 'data_lock_'. + // Requires 'data_mutex_'. void ToSuperBlockUnlocked(RaftGroupReplicaSuperBlockPB* superblock) const; // Return standard "T xxx P yyy" log prefix. @@ -411,14 +417,20 @@ class RaftGroupMetadata : public RefCountedThreadSafe { // Return a pointer to the primary table info. This pointer will be valid until the // RaftGroupMetadata is destructed, even if the schema is changed. - const TableInfo* primary_table_info() const { - std::lock_guard l(data_lock_); + const TableInfo* primary_table_info_unlocked() const { const auto& tables = kv_store_.tables; const auto itr = tables.find(primary_table_id_); DCHECK(itr != tables.end()); return itr->second.get(); } + // Return a pair of a pointer to the primary table info and lock guard. The pointer will be valid + // until the RaftGroupMetadata is destructed, even if the schema is changed. + std::pair> primary_table_info_guarded() const { + std::unique_lock lock(data_mutex_); + return { primary_table_info_unlocked(), std::move(lock) }; + } + enum State { kNotLoadedYet, kNotWrittenYet, @@ -427,11 +439,10 @@ class RaftGroupMetadata : public RefCountedThreadSafe { State state_; // Lock protecting the underlying data. - typedef simple_spinlock LockType; - mutable LockType data_lock_; + mutable MutexType data_mutex_; // Lock protecting flushing the data to disk. - // If taken together with 'data_lock_', must be acquired first. + // If taken together with 'data_mutex_', must be acquired first. mutable Mutex flush_lock_; const RaftGroupId raft_group_id_;