Skip to content

Commit

Permalink
[#14873] docdb: follower should not check schema when preparing becau…
Browse files Browse the repository at this point in the history
…se previous change schema ops might not be applied yet

Summary:
The following script might crash a follower with check failure: Table <unknown_table_name> (000033e8000030008000000000004001) not found in Raft group
```
CREATE TABLEGROUP tg1;
-- Sleep here for 30 seconds. Giving enough time for bootstrap solve this issue.
CREATE TABLE test_tbl (
  h INT PRIMARY KEY,
  a INT,
  b FLOAT CONSTRAINT test_tbl_uniq UNIQUE WITH (colocation_id=654321)
) WITH (colocation_id=123456) TABLEGROUP tg1;
CREATE INDEX test_tbl_idx ON test_tbl (a ASC) WITH (colocation_id=11223344);
```

The failed scenario is:

The failing follower has replicated the operation to add table, but not applied.
When replicating the alter schema operation, inside CreatePreparedChangeMetadata,
it's doing some check, including GetKeySchema to get info of the previously added table.
Since the previous operation is not done yet, it fails to get it.

Test Plan:
PgLibPqTest.CreateTablesToTablegroup
PgIndexBackfillTest.Tablegroup
PgLibPqTest.ColocatedTablegroups

Reviewers: yguan, rthallam, amitanand, skedia, sergei

Reviewed By: sergei

Subscribers: mbautin, alex, jason, ybase, bogdan

Differential Revision: https://phabricator.dev.yugabyte.com/D21065
  • Loading branch information
Huqicheng committed Dec 13, 2022
1 parent 70fbb38 commit a5455f3
Show file tree
Hide file tree
Showing 24 changed files with 83 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ LWAutoFlagsConfigPB* RequestTraits<LWAutoFlagsConfigPB>::MutableRequest(
return replicate->mutable_auto_flags_config();
}

Status ChangeAutoFlagsConfigOperation::Prepare() {
Status ChangeAutoFlagsConfigOperation::Prepare(IsLeaderSide is_leader_side) {
VLOG_WITH_PREFIX_AND_FUNC(2);
return Status::OK();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class ChangeAutoFlagsConfigOperation
Status Apply();

private:
Status Prepare() override;
Status Prepare(IsLeaderSide is_leader_side) override;
Status DoReplicated(int64_t leader_term, Status* complete_status) override;
Status DoAborted(const Status& status) override;
};
Expand Down
14 changes: 12 additions & 2 deletions src/yb/tablet/operations/change_metadata_operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@
#include "yb/util/status_format.h"
#include "yb/util/trace.h"

DEFINE_test_flag(bool, ignore_apply_change_metadata_on_followers, false,
"Used in tests to ignore applying change metadata operation"
" on followers.");

using std::string;

namespace yb {
Expand Down Expand Up @@ -92,7 +96,7 @@ string ChangeMetadataOperation::ToString() const {
hybrid_time_even_if_unset(), schema_, request());
}

Status ChangeMetadataOperation::Prepare() {
Status ChangeMetadataOperation::Prepare(IsLeaderSide is_leader_side) {
TRACE("PREPARE CHANGE-METADATA: Starting");

// Decode schema
Expand All @@ -107,7 +111,8 @@ Status ChangeMetadataOperation::Prepare() {
}

TabletPtr tablet = VERIFY_RESULT(tablet_safe());
RETURN_NOT_OK(tablet->CreatePreparedChangeMetadata(this, schema_holder_.get()));
RETURN_NOT_OK(tablet->CreatePreparedChangeMetadata(
this, schema_holder_.get(), is_leader_side));

SetIndexes(ToRepeatedPtrField(request()->indexes()));

Expand All @@ -116,6 +121,11 @@ Status ChangeMetadataOperation::Prepare() {
}

Status ChangeMetadataOperation::DoReplicated(int64_t leader_term, Status* complete_status) {
if (PREDICT_FALSE(FLAGS_TEST_ignore_apply_change_metadata_on_followers)) {
LOG_WITH_PREFIX(INFO) << "Ignoring apply of change metadata ops on followers";
return Status::OK();
}

TRACE("APPLY CHANGE-METADATA: Starting");

TabletPtr tablet = VERIFY_RESULT(tablet_safe());
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/change_metadata_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class ChangeMetadataOperation
//
// TODO: need a schema lock?

Status Prepare() override;
Status Prepare(IsLeaderSide is_leader_side) override;

private:
// Starts the ChangeMetadataOperation by assigning it a timestamp.
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/history_cutoff_operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ Status HistoryCutoffOperation::Apply(int64_t leader_term) {
return Status::OK();
}

Status HistoryCutoffOperation::Prepare() {
Status HistoryCutoffOperation::Prepare(IsLeaderSide is_leader_side) {
VLOG_WITH_PREFIX(2) << "Prepare";
return Status::OK();
}
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/history_cutoff_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class HistoryCutoffOperation
Status Apply(int64_t leader_term);

private:
Status Prepare() override;
Status Prepare(IsLeaderSide is_leader_side) override;
Status DoReplicated(int64_t leader_term, Status* complete_status) override;
Status DoAborted(const Status& status) override;
};
Expand Down
3 changes: 2 additions & 1 deletion src/yb/tablet/operations/operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ YB_DEFINE_ENUM(
((kChangeAutoFlagsConfig, consensus::CHANGE_AUTO_FLAGS_CONFIG_OP)));

YB_STRONGLY_TYPED_BOOL(WasPending);
YB_STRONGLY_TYPED_BOOL(IsLeaderSide);

// Base class for transactions. There are different implementations for different types (Write,
// AlterSchema, etc.) OperationDriver implementations use Operations along with Consensus to execute
Expand All @@ -97,7 +98,7 @@ class Operation {
// Executes the prepare phase of this transaction. The actual actions of this phase depend on the
// transaction type, but usually are limited to what can be done without actually changing shared
// data structures (such as the RocksDB memtable) and without side-effects.
virtual Status Prepare() = 0;
virtual Status Prepare(IsLeaderSide is_leader_side) = 0;

// Applies replicated operation, the actual actions of this phase depend on the
// operation type, but usually this is the method where data-structures are changed.
Expand Down
6 changes: 3 additions & 3 deletions src/yb/tablet/operations/operation_driver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ void OperationDriver::AddedToLeader(const OpId& op_id, const OpId& committed_op_

void OperationDriver::PrepareAndStartTask() {
TRACE_EVENT_FLOW_END0("operation", "PrepareAndStartTask", this);
Status prepare_status = PrepareAndStart();
Status prepare_status = PrepareAndStart(IsLeaderSide::kFalse);
if (PREDICT_FALSE(!prepare_status.ok())) {
HandleFailure(prepare_status);
}
Expand All @@ -223,14 +223,14 @@ bool OperationDriver::StartOperation() {
return true;
}

Status OperationDriver::PrepareAndStart() {
Status OperationDriver::PrepareAndStart(IsLeaderSide is_leader_side) {
ADOPT_TRACE(trace());
TRACE_EVENT1("operation", "PrepareAndStart", "operation", this);
VLOG_WITH_PREFIX(4) << "PrepareAndStart()";
// Actually prepare and start the operation.
prepare_physical_hybrid_time_ = GetMonoTimeMicros();
if (operation_) {
RETURN_NOT_OK(operation_->Prepare());
RETURN_NOT_OK(operation_->Prepare(is_leader_side));
}

// Only take the lock long enough to take a local copy of the
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/operation_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ class OperationDriver : public RefCountedThreadSafe<OperationDriver>,
// Actually prepare and start. In case of leader-side operations, this stops short of calling
// Consensus::Replicate, which is the responsibility of the caller. This is being done so that
// we can append multiple rounds to the consensus queue together.
Status PrepareAndStart();
Status PrepareAndStart(IsLeaderSide is_leader_side = IsLeaderSide::kTrue);

// The task used to be submitted to the prepare threadpool to prepare and start the operation.
// If PrepareAndStart() fails, calls HandleFailure. Since 07/07/2017 this is being used for
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/operation_tracker-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class OperationTrackerTest : public YBTest {
return std::make_shared<consensus::ReplicateMsg>();
}

Status Prepare() override { return Status::OK(); }
Status Prepare(IsLeaderSide is_leader_side) override { return Status::OK(); }
void Start() override {}
Status Apply() override {
return Status::OK();
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/snapshot_operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ Status SnapshotOperation::CheckOperationAllowed(
// SnapshotOperation
// ------------------------------------------------------------------------------------------------

Status SnapshotOperation::Prepare() {
Status SnapshotOperation::Prepare(IsLeaderSide is_leader_side) {
TRACE("PREPARE SNAPSHOT: Starting");
RETURN_NOT_OK(tablet()->snapshots().Prepare(this));

Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/snapshot_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class SnapshotOperation :

static Status RejectionStatus(OpId rejected_op_id, consensus::OperationType op_type);

Status Prepare() override;
Status Prepare(IsLeaderSide is_leader_side) override;

private:
// Starts the TabletSnapshotOp operation by assigning it a timestamp.
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/split_operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ Status SplitOperation::CheckOperationAllowed(
request()->new_tablet2_id().ToBuffer());
}

Status SplitOperation::Prepare() {
Status SplitOperation::Prepare(IsLeaderSide is_leader_side) {
VLOG_WITH_PREFIX(2) << "Prepare";
return Status::OK();
}
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/split_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class SplitOperation
const TabletId& child1, const TabletId& child2);

private:
Status Prepare() override;
Status Prepare(IsLeaderSide is_leader_side) override;
Status DoReplicated(int64_t leader_term, Status* complete_status) override;
Status DoAborted(const Status& status) override;
void AddedAsPending() override;
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/truncate_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class TruncateOperation : public OperationBase<OperationType::kTruncate, LWTrunc
explicit TruncateOperation(Args&&... args)
: OperationBase(std::forward<Args>(args)...) {}

Status Prepare() override { return Status::OK(); }
Status Prepare(IsLeaderSide is_leader_side) override { return Status::OK(); }

private:
// Starts the TruncateOperation by assigning it a timestamp.
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/update_txn_operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ LWTransactionStatePB* RequestTraits<LWTransactionStatePB>::MutableRequest(
return replicate->mutable_transaction_state();
}

Status UpdateTxnOperation::Prepare() {
Status UpdateTxnOperation::Prepare(IsLeaderSide is_leader_side) {
VLOG_WITH_PREFIX(2) << "Prepare";
return Status::OK();
}
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/update_txn_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class UpdateTxnOperation

private:
TransactionCoordinator& transaction_coordinator() const;
Status Prepare() override;
Status Prepare(IsLeaderSide is_leader_side) override;
Status DoReplicated(int64_t leader_term, Status* complete_status) override;
Status DoAborted(const Status& status) override;
};
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/write_operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ LWWritePB* RequestTraits<LWWritePB>::MutableRequest(consensus::LWReplicateMsg* r
return replicate->mutable_write();
}

Status WriteOperation::Prepare() {
Status WriteOperation::Prepare(IsLeaderSide is_leader_side) {
TRACE_EVENT0("txn", "WriteOperation::Prepare");
return Status::OK();
}
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/operations/write_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class WriteOperation : public OperationBase<OperationType::kWrite, LWWritePB> {
//
// Decodes the operations in the request PB and acquires row locks for each of the
// affected rows.
Status Prepare() override;
Status Prepare(IsLeaderSide is_leader_side) override;

// Executes an Apply for a write transaction.
//
Expand Down
3 changes: 2 additions & 1 deletion src/yb/tablet/tablet-test-util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ void YBTabletTest::AlterSchema(const Schema& schema) {
req.set_schema_version(tablet()->metadata()->schema_version() + 1);

ChangeMetadataOperation operation(nullptr, nullptr, &req);
ASSERT_OK(tablet()->CreatePreparedChangeMetadata(&operation, &schema));
ASSERT_OK(tablet()->CreatePreparedChangeMetadata(
&operation, &schema, IsLeaderSide::kTrue));
ASSERT_OK(tablet()->AlterSchema(&operation));
operation.Release();
}
Expand Down
22 changes: 13 additions & 9 deletions src/yb/tablet/tablet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1930,16 +1930,20 @@ Result<std::unique_ptr<docdb::YQLRowwiseIteratorIf>> Tablet::CreateCDCSnapshotIt
}

Status Tablet::CreatePreparedChangeMetadata(
ChangeMetadataOperation *operation, const Schema* schema) {
ChangeMetadataOperation *operation, const Schema* schema, IsLeaderSide is_leader_side) {
if (schema) {
auto key_schema = GetKeySchema(
operation->has_table_id() ? operation->table_id().ToBuffer() : "");
if (!key_schema.KeyEquals(*schema)) {
return STATUS_FORMAT(
InvalidArgument,
"Schema keys cannot be altered. New schema key: $0. Existing schema key: $1",
schema->CreateKeyProjection(),
key_schema);
// On follower, the previous op for adding table may not finish applying.
// GetKeySchema might fail in this case.
if (is_leader_side) {
auto key_schema = GetKeySchema(
operation->has_table_id() ? operation->table_id().ToBuffer() : "");
if (!key_schema.KeyEquals(*schema)) {
return STATUS_FORMAT(
InvalidArgument,
"Schema keys cannot be altered. New schema key: $0. Existing schema key: $1",
schema->CreateKeyProjection(),
key_schema);
}
}

if (!schema->has_column_ids()) {
Expand Down
4 changes: 3 additions & 1 deletion src/yb/tablet/tablet.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "yb/tablet/tablet_fwd.h"
#include "yb/tablet/abstract_tablet.h"
#include "yb/tablet/mvcc.h"
#include "yb/tablet/operations/operation.h"
#include "yb/tablet/operation_filter.h"
#include "yb/tablet/tablet_options.h"
#include "yb/tablet/transaction_intent_applier.h"
Expand Down Expand Up @@ -420,7 +421,8 @@ class Tablet : public AbstractTablet, public TransactionIntentApplier {
// key mismatch, or missing IDs)
Status CreatePreparedChangeMetadata(
ChangeMetadataOperation* operation,
const Schema* schema);
const Schema* schema,
IsLeaderSide is_leader_side);

// Apply the Schema of the specified operation.
Status AlterSchema(ChangeMetadataOperation* operation);
Expand Down
2 changes: 1 addition & 1 deletion src/yb/tablet/tablet_bootstrap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1493,7 +1493,7 @@ class TabletBootstrap {
}

RETURN_NOT_OK(tablet_->CreatePreparedChangeMetadata(
&operation, request->has_schema() ? &schema : nullptr));
&operation, request->has_schema() ? &schema : nullptr, IsLeaderSide::kTrue));

if (request->has_schema()) {
// Apply the alter schema to the tablet.
Expand Down
31 changes: 31 additions & 0 deletions src/yb/yql/pgwrapper/pg_libpq-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1588,6 +1588,37 @@ class PgLibPqTablegroupTest : public PgLibPqTest {
}
};

TEST_F_EX(PgLibPqTest, YB_DISABLE_TEST_IN_TSAN(CreateTablesToTablegroup),
PgLibPqTablegroupTest) {
auto client = ASSERT_RESULT(cluster_->CreateClient());
const string kDatabaseName = "test_db";
const string kTablegroupName = "tg1";

// Let ts-1 be the leader of tablet.
ASSERT_OK(cluster_->SetFlagOnMasters("use_create_table_leader_hint", "false"));
ASSERT_OK(cluster_->SetFlag(
cluster_->tablet_server(1), "TEST_skip_election_when_fail_detected", "true"));
ASSERT_OK(cluster_->SetFlag(
cluster_->tablet_server(2), "TEST_skip_election_when_fail_detected", "true"));

// Make one follower ignore applying change metadata operations.
ASSERT_OK(cluster_->SetFlag(
cluster_->tablet_server(1), "TEST_ignore_apply_change_metadata_on_followers", "true"));

auto conn = ASSERT_RESULT(Connect());
CreateDatabaseWithTablegroup(kDatabaseName, kTablegroupName, &conn);

ASSERT_OK(conn.ExecuteFormat(
"CREATE TABLE test_tbl ("
"h INT PRIMARY KEY,"
"a INT,"
"b FLOAT CONSTRAINT test_tbl_uniq UNIQUE WITH (colocation_id=654321)"
") WITH (colocation_id=123456) TABLEGROUP $0",
kTablegroupName));

cluster_->AssertNoCrashes();
}

TEST_F_EX(PgLibPqTest, YB_DISABLE_TEST_IN_TSAN(ColocatedTablegroups),
PgLibPqTablegroupTest) {
auto client = ASSERT_RESULT(cluster_->CreateClient());
Expand Down

0 comments on commit a5455f3

Please sign in to comment.