Skip to content

Commit

Permalink
[#17134] ysql: CREATE INDEX add timeout on wait for backends
Browse files Browse the repository at this point in the history
Summary:
As of commit a1729c3 titled

    [#7376] ysql: fix state waits for online CREATE INDEX

CREATE INDEX may wait indefinitely on backends to catch up.  It is easy
to not know what is taking the CREATE INDEX so long.

In a situation where the CREATE INDEX DDL contends with DMLs, some
options are

A. have CREATE INDEX wait indefinitely until the old DMLs finish
B. have CREATE INDEX die after timeout
C. have CREATE INDEX wound the old DMLs

Change the default behavior from A to B, where timeout is five minutes
(hopefully, that is enough time for transactions to finish). Let the
timeout be configurable using per-session GUC variable
yb_wait_for_backends_catalog_version_timeout and corresponding PG flag
ysql_yb_wait_for_backends_catalog_version_timeout.  To get the old A
behavior, set the value to zero or negative, which is interpreted as no
timeout.

The benefit of timing out the CREATE INDEX is that it gives a chance to
return an error message with details about what is going on.

Close: #17134
Jira Issue(s): DB-6416

Test Plan:
    ./yb_build.sh fastdebug --gcc11 \
      --cxx-test pgwrapper_pg_backends-test \
      --gtest_filter PgBackendsTest.PgTimeout

Reviewers: hbhanawat, sshaikh, myang

Reviewed By: myang

Subscribers: yql

Differential Revision: https://phabricator.dev.yugabyte.com/D24932
  • Loading branch information
jaki committed May 3, 2023
1 parent b9e0efa commit bedcbe3
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 0 deletions.
22 changes: 22 additions & 0 deletions src/postgres/src/backend/commands/indexcmds.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#include "postgres.h"

#include <inttypes.h>

#include "access/amapi.h"
#include "access/htup_details.h"
#include "access/reloptions.h"
Expand Down Expand Up @@ -3063,8 +3065,28 @@ YbWaitForBackendsCatalogVersion()

int num_lagging_backends = -1;
int retries_left = 10;
const TimestampTz start = GetCurrentTimestamp();
while (num_lagging_backends != 0)
{
if (yb_wait_for_backends_catalog_version_timeout > 0 &&
TimestampDifferenceExceeds(start,
GetCurrentTimestamp(),
yb_wait_for_backends_catalog_version_timeout))
ereport(ERROR,
(errmsg("timed out waiting for postgres backends to catch"
" up"),
errdetail("%d backends on database %u are still behind"
" catalog version %" PRIu64 ".",
num_lagging_backends,
MyDatabaseId,
YbGetCatalogCacheVersion()),
errhint("Run the following query on all tservers to find"
" the lagging backends: SELECT * FROM"
" pg_stat_activity WHERE catalog_version < %"
PRIu64 " AND datid = %u;",
YbGetCatalogCacheVersion(),
MyDatabaseId)));

YBCStatus s = YBCPgWaitForBackendsCatalogVersion(MyDatabaseId,
YbGetCatalogCacheVersion(),
&num_lagging_backends);
Expand Down
16 changes: 16 additions & 0 deletions src/postgres/src/backend/utils/misc/guc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3549,6 +3549,22 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},

{
{"yb_wait_for_backends_catalog_version_timeout", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Timeout in milliseconds to wait for backends to reach"
" desired catalog versions."),
gettext_noop("The actual time spent may be longer than that by as"
" much as master flag"
" wait_for_ysql_backends_catalog_version_client_master_rpc_timeout_ms."
" Setting to zero or less results in no timeout."
" Currently used by concurrent CREATE INDEX."),
GUC_UNIT_MS
},
&yb_wait_for_backends_catalog_version_timeout,
5 * 60 * 1000, 0, INT_MAX,
NULL, NULL, NULL
},

{
{"yb_test_planner_custom_plan_threshold", PGC_USERSET, QUERY_TUNING,
gettext_noop("The number of times to force custom plan generation "
Expand Down
1 change: 1 addition & 0 deletions src/postgres/src/backend/utils/misc/pg_yb_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,7 @@ bool yb_make_next_ddl_statement_nonbreaking = false;
bool yb_plpgsql_disable_prefetch_in_for_query = false;
bool yb_enable_sequence_pushdown = true;
bool yb_disable_wait_for_backends_catalog_version = false;
int yb_wait_for_backends_catalog_version_timeout = 5 * 60 * 1000; /* 5 min */

//------------------------------------------------------------------------------
// YB Debug utils.
Expand Down
5 changes: 5 additions & 0 deletions src/postgres/src/include/pg_yb_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,11 @@ extern bool yb_enable_sequence_pushdown;
*/
extern bool yb_disable_wait_for_backends_catalog_version;

/*
* Total timeout for waiting for backends to have up-to-date catalog version.
*/
extern int yb_wait_for_backends_catalog_version_timeout;

//------------------------------------------------------------------------------
// GUC variables needed by YB via their YB pointers.
extern int StatementTimeout;
Expand Down
49 changes: 49 additions & 0 deletions src/yb/yql/pgwrapper/pg_backends-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,55 @@ TEST_F_EX(PgBackendsTest, YB_DISABLE_TEST_IN_TSAN(ConnectionLimit), PgBackendsTe
ASSERT_EQ(0, num_backends);
}

class PgBackendsTestPgTimeout : public PgBackendsTest {
public:
void UpdateMiniClusterOptions(ExternalMiniClusterOptions* options) override {
PgBackendsTest::UpdateMiniClusterOptions(options);
options->extra_tserver_flags.insert(
options->extra_tserver_flags.end(),
{
Format("--wait_for_ysql_backends_catalog_version_client_master_rpc_timeout_ms=$0",
kRpcTimeout.ToMilliseconds()),
Format("--ysql_yb_wait_for_backends_catalog_version_timeout=$0",
kTimeout.ToMilliseconds()),
});
}

protected:
const MonoDelta kRpcTimeout = 1s;
const MonoDelta kTimeout = 3s;
};

// Test ysql_yb_wait_for_backends_catalog_version_timeout.
TEST_F_EX(PgBackendsTest,
YB_DISABLE_TEST_IN_TSAN(PgTimeout),
PgBackendsTestPgTimeout) {
LOG(INFO) << "Start connection that will be behind";
PGConn conn_begin = ASSERT_RESULT(Connect());
ASSERT_OK(conn_begin.Execute("BEGIN"));

LOG(INFO) << "Start create index connection";
PGConn conn_index = ASSERT_RESULT(Connect());
ASSERT_OK(conn_index.Execute("CREATE TABLE t (i int)"));

LOG(INFO) << "Do create index";
const auto start = MonoTime::Now();
Status s = conn_index.Execute("CREATE INDEX ON t (i)");
const auto end = MonoTime::Now();
ASSERT_NOK(s);
ASSERT_TRUE(s.IsNetworkError()) << s;
LOG(INFO) << "Error message: " << s.message().ToBuffer();
ASSERT_STR_CONTAINS(s.message().ToBuffer(), "timed out waiting for postgres backends");

const auto time_spent = end - start;
LOG(INFO) << "Time spent: " << time_spent;
// Add margin to cover time spent in setup (and teardown), such as creating the docdb index. The
// timeout timer starts on the first wait-for-backends call, which is after committing the initial
// state.
constexpr auto kMargin = 5s;
ASSERT_LT(time_spent, kTimeout + kRpcTimeout + kMargin);
}

class PgBackendsTestRf3 : public PgBackendsTest {
public:
int GetNumMasters() const override {
Expand Down
6 changes: 6 additions & 0 deletions src/yb/yql/pgwrapper/pg_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_bypass_cond_recheck, kLocalVolatile, false,
DEFINE_RUNTIME_PG_FLAG(int32, yb_index_state_flags_update_delay, 0,
"Delay in milliseconds between stages of online index build. For testing purposes.");

DEFINE_RUNTIME_PG_FLAG(int32, yb_wait_for_backends_catalog_version_timeout, 5 * 60 * 1000, // 5 min
"Timeout in milliseconds to wait for backends to reach desired catalog versions. The actual"
" time spent may be longer than that by as much as master flag"
" wait_for_ysql_backends_catalog_version_client_master_rpc_timeout_ms. Setting to zero or less"
" results in no timeout. Currently used by concurrent CREATE INDEX.");

DEFINE_RUNTIME_PG_FLAG(int32, yb_bnl_batch_size, 1,
"Batch size of nested loop joins.");

Expand Down

0 comments on commit bedcbe3

Please sign in to comment.