From c0cc94f8e195b5bc5b55b37d7312ed932f338b63 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Fri, 15 Nov 2024 17:37:01 +0800 Subject: [PATCH 1/3] Increase the max wait time to 5 seconds --- .../Interpreters/InterpreterCreateQuery.cpp | 148 ++++++++++-------- .../src/Interpreters/InterpreterCreateQuery.h | 3 +- dbms/src/TiDB/Schema/SchemaBuilder.cpp | 15 +- 3 files changed, 102 insertions(+), 64 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 0d375b87ebf..2647c58ef68 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -41,9 +41,11 @@ #include #include #include +#include #include #include +#include namespace DB @@ -67,9 +69,13 @@ extern const char exception_between_create_database_meta_and_directory[]; } -InterpreterCreateQuery::InterpreterCreateQuery(const ASTPtr & query_ptr_, Context & context_) +InterpreterCreateQuery::InterpreterCreateQuery( + const ASTPtr & query_ptr_, + Context & context_, + std::string_view log_suffix_) : query_ptr(query_ptr_) , context(context_) + , log_suffix(log_suffix_) {} @@ -447,7 +453,6 @@ ColumnsDescription InterpreterCreateQuery::setColumns( return res; } - void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const { if (create.storage) @@ -488,6 +493,77 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const } +/** If the table already exists, and the request specifies IF NOT EXISTS, + * then we allow concurrent CREATE queries (which do nothing). + * Otherwise, concurrent queries for creating a table, if the table does not exist, + * can throw an exception, even if IF NOT EXISTS is specified. + */ +std::unique_ptr tryGetDDLGuard( + Context & context, + const String & database_name, + const String & table_name, + bool create_if_not_exists, + size_t timeout_seconds, + std::string_view log_suffix) +{ + constexpr int wait_useconds = 50'000; + const size_t max_retries = timeout_seconds * 1'000'000 / wait_useconds; + try + { + auto guard = context.getDDLGuardIfTableDoesntExist( + database_name, + table_name, + "Table " + database_name + "." + table_name + " is creating or attaching right now"); + + if (!guard) + { + if (create_if_not_exists) + return {}; + else + throw Exception( + "Table " + database_name + "." + table_name + " already exists.", + ErrorCodes::TABLE_ALREADY_EXISTS); + } + } + catch (Exception & e) + { + // Concurrent queries for creating the same table may run into this branch. + // We have to wait for the table created completely, then return to use the table. + // Thus, we choose to do a retry here to wait the table created completed. + if (e.code() == ErrorCodes::TABLE_ALREADY_EXISTS || e.code() == ErrorCodes::DDL_GUARD_IS_ACTIVE) + { + auto log = Logger::get(log_suffix); + LOG_WARNING(log, "createTable failed, error_code={} error_msg={}", e.code(), e.message()); + for (size_t i = 0; i < max_retries; i++) + { + // Once we can get the table from `context`, consider the table create has been "completed" + // and return a null guard + if (context.isTableExist(database_name, table_name)) + return {}; + + // sleep a while and retry + LOG_ERROR( + log, + "createTable failed but table not exist now, we will sleep for {} ms and try again", + wait_useconds / 1000); + usleep(wait_useconds); + } + LOG_ERROR( + log, + "still failed to createTable in InterpreterCreateQuery for retry {} times, stack_info={}", + max_retries, + e.getStackTrace().toString()); + e.rethrow(); + } + else + { + e.addMessage(std::string(log_suffix)); + e.rethrow(); + } + } + return {}; // not reachable +} + BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { String path = context.getPath(); @@ -534,8 +610,6 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) /// Set the table engine if it was not specified explicitly. setEngine(create); - StoragePtr res; - { std::unique_ptr guard; @@ -552,67 +626,19 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) * Otherwise, concurrent queries for creating a table, if the table does not exist, * can throw an exception, even if IF NOT EXISTS is specified. */ - try - { - guard = context.getDDLGuardIfTableDoesntExist( - database_name, - table_name, - "Table " + database_name + "." + table_name + " is creating or attaching right now"); - - if (!guard) - { - if (create.if_not_exists) - return {}; - else - throw Exception( - "Table " + database_name + "." + table_name + " already exists.", - ErrorCodes::TABLE_ALREADY_EXISTS); - } - } - catch (Exception & e) - { - // Due to even if it throws this two error code, it can't ensure the table is completely created - // So we have to wait for the table created completely, then return to use the table. - // Thus, we choose to do a retry here to wait the table created completed. - if (e.code() == ErrorCodes::TABLE_ALREADY_EXISTS || e.code() == ErrorCodes::DDL_GUARD_IS_ACTIVE) - { - auto log = Logger::get(fmt::format("InterpreterCreateQuery {} {}", database_name, table_name)); - LOG_WARNING( - log, - "createTable failed with error code is {}, error info is {}, stack_info is {}", - e.code(), - e.displayText(), - e.getStackTrace().toString()); - const size_t max_retry = 50; - const int wait_useconds = 20000; - for (size_t i = 0; i < max_retry; i++) // retry - { - if (context.isTableExist(database_name, table_name)) - return {}; - - // sleep a while and retry - LOG_ERROR( - log, - "createTable failed but table not exist now, \nWe will sleep for {} ms and try again.", - wait_useconds / 1000); - usleep(wait_useconds); // sleep 20ms - } - LOG_ERROR( - log, - "still failed to createTable in InterpreterCreateQuery for retry {} times", - max_retry); - e.rethrow(); - } - else - { - e.rethrow(); - } - } + guard = tryGetDDLGuard( + context, + database_name, + table_name, + create.if_not_exists, + /*timeout_seconds=*/5, + log_suffix); } else if (context.tryGetExternalTable(table_name) && create.if_not_exists) return {}; - res = StorageFactory::instance().get( + // Guard is acquired, let's create the IStorage instance + StoragePtr res = StorageFactory::instance().get( create, data_path, table_name, diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h index a63a52bfffd..5114155a881 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.h +++ b/dbms/src/Interpreters/InterpreterCreateQuery.h @@ -34,7 +34,7 @@ using StoragePtr = std::shared_ptr; class InterpreterCreateQuery : public IInterpreter { public: - InterpreterCreateQuery(const ASTPtr & query_ptr_, Context & context_); + InterpreterCreateQuery(const ASTPtr & query_ptr_, Context & context_, std::string_view log_suffix_=""); BlockIO execute() override; @@ -68,6 +68,7 @@ class InterpreterCreateQuery : public IInterpreter ASTPtr query_ptr; Context & context; + std::string_view log_suffix; /// Using while loading database. ThreadPool * thread_pool = nullptr; diff --git a/dbms/src/TiDB/Schema/SchemaBuilder.cpp b/dbms/src/TiDB/Schema/SchemaBuilder.cpp index 42c015d9e52..e273074c9af 100644 --- a/dbms/src/TiDB/Schema/SchemaBuilder.cpp +++ b/dbms/src/TiDB/Schema/SchemaBuilder.cpp @@ -939,7 +939,10 @@ void SchemaBuilder::applyCreateDatabaseByInfo(const TiDB::DB ASTPtr ast = parseCreateStatement(statement); - InterpreterCreateQuery interpreter(ast, context); + InterpreterCreateQuery interpreter( + ast, + context, + fmt::format("keyspace={} database_id={}", keyspace_id, db_info->id)); interpreter.setInternal(true); interpreter.setForceRestoreData(false); interpreter.execute(); @@ -1186,7 +1189,15 @@ void SchemaBuilder::applyCreateStorageInstance( ast_create_query->if_not_exists = true; ast_create_query->database = database_mapped_name; - InterpreterCreateQuery interpreter(ast, context); + InterpreterCreateQuery interpreter( + ast, + context, + fmt::format( + "keyspace={} database_id={} table_id={} action={}", + keyspace_id, + database_id, + table_info->id, + action)); interpreter.setInternal(true); interpreter.setForceRestoreData(false); interpreter.execute(); From c31418ffbe7c1c4367db73dd2bcf97abafe7c3bd Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Fri, 15 Nov 2024 18:24:45 +0800 Subject: [PATCH 2/3] fix --- .../Interpreters/InterpreterCreateQuery.cpp | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 2647c58ef68..87bec3e6d3d 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -493,10 +493,17 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const } -/** If the table already exists, and the request specifies IF NOT EXISTS, - * then we allow concurrent CREATE queries (which do nothing). - * Otherwise, concurrent queries for creating a table, if the table does not exist, - * can throw an exception, even if IF NOT EXISTS is specified. +/** + * Try to acquire a DDLGuard to execute the "CREATE TABLE" actions. + * + * Return the gurad if this thread become the owner to execute "CREATE TABLE". + * If the thread does not is the owner to execute "CREATE TABLE". + * - If the table already exists, and the request specifies IF NOT EXISTS, + * then we allow concurrent CREATE queries (which do nothing). + * - Otherwise, concurrent queries for creating a table, if the table does not exist, + * wait for `timeout_seconds` at max to check whether the table creation is completly + * created. If the table has been created within timeout, then do nothing and return. + * If timeout happen at last, throw an exception. */ std::unique_ptr tryGetDDLGuard( Context & context, @@ -518,7 +525,7 @@ std::unique_ptr tryGetDDLGuard( if (!guard) { if (create_if_not_exists) - return {}; + return {}; // return a null guard else throw Exception( "Table " + database_name + "." + table_name + " already exists.", @@ -533,8 +540,8 @@ std::unique_ptr tryGetDDLGuard( if (e.code() == ErrorCodes::TABLE_ALREADY_EXISTS || e.code() == ErrorCodes::DDL_GUARD_IS_ACTIVE) { auto log = Logger::get(log_suffix); - LOG_WARNING(log, "createTable failed, error_code={} error_msg={}", e.code(), e.message()); - for (size_t i = 0; i < max_retries; i++) + LOG_WARNING(log, "Concurrent create table happens, error_code={} error_msg={}", e.code(), e.message()); + for (size_t i = 0; i < max_retries; ++i) { // Once we can get the table from `context`, consider the table create has been "completed" // and return a null guard @@ -542,15 +549,19 @@ std::unique_ptr tryGetDDLGuard( return {}; // sleep a while and retry - LOG_ERROR( + LOG_WARNING( log, - "createTable failed but table not exist now, we will sleep for {} ms and try again", + "Waiting for the completion of concurrent table creation action" + ", sleep for {} ms and try again", wait_useconds / 1000); usleep(wait_useconds); } + + // timeout, throw an exception LOG_ERROR( log, - "still failed to createTable in InterpreterCreateQuery for retry {} times, stack_info={}", + "still failed to wait for the completion of concurrent table creation in InterpreterCreateQuery, " + "max_retries={} stack_info={}", max_retries, e.getStackTrace().toString()); e.rethrow(); @@ -621,11 +632,6 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) database = context.getDatabase(database_name); data_path = database->getDataPath(); - /** If the table already exists, and the request specifies IF NOT EXISTS, - * then we allow concurrent CREATE queries (which do nothing). - * Otherwise, concurrent queries for creating a table, if the table does not exist, - * can throw an exception, even if IF NOT EXISTS is specified. - */ guard = tryGetDDLGuard( context, database_name, @@ -633,6 +639,12 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) create.if_not_exists, /*timeout_seconds=*/5, log_suffix); + if (!guard) + { + // Not the owner to create IStorage instance, and the table is created + // completely, let's return + return {}; + } } else if (context.tryGetExternalTable(table_name) && create.if_not_exists) return {}; @@ -668,6 +680,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!create.is_temporary) database->attachTable(table_name, res); + + // the table has been created completely } /// If the query is a CREATE SELECT, insert the data into the table. From b354a6d46c2e3b2899d9ebd3921ca86686f5f720 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Mon, 18 Nov 2024 12:06:36 +0800 Subject: [PATCH 3/3] Fix bug --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 1 + dbms/src/Interpreters/InterpreterCreateQuery.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 87bec3e6d3d..ec79af18604 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -531,6 +531,7 @@ std::unique_ptr tryGetDDLGuard( "Table " + database_name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); } + return guard; } catch (Exception & e) { diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.h b/dbms/src/Interpreters/InterpreterCreateQuery.h index 5114155a881..d15e5887519 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.h +++ b/dbms/src/Interpreters/InterpreterCreateQuery.h @@ -34,7 +34,7 @@ using StoragePtr = std::shared_ptr; class InterpreterCreateQuery : public IInterpreter { public: - InterpreterCreateQuery(const ASTPtr & query_ptr_, Context & context_, std::string_view log_suffix_=""); + InterpreterCreateQuery(const ASTPtr & query_ptr_, Context & context_, std::string_view log_suffix_ = ""); BlockIO execute() override;