From d76d62827def95779ed474ac58b76014aeb2b356 Mon Sep 17 00:00:00 2001 From: Manuel Ung Date: Mon, 5 Feb 2018 15:05:13 -0800 Subject: [PATCH] Add rocksdb transaction options for write policy and commit time writebatch Summary: This adds two sysvars for MyRocks that control rocksdb transactions: TransactionDBOptions::write_policy - This is a read-only variable that determines when data is written into the database. The default is COMMITTED which means data is written at commit time. TransactionOptions::use_only_the_last_commit_time_batch_for_recovery - This is a dynamic variable that determines whether the commit time write batch is written into the database or not. If the commit time write batch is only useful for recovery, writing to the WAL is enough. Differential Revision: D6905729 (https://github.com/facebook/mysql-5.6/commit/ad6a74de3dbab9db2a644debd8852b24900ae6c3) fbshipit-source-id: ac183b6378d --- mysql-test/suite/rocksdb/r/rocksdb.result | 2 + ...ommit_time_batch_for_recovery_basic.result | 121 ++++++++++++++++++ .../r/rocksdb_write_policy_basic.result | 15 +++ ..._commit_time_batch_for_recovery_basic.test | 20 +++ .../t/rocksdb_write_policy_basic.test | 17 +++ storage/rocksdb/ha_rocksdb.cc | 22 ++++ 6 files changed, 197 insertions(+) create mode 100644 mysql-test/suite/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result create mode 100644 mysql-test/suite/rocksdb_sys_vars/r/rocksdb_write_policy_basic.result create mode 100644 mysql-test/suite/rocksdb_sys_vars/t/rocksdb_commit_time_batch_for_recovery_basic.test create mode 100644 mysql-test/suite/rocksdb_sys_vars/t/rocksdb_write_policy_basic.test diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result index 7fee90a1c851..40cab6e76730 100644 --- a/mysql-test/suite/rocksdb/r/rocksdb.result +++ b/mysql-test/suite/rocksdb/r/rocksdb.result @@ -873,6 +873,7 @@ rocksdb_cache_index_and_filter_blocks ON rocksdb_checksums_pct 100 
rocksdb_collect_sst_properties ON rocksdb_commit_in_the_middle OFF +rocksdb_commit_time_batch_for_recovery OFF rocksdb_compact_cf rocksdb_compaction_readahead_size 0 rocksdb_compaction_sequential_deletes 0 @@ -979,6 +980,7 @@ rocksdb_whole_key_filtering ON rocksdb_write_batch_max_bytes 0 rocksdb_write_disable_wal OFF rocksdb_write_ignore_missing_column_families OFF +rocksdb_write_policy 0 create table t47 (pk int primary key, col1 varchar(12)) engine=rocksdb; insert into t47 values (1, 'row1'); insert into t47 values (2, 'row2'); diff --git a/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result new file mode 100644 index 000000000000..1d409bbedb43 --- /dev/null +++ b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result @@ -0,0 +1,121 @@ +CREATE TABLE valid_values (value varchar(255)); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +INSERT INTO valid_values VALUES('off'); +CREATE TABLE invalid_values (value varchar(255)); +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +SET @start_global_value = @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +SELECT @start_global_value; +@start_global_value +0 +SET @start_session_value = @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +SELECT @start_session_value; +@start_session_value +0 +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 1" +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 1; +SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; +SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; 
+@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 0" +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 0; +SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; +SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to on" +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = on; +SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; +SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to off" +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = off; +SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; +SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +'# Setting to valid values in session scope#' +"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 1" +SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 1; +SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +1 +"Setting the session scope variable back to default" +SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; +SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY 
+0 +"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 0" +SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 0; +SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +"Setting the session scope variable back to default" +SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; +SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to on" +SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = on; +SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +1 +"Setting the session scope variable back to default" +SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; +SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to off" +SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = off; +SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +"Setting the session scope variable back to default" +SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT; +SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 'aaa'" +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 'aaa'; +Got one of the listed errors +SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 'bbb'" +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 'bbb'; +Got one of the listed errors +SELECT 
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = @start_global_value; +SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = @start_session_value; +SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY; +@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +0 +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_write_policy_basic.result b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_write_policy_basic.result new file mode 100644 index 000000000000..1399ed2ee920 --- /dev/null +++ b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_write_policy_basic.result @@ -0,0 +1,15 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(2); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +SET @start_global_value = @@global.ROCKSDB_WRITE_POLICY; +SELECT @start_global_value; +@start_global_value +0 +"Trying to set variable @@global.ROCKSDB_WRITE_POLICY to 444. It should fail because it is readonly." 
+SET @@global.ROCKSDB_WRITE_POLICY = 444; +ERROR HY000: Variable 'rocksdb_write_policy' is a read only variable +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_commit_time_batch_for_recovery_basic.test b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_commit_time_batch_for_recovery_basic.test new file mode 100644 index 000000000000..0d36e0a173da --- /dev/null +++ b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_commit_time_batch_for_recovery_basic.test @@ -0,0 +1,20 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +INSERT INTO valid_values VALUES('off'); + +CREATE TABLE invalid_values (value varchar(255)); +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); + +--let $sys_var=ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY +--let $read_only=0 +--let $session=1 +--let $sticky=1 +--source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_write_policy_basic.test b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_write_policy_basic.test new file mode 100644 index 000000000000..cb2def2a0f03 --- /dev/null +++ b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_write_policy_basic.test @@ -0,0 +1,17 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(2); +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); + +--let $sys_var=ROCKSDB_WRITE_POLICY +--let $read_only=1 +--let $session=0 +--source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/ha_rocksdb.cc 
b/storage/rocksdb/ha_rocksdb.cc index 56d5156918de..14ee3c662c98 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -490,6 +490,8 @@ static my_bool rocksdb_enable_bulk_load_api = 1; static my_bool rocksdb_print_snapshot_conflict_queries = 0; static my_bool rocksdb_large_prefix = 0; static my_bool rocksdb_allow_to_start_after_corruption = 0; +static uint32_t rocksdb_write_policy = + rocksdb::TxnDBWritePolicy::WRITE_COMMITTED; std::atomic rocksdb_row_lock_deadlocks(0); std::atomic rocksdb_row_lock_wait_timeouts(0); @@ -649,6 +651,11 @@ static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG, /*min*/ 2, /*max*/ ULONG_MAX, 0); +static MYSQL_THDVAR_BOOL( + commit_time_batch_for_recovery, PLUGIN_VAR_RQCMDARG, + "TransactionOptions::commit_time_batch_for_recovery for RocksDB", nullptr, + nullptr, FALSE); + static MYSQL_THDVAR_BOOL( trace_sst_api, PLUGIN_VAR_RQCMDARG, "Generate trace output in the log for each call to the SstFileWriter", @@ -778,6 +785,13 @@ static MYSQL_SYSVAR_BOOL( "DBOptions::manual_wal_flush for RocksDB", nullptr, nullptr, rocksdb_db_options->manual_wal_flush); +static MYSQL_SYSVAR_UINT(write_policy, rocksdb_write_policy, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::write_policy for RocksDB", nullptr, + nullptr, rocksdb::TxnDBWritePolicy::WRITE_COMMITTED, + rocksdb::TxnDBWritePolicy::WRITE_COMMITTED, + rocksdb::TxnDBWritePolicy::WRITE_UNPREPARED, 0); + static MYSQL_SYSVAR_BOOL( create_missing_column_families, *reinterpret_cast( @@ -1516,6 +1530,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(lock_wait_timeout), MYSQL_SYSVAR(deadlock_detect), MYSQL_SYSVAR(deadlock_detect_depth), + MYSQL_SYSVAR(commit_time_batch_for_recovery), MYSQL_SYSVAR(max_row_locks), MYSQL_SYSVAR(write_batch_max_bytes), MYSQL_SYSVAR(lock_scanned_rows), @@ -1537,6 +1552,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = { MYSQL_SYSVAR(create_if_missing), 
MYSQL_SYSVAR(two_write_queues), MYSQL_SYSVAR(manual_wal_flush), + MYSQL_SYSVAR(write_policy), MYSQL_SYSVAR(create_missing_column_families), MYSQL_SYSVAR(error_if_exists), MYSQL_SYSVAR(paranoid_checks), @@ -2591,6 +2607,10 @@ class Rdb_transaction_impl : public Rdb_transaction { tx_opts.lock_timeout = rdb_convert_sec_to_ms(m_timeout_sec); tx_opts.deadlock_detect = THDVAR(m_thd, deadlock_detect); tx_opts.deadlock_detect_depth = THDVAR(m_thd, deadlock_detect_depth); + // If this variable is set, this will write commit time write batch + // information on recovery or memtable flush. + tx_opts.use_only_the_last_commit_time_batch_for_recovery = + THDVAR(m_thd, commit_time_batch_for_recovery); tx_opts.max_write_batch_size = THDVAR(m_thd, write_batch_max_bytes); write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); @@ -4310,6 +4330,8 @@ static int rocksdb_init_func(void *const p) { rocksdb::TransactionDBOptions tx_db_options; tx_db_options.transaction_lock_timeout = 2; // 2 seconds tx_db_options.custom_mutex_factory = std::make_shared(); + tx_db_options.write_policy = + static_cast<rocksdb::TxnDBWritePolicy>(rocksdb_write_policy); status = check_rocksdb_options_compatibility(rocksdb_datadir, main_opts, cf_descr);