Skip to content

Commit

Permalink
Add rocksdb transaction options for write policy and commit time writ…
Browse files Browse the repository at this point in the history
…ebatch

Summary:
This adds two sysvars for MyRocks that control RocksDB transactions:

TransactionDBOptions::write_policy - This is a read only variable that determines when data is written into the database. The default is COMMITTED which means data is written at commit time.
TransactionOptions::use_only_the_last_commit_time_batch_for_recovery - This is a dynamic variable that determines whether the commit time write batch is written into the database or not. If the commit time write batch is only useful for recovery, writing it to the WAL is enough.

Differential Revision: D6905729
  • Loading branch information
lth authored and inikep committed Jun 13, 2023
1 parent 17a5f54 commit ba3c13d
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 0 deletions.
2 changes: 2 additions & 0 deletions mysql-test/suite/rocksdb/r/rocksdb.result
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,7 @@ rocksdb_cache_index_and_filter_blocks ON
rocksdb_checksums_pct 100
rocksdb_collect_sst_properties ON
rocksdb_commit_in_the_middle OFF
rocksdb_commit_time_batch_for_recovery OFF
rocksdb_compact_cf
rocksdb_compaction_readahead_size 0
rocksdb_compaction_sequential_deletes 0
Expand Down Expand Up @@ -979,6 +980,7 @@ rocksdb_whole_key_filtering ON
rocksdb_write_batch_max_bytes 0
rocksdb_write_disable_wal OFF
rocksdb_write_ignore_missing_column_families OFF
rocksdb_write_policy 0
create table t47 (pk int primary key, col1 varchar(12)) engine=rocksdb;
insert into t47 values (1, 'row1');
insert into t47 values (2, 'row2');
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
CREATE TABLE valid_values (value varchar(255));
INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(0);
INSERT INTO valid_values VALUES('on');
INSERT INTO valid_values VALUES('off');
CREATE TABLE invalid_values (value varchar(255));
INSERT INTO invalid_values VALUES('\'aaa\'');
INSERT INTO invalid_values VALUES('\'bbb\'');
SET @start_global_value = @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SELECT @start_global_value;
@start_global_value
0
SET @start_session_value = @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
SELECT @start_session_value;
@start_session_value
0
'# Setting to valid values in global scope#'
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 1"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 1;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
1
"Setting the global scope variable back to default"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 0"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 0;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Setting the global scope variable back to default"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to on"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = on;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
1
"Setting the global scope variable back to default"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to off"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = off;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Setting the global scope variable back to default"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
'# Setting to valid values in session scope#'
"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 1"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 1;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
1
"Setting the session scope variable back to default"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 0"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 0;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Setting the session scope variable back to default"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to on"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = on;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
1
"Setting the session scope variable back to default"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Trying to set variable @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to off"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = off;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Setting the session scope variable back to default"
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = DEFAULT;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
'# Testing with invalid values in global scope #'
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 'aaa'"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 'aaa';
Got one of the listed errors
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
"Trying to set variable @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY to 'bbb'"
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = 'bbb';
Got one of the listed errors
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
SET @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = @start_global_value;
SELECT @@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@global.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
SET @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY = @start_session_value;
SELECT @@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY;
@@session.ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
0
DROP TABLE valid_values;
DROP TABLE invalid_values;
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO valid_values VALUES(2);
INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(0);
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');
SET @start_global_value = @@global.ROCKSDB_WRITE_POLICY;
SELECT @start_global_value;
@start_global_value
0
"Trying to set variable @@global.ROCKSDB_WRITE_POLICY to 444. It should fail because it is readonly."
SET @@global.ROCKSDB_WRITE_POLICY = 444;
ERROR HY000: Variable 'rocksdb_write_policy' is a read only variable
DROP TABLE valid_values;
DROP TABLE invalid_values;
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Basic system-variable test for ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY.
# The actual checks live in ../include/rocksdb_sys_var.inc, which is driven by
# the valid_values / invalid_values tables and the $-variables set below.
--source include/have_rocksdb.inc

# Values that the variable must accept (boolean spelled as number or keyword).
CREATE TABLE valid_values (value varchar(255));
INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(0);
INSERT INTO valid_values VALUES('on');
INSERT INTO valid_values VALUES('off');

# Values that the variable must reject; the inner quotes are part of the value
# so the generated SET statement assigns a string literal.
CREATE TABLE invalid_values (value varchar(255));
INSERT INTO invalid_values VALUES('\'aaa\'');
INSERT INTO invalid_values VALUES('\'bbb\'');

# Name of the variable under test.
--let $sys_var=ROCKSDB_COMMIT_TIME_BATCH_FOR_RECOVERY
# The variable is dynamic, so SET is expected to succeed for valid values.
--let $read_only=0
# The variable has session scope in addition to global scope.
--let $session=1
# NOTE(review): the meaning of $sticky is defined in rocksdb_sys_var.inc,
# which is not visible here -- confirm before changing.
--let $sticky=1
--source ../include/rocksdb_sys_var.inc

DROP TABLE valid_values;
DROP TABLE invalid_values;
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Basic system-variable test for ROCKSDB_WRITE_POLICY.
# The actual checks live in ../include/rocksdb_sys_var.inc, which is driven by
# the valid_values / invalid_values tables and the $-variables set below.
--source include/have_rocksdb.inc

# Candidate valid values for the variable.
CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO valid_values VALUES(2);
INSERT INTO valid_values VALUES(1);
INSERT INTO valid_values VALUES(0);

# Candidate invalid value; the inner quotes are part of the stored value.
CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
INSERT INTO invalid_values VALUES('\'aaa\'');

# Name of the variable under test.
--let $sys_var=ROCKSDB_WRITE_POLICY
# The variable is read only: the recorded result only shows a single SET
# attempt (to 444) failing with a "read only variable" error.
--let $read_only=1
# Global scope only; there is no session-level copy of this variable.
--let $session=0
--source ../include/rocksdb_sys_var.inc

DROP TABLE valid_values;
DROP TABLE invalid_values;
22 changes: 22 additions & 0 deletions storage/rocksdb/ha_rocksdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,8 @@ static my_bool rocksdb_enable_bulk_load_api = 1;
static my_bool rocksdb_print_snapshot_conflict_queries = 0;
static my_bool rocksdb_large_prefix = 0;
static my_bool rocksdb_allow_to_start_after_corruption = 0;
// Backing storage for the read-only rocksdb_write_policy system variable.
// Kept as a plain integer so it can be bound to MYSQL_SYSVAR_UINT; it is cast
// to rocksdb::TxnDBWritePolicy when the TransactionDBOptions are populated in
// rocksdb_init_func(). Defaults to WRITE_COMMITTED.
static uint32_t rocksdb_write_policy =
rocksdb::TxnDBWritePolicy::WRITE_COMMITTED;

std::atomic<uint64_t> rocksdb_row_lock_deadlocks(0);
std::atomic<uint64_t> rocksdb_row_lock_wait_timeouts(0);
Expand Down Expand Up @@ -649,6 +651,11 @@ static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG,
/*min*/ 2,
/*max*/ ULONG_MAX, 0);

// Per-session boolean (default FALSE, no check/update callbacks) whose value
// Rdb_transaction_impl copies into
// rocksdb::TransactionOptions::use_only_the_last_commit_time_batch_for_recovery.
// The help text names the full RocksDB field so users can look the option up.
static MYSQL_THDVAR_BOOL(
    commit_time_batch_for_recovery, PLUGIN_VAR_RQCMDARG,
    "TransactionOptions::use_only_the_last_commit_time_batch_for_recovery for "
    "RocksDB",
    nullptr, nullptr, FALSE);

static MYSQL_THDVAR_BOOL(
trace_sst_api, PLUGIN_VAR_RQCMDARG,
"Generate trace output in the log for each call to the SstFileWriter",
Expand Down Expand Up @@ -778,6 +785,13 @@ static MYSQL_SYSVAR_BOOL(
"DBOptions::manual_wal_flush for RocksDB", nullptr, nullptr,
rocksdb_db_options->manual_wal_flush);

// Read-only server variable backed by rocksdb_write_policy. The value is
// applied to rocksdb::TransactionDBOptions::write_policy (not DBOptions) in
// rocksdb_init_func(). Default and minimum are WRITE_COMMITTED, maximum is
// WRITE_UNPREPARED, block size 0; no check/update callbacks.
static MYSQL_SYSVAR_UINT(write_policy, rocksdb_write_policy,
                         PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
                         "TransactionDBOptions::write_policy for RocksDB",
                         nullptr, nullptr,
                         rocksdb::TxnDBWritePolicy::WRITE_COMMITTED,
                         rocksdb::TxnDBWritePolicy::WRITE_COMMITTED,
                         rocksdb::TxnDBWritePolicy::WRITE_UNPREPARED, 0);

static MYSQL_SYSVAR_BOOL(
create_missing_column_families,
*reinterpret_cast<my_bool *>(
Expand Down Expand Up @@ -1516,6 +1530,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(lock_wait_timeout),
MYSQL_SYSVAR(deadlock_detect),
MYSQL_SYSVAR(deadlock_detect_depth),
MYSQL_SYSVAR(commit_time_batch_for_recovery),
MYSQL_SYSVAR(max_row_locks),
MYSQL_SYSVAR(write_batch_max_bytes),
MYSQL_SYSVAR(lock_scanned_rows),
Expand All @@ -1537,6 +1552,7 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(create_if_missing),
MYSQL_SYSVAR(two_write_queues),
MYSQL_SYSVAR(manual_wal_flush),
MYSQL_SYSVAR(write_policy),
MYSQL_SYSVAR(create_missing_column_families),
MYSQL_SYSVAR(error_if_exists),
MYSQL_SYSVAR(paranoid_checks),
Expand Down Expand Up @@ -2591,6 +2607,10 @@ class Rdb_transaction_impl : public Rdb_transaction {
tx_opts.lock_timeout = rdb_convert_sec_to_ms(m_timeout_sec);
tx_opts.deadlock_detect = THDVAR(m_thd, deadlock_detect);
tx_opts.deadlock_detect_depth = THDVAR(m_thd, deadlock_detect_depth);
// If this variable is set, the commit time write batch is applied to the
// memtable only on recovery or memtable flush instead of at commit time.
tx_opts.use_only_the_last_commit_time_batch_for_recovery =
THDVAR(m_thd, commit_time_batch_for_recovery);
tx_opts.max_write_batch_size = THDVAR(m_thd, write_batch_max_bytes);

write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
Expand Down Expand Up @@ -4310,6 +4330,8 @@ static int rocksdb_init_func(void *const p) {
rocksdb::TransactionDBOptions tx_db_options;
tx_db_options.transaction_lock_timeout = 2; // 2 seconds
tx_db_options.custom_mutex_factory = std::make_shared<Rdb_mutex_factory>();
tx_db_options.write_policy =
static_cast<rocksdb::TxnDBWritePolicy>(rocksdb_write_policy);

status =
check_rocksdb_options_compatibility(rocksdb_datadir, main_opts, cf_descr);
Expand Down

0 comments on commit ba3c13d

Please sign in to comment.