From db05d645e90a7a653c114a13b4832e96517d1375 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 26 Aug 2024 15:27:51 +0800 Subject: [PATCH 01/10] Fix reconfiguring sub-replica causing data loss when myself changes shard_id In this case, sender is myself's primary. When executing updateShardId, not only is the sender's shard_id updated, but the shard_id of myself is updated as well, causing the subsequent areInSameShard check (that is, the full_sync_required check) to fail. This follows #885 and closes #942. Signed-off-by: Binbin --- src/cluster_legacy.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 4f1e09d4ef..a638a7aa70 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3162,6 +3162,8 @@ int clusterProcessPacket(clusterLink *link) { /* PING, PONG, MEET: process config information. */ if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || type == CLUSTERMSG_TYPE_MEET) { + int myself_shard_id_changed = 0; + serverLog(LL_DEBUG, "%s packet received: %.40s", clusterGetMessageTypeString(type), link->node ? link->node->name : "NULL"); @@ -3312,10 +3314,16 @@ int clusterProcessPacket(clusterLink *link) { if (sender->replicaof) clusterNodeRemoveReplica(sender->replicaof, sender); serverLog(LL_NOTICE, "Node %.40s (%s) is now a replica of node %.40s (%s) in shard %.40s", sender->name, sender->human_nodename, sender_claimed_primary->name, - sender_claimed_primary->human_nodename, sender->shard_id); + sender_claimed_primary->human_nodename, sender_claimed_primary->shard_id); clusterNodeAddReplica(sender_claimed_primary, sender); sender->replicaof = sender_claimed_primary; + /* The later updateShardId may change myself shard_id, and we + * need to remember whether this change has occurred. 
*/ + if (sender_claimed_primary->shard_id && myself != sender && myself->replicaof == sender) { + myself_shard_id_changed = 1; + } + /* Update the shard_id when a replica is connected to its * primary in the very first time. */ updateShardId(sender, sender_claimed_primary->shard_id); @@ -3398,7 +3406,14 @@ int clusterProcessPacket(clusterLink *link) { * so we can try a psync. */ serverLog(LL_NOTICE, "I'm a sub-replica! Reconfiguring myself as a replica of %.40s from %.40s", myself->replicaof->replicaof->name, myself->replicaof->name); - clusterSetPrimary(myself->replicaof->replicaof, 1, !areInSameShard(myself->replicaof->replicaof, myself)); + if (myself_shard_id_changed) { + /* If myself shard_id changes during the clusterProcessPacket, myself + * will not be able to psync with the new shard. */ + clusterSetPrimary(myself->replicaof->replicaof, 1, 1); + } else { + int are_in_same_shard = areInSameShard(myself->replicaof->replicaof, myself); + clusterSetPrimary(myself->replicaof->replicaof, 1, !are_in_same_shard); + } clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG); } From 18dae3eabeaf08e4b3be36f195624f064ef0cbe8 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 26 Aug 2024 15:36:41 +0800 Subject: [PATCH 02/10] fix build warning Signed-off-by: Binbin --- src/cluster_legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index a638a7aa70..6009929396 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3320,7 +3320,7 @@ int clusterProcessPacket(clusterLink *link) { /* The later updateShardId may change myself shard_id, and we * need to remember whether this change has occurred. 
*/ - if (sender_claimed_primary->shard_id && myself != sender && myself->replicaof == sender) { + if (myself != sender && myself->replicaof == sender) { myself_shard_id_changed = 1; } From 361f6175be2e4c9747f4db7a16679078c9e438bf Mon Sep 17 00:00:00 2001 From: Binbin Date: Wed, 28 Aug 2024 11:21:26 +0800 Subject: [PATCH 03/10] pull the chain replication reduction logic before the update of shard_id Signed-off-by: Binbin --- src/cluster_legacy.c | 53 ++++------ tests/unit/cluster/replica-migration.tcl | 125 ++++++++++++++++++++++- 2 files changed, 145 insertions(+), 33 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 6009929396..e15f465a6a 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3162,8 +3162,6 @@ int clusterProcessPacket(clusterLink *link) { /* PING, PONG, MEET: process config information. */ if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || type == CLUSTERMSG_TYPE_MEET) { - int myself_shard_id_changed = 0; - serverLog(LL_DEBUG, "%s packet received: %.40s", clusterGetMessageTypeString(type), link->node ? link->node->name : "NULL"); @@ -3318,10 +3316,27 @@ int clusterProcessPacket(clusterLink *link) { clusterNodeAddReplica(sender_claimed_primary, sender); sender->replicaof = sender_claimed_primary; - /* The later updateShardId may change myself shard_id, and we - * need to remember whether this change has occurred. */ - if (myself != sender && myself->replicaof == sender) { - myself_shard_id_changed = 1; + /* Currently this is the only place where replicaof state can be updated on + * this function, since updateShardId may update myself shard_id and caused + * areInSameShard check failed. Explicitly check for a replication loop before + * attempting the replication chain folding logic. */ + if (myself->replicaof && myself->replicaof->replicaof && myself->replicaof->replicaof != myself) { + /* Safeguard against sub-replicas. 
+ * + * A replica's primary can turn itself into a replica if its last slot + * is removed. If no other node takes over the slot, there is nothing + * else to trigger replica migration. In this case, they are not in the + * same shard, so a full sync is required. + * + * Or a replica's primary can turn itself into a replica of its other + * replica during a failover. In this case, they are in the same shard, + * so we can try a psync. */ + serverLog(LL_NOTICE, "I'm a sub-replica! Reconfiguring myself as a replica of %.40s from %.40s", + myself->replicaof->replicaof->name, myself->replicaof->name); + int are_in_same_shard = areInSameShard(myself->replicaof->replicaof, myself); + clusterSetPrimary(myself->replicaof->replicaof, 1, !are_in_same_shard); + /* We will add the CLUSTER_TODO_SAVE_CONFIG flag when we exit the if statement. */ + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG); } /* Update the shard_id when a replica is connected to its @@ -3391,32 +3406,6 @@ int clusterProcessPacket(clusterLink *link) { } } - /* Explicitly check for a replication loop before attempting the replication - * chain folding logic. */ - if (myself->replicaof && myself->replicaof->replicaof && myself->replicaof->replicaof != myself) { - /* Safeguard against sub-replicas. - * - * A replica's primary can turn itself into a replica if its last slot - * is removed. If no other node takes over the slot, there is nothing - * else to trigger replica migration. In this case, they are not in the - * same shard, so a full sync is required. - * - * Or a replica's primary can turn itself into a replica of its other - * replica during a failover. In this case, they are in the same shard, - * so we can try a psync. */ - serverLog(LL_NOTICE, "I'm a sub-replica! 
Reconfiguring myself as a replica of %.40s from %.40s", - myself->replicaof->replicaof->name, myself->replicaof->name); - if (myself_shard_id_changed) { - /* If myself shard_id changes during the clusterProcessPacket, myself - * will not be able to psync with the new shard. */ - clusterSetPrimary(myself->replicaof->replicaof, 1, 1); - } else { - int are_in_same_shard = areInSameShard(myself->replicaof->replicaof, myself); - clusterSetPrimary(myself->replicaof->replicaof, 1, !are_in_same_shard); - } - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG); - } - /* If our config epoch collides with the sender's try to fix * the problem. */ if (sender && nodeIsPrimary(myself) && nodeIsPrimary(sender) && diff --git a/tests/unit/cluster/replica-migration.tcl b/tests/unit/cluster/replica-migration.tcl index 06e9d70ee7..599f193638 100644 --- a/tests/unit/cluster/replica-migration.tcl +++ b/tests/unit/cluster/replica-migration.tcl @@ -190,7 +190,7 @@ proc test_nonempty_replica {type} { if {$type == "sigstop"} { resume_process $primary0_pid - # Waiting the old primary go online and become a replica. + # Wait for the old primary to go online and become a replica. wait_for_condition 1000 50 { [s 0 role] eq {slave} } else { @@ -208,6 +208,129 @@ start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout test_nonempty_replica "sigstop" } my_slot_allocation cluster_allocate_replicas ;# start_cluster +proc test_sub_replica {type} { + test "Sub-replica reports zero repl offset and rank, and fails to win election - $type" { + # Write some data to primary 0, slot 1, make a small repl_offset. + for {set i 0} {$i < 1024} {incr i} { + R 0 incr key_991803 + } + assert_equal {1024} [R 0 get key_991803] + + # Write some data to primary 3, slot 0, make a big repl_offset. 
+ for {set i 0} {$i < 10240} {incr i} { + R 3 incr key_977613 + } + assert_equal {10240} [R 3 get key_977613] + + R 3 config set cluster-replica-validity-factor 0 + R 7 config set cluster-replica-validity-factor 0 + R 3 config set cluster-allow-replica-migration yes + R 7 config set cluster-allow-replica-migration no + + # Record the current primary node, server 3 will be migrated later. + # And server 7 will become a sub-replica that also will be migrated later. + set 3_old_role_response [R 3 role] + set 7_old_role_response [R 7 role] + + # 10s, make sure primary 0 will hang in the save. + R 0 config set rdb-key-save-delay 100000000 + + # Move slot 0 from primary 3 to primary 0. + set addr "[srv 0 host]:[srv 0 port]" + set myid [R 3 CLUSTER MYID] + set code [catch { + exec src/valkey-cli {*}[valkeycli_tls_config "./tests"] --cluster rebalance $addr --cluster-weight $myid=0 + } result] + if {$code != 0} { + fail "valkey-cli --cluster rebalance returns non-zero exit code, output below:\n$result" + } + + # Wait for server 3 and server 7 role response to change. + wait_for_condition 1000 50 { + [R 3 role] ne $3_old_role_response && + [R 7 role] ne $7_old_role_response + } else { + puts "R 3 role: [R 3 role]" + puts "R 7 role: [R 7 role]" + fail "Server 3 and 7 role response has not changed" + } + + # Make sure server 3 becomes a replica of primary 0. + set new_primary_ip [lindex [R 3 role] 1] + set new_primary_port [lindex [R 3 role] 2] + assert_equal [s -3 role] {slave} + assert_equal "$new_primary_ip:$new_primary_port" $addr + + # And server 7 becomes a replica of primary 0. + set new_primary_ip [lindex [R 7 role] 1] + set new_primary_port [lindex [R 7 role] 2] + assert_equal [s -7 role] {slave} + assert_equal "$new_primary_ip:$new_primary_port" $addr + + # Make sure server 7 got a sub-replica log. + verify_log_message -7 "*I'm a sub-replica!*" 0 + + if {$type == "shutdown"} { + # Shutdown primary 0. 
+ catch {R 0 shutdown nosave} + } elseif {$type == "sigstop"} { + # Pause primary 0. + set primary0_pid [s 0 process_id] + pause_process $primary0_pid + } + + # Wait for the replica to become a primary, and make sure + # the other primary become a replica. + wait_for_condition 1000 50 { + [s -4 role] eq {master} && + [s -3 role] eq {slave} && + [s -7 role] eq {slave} + } else { + puts "s -4 role: [s -4 role]" + puts "s -3 role: [s -3 role]" + puts "s -7 role: [s -7 role]" + fail "Failover does not happened" + } + + # Make sure the offset of server 3 / 7 is 0. + verify_log_message -3 "*Start of election*offset 0*" 0 + verify_log_message -7 "*Start of election*offset 0*" 0 + + # Make sure the key exists and is consistent. + R 3 readonly + R 7 readonly + wait_for_condition 1000 50 { + [R 3 get key_991803] == 1024 && [R 3 get key_977613] == 10240 && + [R 4 get key_991803] == 1024 && [R 4 get key_977613] == 10240 && + [R 7 get key_991803] == 1024 && [R 7 get key_977613] == 10240 + } else { + puts "R 3: [R 3 keys *]" + puts "R 4: [R 4 keys *]" + puts "R 7: [R 7 keys *]" + fail "Key not consistent" + } + + if {$type == "sigstop"} { + resume_process $primary0_pid + + # Wait for the old primary to go online and become a replica. 
+ wait_for_condition 1000 50 { + [s 0 role] eq {slave} + } else { + fail "The old primary was not converted into replica" + } + } + } +} + +start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} { + test_sub_replica "shutdown" +} my_slot_allocation cluster_allocate_replicas ;# start_cluster + +start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} { + test_sub_replica "sigstop" +} my_slot_allocation cluster_allocate_replicas ;# start_cluster + start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} { test "valkey-cli make source node ignores NOREPLICAS error when doing the last CLUSTER SETSLOT" { R 3 config set cluster-allow-replica-migration no From ed589dbd07434526b3148fd74ac9a218385f176f Mon Sep 17 00:00:00 2001 From: Binbin Date: Wed, 28 Aug 2024 15:06:08 +0800 Subject: [PATCH 04/10] fix timing issue Signed-off-by: Binbin --- tests/unit/cluster/replica-migration.tcl | 31 +++++++++--------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/tests/unit/cluster/replica-migration.tcl b/tests/unit/cluster/replica-migration.tcl index 599f193638..c3ef42ae00 100644 --- a/tests/unit/cluster/replica-migration.tcl +++ b/tests/unit/cluster/replica-migration.tcl @@ -11,6 +11,14 @@ proc my_slot_allocation {masters replicas} { R [expr $masters-1] cluster addslots 0 } +proc get_my_primary_peer {srv_idx} { + set role_response [R $srv_idx role] + set primary_ip [lindex $role_response 1] + set primary_port [lindex $role_response 2] + set primary_peer "$primary_ip:$primary_port" + return $primary_peer +} + proc test_migrated_replica {type} { test "Migrated replica reports zero repl offset and rank, and fails to win election - $type" { # Write some data to primary 0, slot 1, make a small repl_offset. 
@@ -227,11 +235,6 @@ proc test_sub_replica {type} { R 3 config set cluster-allow-replica-migration yes R 7 config set cluster-allow-replica-migration no - # Record the current primary node, server 3 will be migrated later. - # And server 7 will become a sub-replica that also will be migrated later. - set 3_old_role_response [R 3 role] - set 7_old_role_response [R 7 role] - # 10s, make sure primary 0 will hang in the save. R 0 config set rdb-key-save-delay 100000000 @@ -245,28 +248,16 @@ proc test_sub_replica {type} { fail "valkey-cli --cluster rebalance returns non-zero exit code, output below:\n$result" } - # Wait for server 3 and server 7 role response to change. + # Make sure server 3 and server 7 becomes a replica of primary 0. wait_for_condition 1000 50 { - [R 3 role] ne $3_old_role_response && - [R 7 role] ne $7_old_role_response + [get_my_primary_peer 3] eq $addr && + [get_my_primary_peer 7] eq $addr } else { puts "R 3 role: [R 3 role]" puts "R 7 role: [R 7 role]" fail "Server 3 and 7 role response has not changed" } - # Make sure server 3 becomes a replica of primary 0. - set new_primary_ip [lindex [R 3 role] 1] - set new_primary_port [lindex [R 3 role] 2] - assert_equal [s -3 role] {slave} - assert_equal "$new_primary_ip:$new_primary_port" $addr - - # And server 7 becomes a replica of primary 0. - set new_primary_ip [lindex [R 7 role] 1] - set new_primary_port [lindex [R 7 role] 2] - assert_equal [s -7 role] {slave} - assert_equal "$new_primary_ip:$new_primary_port" $addr - # Make sure server 7 got a sub-replica log. 
verify_log_message -7 "*I'm a sub-replica!*" 0 From 187352f6fa09cfafe715964b70603e6fd4c69b6a Mon Sep 17 00:00:00 2001 From: Binbin Date: Wed, 28 Aug 2024 15:44:29 +0800 Subject: [PATCH 05/10] fix cluster down timing issue Signed-off-by: Binbin --- tests/unit/cluster/replica-migration.tcl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/unit/cluster/replica-migration.tcl b/tests/unit/cluster/replica-migration.tcl index c3ef42ae00..0d232b2765 100644 --- a/tests/unit/cluster/replica-migration.tcl +++ b/tests/unit/cluster/replica-migration.tcl @@ -287,6 +287,18 @@ proc test_sub_replica {type} { verify_log_message -3 "*Start of election*offset 0*" 0 verify_log_message -7 "*Start of election*offset 0*" 0 + # Wait for the cluster to be ok. + wait_for_condition 1000 50 { + [CI 3 cluster_state] eq "ok" && + [CI 4 cluster_state] eq "ok" && + [CI 7 cluster_state] eq "ok" + } else { + puts "R 3: [R 3 cluster info]" + puts "R 4: [R 4 cluster info]" + puts "R 7: [R 7 cluster info]" + fail "Cluster is down" + } + # Make sure the key exists and is consistent. R 3 readonly R 7 readonly From f7c4367191ad413850202d85d683f44cbe8e9fe4 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 29 Aug 2024 14:42:23 +0800 Subject: [PATCH 06/10] Update src/cluster_legacy.c Co-authored-by: Ping Xie Signed-off-by: Binbin --- src/cluster_legacy.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index e15f465a6a..0cc0c6dd79 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3316,10 +3316,20 @@ int clusterProcessPacket(clusterLink *link) { clusterNodeAddReplica(sender_claimed_primary, sender); sender->replicaof = sender_claimed_primary; - /* Currently this is the only place where replicaof state can be updated on - * this function, since updateShardId may update myself shard_id and caused - * areInSameShard check failed. 
Explicitly check for a replication loop before - * attempting the replication chain folding logic. */ + /* The chain reduction logic requires correctly establishing the replication relationship. + * A key decision when designating a new primary for 'myself' is determining whether + * 'myself' and the new primary belong to the same shard, which would imply shared + * replication history and allow safe partial synchronization (psync). + * + * This decision hinges on the shard_id, a per-node property that helps verify if the + * two nodes share the same replication history. It's critical not to update 'myself's + * shard_id prematurely during this process. Doing so could incorrectly associate + * 'myself' with the sender's shard_id, leading the subsequent clusterSetPrimary call + * to falsely assume that 'myself' and the new primary have been in the same shard. + * This mistake could result in data loss by incorrectly permitting a psync. + * + * Therefore, it's essential to delay any shard_id updates until after the replication + * relationship has been properly established and verified. */ if (myself->replicaof && myself->replicaof->replicaof && myself->replicaof->replicaof != myself) { /* Safeguard against sub-replicas. 
* From 87dcb22e78c6184da5bf9d35ceabae5de4e74186 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 29 Aug 2024 14:43:24 +0800 Subject: [PATCH 07/10] Update src/cluster_legacy.c Co-authored-by: Ping Xie Signed-off-by: Binbin --- src/cluster_legacy.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 0cc0c6dd79..aeacb0270e 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3345,8 +3345,7 @@ int clusterProcessPacket(clusterLink *link) { myself->replicaof->replicaof->name, myself->replicaof->name); int are_in_same_shard = areInSameShard(myself->replicaof->replicaof, myself); clusterSetPrimary(myself->replicaof->replicaof, 1, !are_in_same_shard); - /* We will add the CLUSTER_TODO_SAVE_CONFIG flag when we exit the if statement. */ - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG); } /* Update the shard_id when a replica is connected to its From 7788208e1a554fee1b5b3d3dd5d503e023989a85 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 29 Aug 2024 14:43:30 +0800 Subject: [PATCH 08/10] Update src/cluster_legacy.c Co-authored-by: Ping Xie Signed-off-by: Binbin --- src/cluster_legacy.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index aeacb0270e..5a341681e6 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3343,8 +3343,7 @@ int clusterProcessPacket(clusterLink *link) { * so we can try a psync. */ serverLog(LL_NOTICE, "I'm a sub-replica! 
Reconfiguring myself as a replica of %.40s from %.40s", myself->replicaof->replicaof->name, myself->replicaof->name); - int are_in_same_shard = areInSameShard(myself->replicaof->replicaof, myself); - clusterSetPrimary(myself->replicaof->replicaof, 1, !are_in_same_shard); + clusterSetPrimary(myself->replicaof->replicaof, 1, !areInSameShard(myself->replicaof->replicaof, myself)); clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG); } From 44c84f5ef799a7586079c3b86c250d5a2cd3bc80 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 29 Aug 2024 14:49:36 +0800 Subject: [PATCH 09/10] fix format Signed-off-by: Binbin --- src/cluster_legacy.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 5a341681e6..8ee99a3d2c 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3319,10 +3319,10 @@ int clusterProcessPacket(clusterLink *link) { /* The chain reduction logic requires correctly establishing the replication relationship. * A key decision when designating a new primary for 'myself' is determining whether * 'myself' and the new primary belong to the same shard, which would imply shared - * replication history and allow safe partial synchronization (psync). + * replication history and allow a safe partial synchronization (psync). * * This decision hinges on the shard_id, a per-node property that helps verify if the - * two nodes share the same replication history. It's critical not to update 'myself's + * two nodes share the same replication history. It's critical not to update myself's * shard_id prematurely during this process. Doing so could incorrectly associate * 'myself' with the sender's shard_id, leading the subsequent clusterSetPrimary call * to falsely assume that 'myself' and the new primary have been in the same shard. @@ -3343,8 +3343,10 @@ int clusterProcessPacket(clusterLink *link) { * so we can try a psync. 
*/ serverLog(LL_NOTICE, "I'm a sub-replica! Reconfiguring myself as a replica of %.40s from %.40s", myself->replicaof->replicaof->name, myself->replicaof->name); - clusterSetPrimary(myself->replicaof->replicaof, 1, !areInSameShard(myself->replicaof->replicaof, myself)); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG); + clusterSetPrimary(myself->replicaof->replicaof, 1, + !areInSameShard(myself->replicaof->replicaof, myself)); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| CLUSTER_TODO_UPDATE_STATE | + CLUSTER_TODO_FSYNC_CONFIG); } /* Update the shard_id when a replica is connected to its From f5cd6587dcdaebb44acf19ebbbfaaa9524f64df7 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 29 Aug 2024 15:03:12 +0800 Subject: [PATCH 10/10] fix format Signed-off-by: Binbin --- src/cluster_legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 8ee99a3d2c..21bdd09919 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -3345,7 +3345,7 @@ int clusterProcessPacket(clusterLink *link) { myself->replicaof->replicaof->name, myself->replicaof->name); clusterSetPrimary(myself->replicaof->replicaof, 1, !areInSameShard(myself->replicaof->replicaof, myself)); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| CLUSTER_TODO_UPDATE_STATE | + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG); }