From 9383c42b9a1220044af686bd793e21745b0e7215 Mon Sep 17 00:00:00 2001 From: Jung-Sang Ahn Date: Wed, 26 Feb 2020 23:34:27 -0800 Subject: [PATCH] Fix a couple of bugs on removing servers * When we check the response from the to-be-removed server, we should make sure that its last log index is bigger than the target index. If the to-be-removed server restarts, it may return a response before it has synced up to the latest config. * The add-server logic should check whether the `srv_to_leave_` node is gone before checking for a duplicate ID. --- src/handle_append_entries.cxx | 3 ++- src/handle_join_leave.cxx | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/handle_append_entries.cxx b/src/handle_append_entries.cxx index 35371e3b..44717622 100644 --- a/src/handle_append_entries.cxx +++ b/src/handle_append_entries.cxx @@ -692,7 +692,8 @@ void raft_server::handle_append_entries_resp(resp_msg& resp) { check_srv_to_leave_timeout(); if ( srv_to_leave_ && srv_to_leave_->get_id() == resp.get_src() && - srv_to_leave_->is_stepping_down() ) { + srv_to_leave_->is_stepping_down() && + resp.get_next_idx() > srv_to_leave_target_idx_ ) { // Catch-up is done. p_in("server to be removed %d fully caught up the " "target config log %zu", diff --git a/src/handle_join_leave.cxx b/src/handle_join_leave.cxx index 6a2579f9..67931840 100644 --- a/src/handle_join_leave.cxx +++ b/src/handle_join_leave.cxx @@ -54,6 +54,8 @@ ptr raft_server::handle_add_srv_req(req_msg& req) { return resp; } + // Before checking duplicate ID, confirm srv_to_leave_ is gone. + check_srv_to_leave_timeout(); ptr srv_conf = srv_config::deserialize( entries[0]->get_buf() ); if ( peers_.find( srv_conf->get_id() ) != peers_.end() ||