Skip to content

Commit

Permalink
Use pthread_exit to terminate raft service. (eBay#536)
Browse files Browse the repository at this point in the history
* Use pthread_exit to terminate raft service.

Allow the process to handle termination of raft service rather than
force process wide `exit()`. This is compile time configurable during CMake configuration.

Addresses eBay#535

---------

Co-authored-by: Jung-Sang Ahn <jungsang.ahn@gmail.com>
  • Loading branch information
szmyd and greensky00 authored Sep 18, 2024
1 parent ea1385e commit 6741a45
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 15 deletions.
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ endif()


# === Compiler flags ===
option(USE_PTHREAD_EXIT "Call pthread_exit on server threads" OFF)
if (NOT WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-pessimizing-move")
Expand All @@ -95,11 +96,19 @@ if (NOT WIN32)
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
endif ()
if (USE_PTHREAD_EXIT)
message(STATUS "Using ::pthread_exit for termination")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_PTHREAD_EXIT")
endif()

else ()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd5045 /wd4571 /wd4774 /wd4820 /wd5039 /wd4626 /wd4625 /wd5026 /wd5027 /wd4623 /wd4996 /wd4530 /wd4267 /wd4244 /W3")
message(STATUS "---- WIN32 ----")
set(DISABLE_SSL 1)

if (USE_PTHREAD_EXIT)
message(FATAL_ERROR "Using ::pthread_exit not supported on Windows")
endif()
endif ()

# === Disable SSL ===
Expand Down
15 changes: 15 additions & 0 deletions src/exit_handler.hxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

// What should be called to shutdown when raft_server is in abnormal state
#ifdef USE_PTHREAD_EXIT
extern "C" {
#include <pthread.h>
}
#define _sys_exit(status) ::pthread_exit(nullptr)

#else

#include <cstdlib>
#define _sys_exit(status) ::exit((status))

#endif
3 changes: 2 additions & 1 deletion src/handle_append_entries.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ limitations under the License.
#include "cluster_config.hxx"
#include "error_code.hxx"
#include "event_awaiter.hxx"
#include "exit_handler.hxx"
#include "handle_custom_notification.hxx"
#include "peer.hxx"
#include "snapshot.hxx"
Expand Down Expand Up @@ -406,7 +407,7 @@ ptr<req_msg> raft_server::create_append_entries_req(ptr<peer>& pp) {
p_er( "Peer's lastLogIndex is too large %" PRIu64 " v.s. %" PRIu64 ", ",
last_log_idx, cur_nxt_idx );
ctx_->state_mgr_->system_exit(raft_err::N8_peer_last_log_idx_too_large);
::exit(-1);
_sys_exit(-1);
return ptr<req_msg>();
// LCOV_EXCL_STOP
}
Expand Down
13 changes: 7 additions & 6 deletions src/handle_commit.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ limitations under the License.

#include "cluster_config.hxx"
#include "error_code.hxx"
#include "exit_handler.hxx"
#include "handle_client_request.hxx"
#include "global_mgr.hxx"
#include "peer.hxx"
Expand Down Expand Up @@ -150,7 +151,7 @@ void raft_server::commit_in_bg() {
"exiting to protect the system",
err.what() );
ctx_->state_mgr_->system_exit(raft_err::N20_background_commit_err);
::exit(-1);
_sys_exit(-1);
// LCOV_EXCL_STOP
}
}
Expand Down Expand Up @@ -228,7 +229,7 @@ bool raft_server::commit_in_bg_exec(size_t timeout_ms) {
// LCOV_EXCL_START
p_ft( "failed to get log entry with idx %" PRIu64 "", index_to_commit );
ctx_->state_mgr_->system_exit(raft_err::N19_bad_log_idx_for_term);
::exit(-1);
_sys_exit(-1);
// LCOV_EXCL_STOP
}

Expand All @@ -239,7 +240,7 @@ bool raft_server::commit_in_bg_exec(size_t timeout_ms) {
p_ft( "empty log at idx %" PRIu64 ", must be log corruption",
index_to_commit );
ctx_->state_mgr_->system_exit(raft_err::N19_bad_log_idx_for_term);
::exit(-1);
_sys_exit(-1);
// LCOV_EXCL_STOP
}

Expand Down Expand Up @@ -304,7 +305,7 @@ void raft_server::commit_app_log(ulong idx_to_commit,
p_ft( "pre-commit index %" PRIu64 " is smaller than commit index %" PRIu64,
pc_idx, sm_idx );
ctx_->state_mgr_->system_exit(raft_err::N23_precommit_order_inversion);
::exit(-1);
_sys_exit(-1);
}
ret_value = state_machine_->commit_ext
( state_machine::ext_op_params( sm_idx, buf ) );
Expand Down Expand Up @@ -583,7 +584,7 @@ bool raft_server::snapshot_and_compact(ulong committed_idx, bool forced_creation
"cannot be found in current committed logs, "
"this is a system error, exiting");
ctx_->state_mgr_->system_exit(raft_err::N6_no_snapshot_found);
::exit(-1);
_sys_exit(-1);
return false;
// LCOV_EXCL_STOP
}
Expand All @@ -600,7 +601,7 @@ bool raft_server::snapshot_and_compact(ulong committed_idx, bool forced_creation
", committed idx %" PRIu64,
conf->get_log_idx(), conf->get_prev_log_idx(), committed_idx);
//ctx_->state_mgr_->system_exit(raft_err::N7_no_config_at_idx_one);
//::exit(-1);
//_sys_exit(-1);
//return;
}

Expand Down
15 changes: 8 additions & 7 deletions src/handle_snapshot_sync.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ limitations under the License.
#include "context.hxx"
#include "error_code.hxx"
#include "event_awaiter.hxx"
#include "exit_handler.hxx"
#include "peer.hxx"
#include "snapshot.hxx"
#include "snapshot_sync_ctx.hxx"
Expand Down Expand Up @@ -123,7 +124,7 @@ ptr<req_msg> raft_server::create_sync_snapshot_req(ptr<peer>& pp,
last_log_idx, snp->get_last_log_idx());
}
ctx_->state_mgr_->system_exit(raft_err::N16_snapshot_for_peer_not_found);
::exit(-1);
_sys_exit(-1);
return ptr<req_msg>();
// LCOV_EXCL_STOP
}
Expand All @@ -135,7 +136,7 @@ ptr<req_msg> raft_server::create_sync_snapshot_req(ptr<peer>& pp,
"machine implementation, stop the system to prevent "
"further errors");
ctx_->state_mgr_->system_exit(raft_err::N17_empty_snapshot);
::exit(-1);
_sys_exit(-1);
return ptr<req_msg>();
// LCOV_EXCL_STOP
}
Expand Down Expand Up @@ -187,7 +188,7 @@ ptr<req_msg> raft_server::create_sync_snapshot_req(ptr<peer>& pp,
"bytes are expected, must be something wrong, exit.",
sz_rd, data->size() );
ctx_->state_mgr_->system_exit(raft_err::N18_partial_snapshot_block);
::exit(-1);
_sys_exit(-1);
return ptr<req_msg>();
// LCOV_EXCL_STOP
}
Expand Down Expand Up @@ -251,7 +252,7 @@ ptr<resp_msg> raft_server::handle_install_snapshot_req(req_msg& req, std::unique
req.get_src() );
ctx_->state_mgr_->system_exit
( raft_err::N10_leader_receive_InstallSnapshotRequest );
::exit(-1);
_sys_exit(-1);
return ptr<resp_msg>();
// LCOV_EXCL_STOP

Expand Down Expand Up @@ -543,7 +544,7 @@ bool raft_server::handle_snapshot_sync_req(snapshot_sync_req& req, std::unique_l
// LCOV_EXCL_START
p_er("bad server role for applying a snapshot, exit for debugging");
ctx_->state_mgr_->system_exit(raft_err::N11_not_follower_for_snapshot);
::exit(-1);
_sys_exit(-1);
// LCOV_EXCL_STOP
}

Expand All @@ -564,7 +565,7 @@ bool raft_server::handle_snapshot_sync_req(snapshot_sync_req& req, std::unique_l
p_er("failed to apply the snapshot after log compacted, "
"to ensure the safety, will shutdown the system");
ctx_->state_mgr_->system_exit(raft_err::N12_apply_snapshot_failed);
::exit(-1);
_sys_exit(-1);
return false;
// LCOV_EXCL_STOP
}
Expand Down Expand Up @@ -608,7 +609,7 @@ bool raft_server::handle_snapshot_sync_req(snapshot_sync_req& req, std::unique_l
// LCOV_EXCL_START
p_er("failed to handle snapshot installation due to system errors");
ctx_->state_mgr_->system_exit(raft_err::N13_snapshot_install_failed);
::exit(-1);
_sys_exit(-1);
return false;
// LCOV_EXCL_STOP
}
Expand Down
3 changes: 2 additions & 1 deletion src/raft_server.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ limitations under the License.
#include "context.hxx"
#include "error_code.hxx"
#include "event_awaiter.hxx"
#include "exit_handler.hxx"
#include "global_mgr.hxx"
#include "handle_client_request.hxx"
#include "handle_custom_notification.hxx"
Expand Down Expand Up @@ -1628,7 +1629,7 @@ ulong raft_server::term_for_log(ulong log_idx) {
}
p_lv(log_lv, "log_store_->start_index() %" PRIu64, log_store_->start_index());
//ctx_->state_mgr_->system_exit(raft_err::N19_bad_log_idx_for_term);
//::exit(-1);
//_sys_exit(-1);
return 0L;
}

Expand Down

0 comments on commit 6741a45

Please sign in to comment.