-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathceremonyclient_start_cluster.sh
executable file
·136 lines (110 loc) · 3.73 KB
/
ceremonyclient_start_cluster.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/bin/bash
#
# Launches the ceremonyclient node binary on this machine: optionally one
# master process plus a series of data-worker processes, one per core index.
#
# Options:
#   --core-index-start N   first core index handed to a worker (default: 1);
#                          the master process is only started when N is 1
#   --data-worker-count N  number of worker processes to start (default: the
#                          logical CPU count reported by sysctl/nproc)
#
# Any other argument prints "Unknown option" and exits with status 1.
# After launching, the script stays in the foreground and, when the start
# index is 1, re-launches the master process if it is found not running.
# SIGINT is forwarded to the child processes before the script exits.
set -x # for debugging purposes - this prints the command that is to be executed before the command is executed
# SIGINT handler: pass the interrupt on to every direct child of this script,
# block until those children have exited, then leave with a success status.
kill_process() {
  pkill -SIGINT -P "$$"
  wait
  exit 0
}

# Route Ctrl-C / SIGINT through the handler above.
trap 'kill_process' INT
# Locate the directory that holds this script, following symlinks on the way.
SOURCE="${BASH_SOURCE[0]}"
while [[ -L "$SOURCE" ]]; do
  SCRIPT_DIR=$(cd -P "$(dirname "$SOURCE")" >/dev/null 2>&1 && pwd)
  SOURCE=$(readlink "$SOURCE")
  # A relative link target is relative to the directory containing the link,
  # so anchor it there before the next iteration.
  if [[ "$SOURCE" != /* ]]; then
    SOURCE="$SCRIPT_DIR/$SOURCE"
  fi
done
SCRIPT_DIR=$(cd -P "$(dirname "$SOURCE")" >/dev/null 2>&1 && pwd)
# Parent of SCRIPT_DIR: strip the last path component (and its slash).
SCRIPT_ROOT_DIR=${SCRIPT_DIR%/*}
# Ask the helper script next to this file for the release architecture and OS.
# The helper path is quoted so an install directory containing spaces still works.
# NOTE(review): the helper is presumably expected to print values such as
# "linux" / "darwin" for -os; confirm against tools/ceremonyclient_env.sh.
RELEASE_ARCH=$(bash "$SCRIPT_DIR/tools/ceremonyclient_env.sh" -arch)
RELEASE_OS=$(bash "$SCRIPT_DIR/tools/ceremonyclient_env.sh" -os)
RELEASE_LINE="$RELEASE_OS-$RELEASE_ARCH"

# Defaults; both can be overridden on the command line.
START_CORE_INDEX=1
# Default worker count is the logical CPU count. On any other OS value the
# variable is left unset and must be supplied with --data-worker-count.
if [[ "$RELEASE_OS" == "darwin" ]]; then
  DATA_WORKER_COUNT=$(sysctl -n hw.logicalcpu)
elif [[ "$RELEASE_OS" == "linux" ]]; then
  DATA_WORKER_COUNT=$(nproc)
fi

# PID of this script, handed to each worker via --parent-process.
PARENT_PID=$$

# Some variables for paths and binaries
QUIL_NODE_PATH=$HOME/ceremonyclient/node
NODE_BINARY="node-2.0.4.2-$RELEASE_LINE"
# Parse command line arguments
#
# Recognised options (each takes exactly one value):
#   --core-index-start N   -> START_CORE_INDEX
#   --data-worker-count N  -> DATA_WORKER_COUNT
# Anything else prints "Unknown option: ..." and exits with status 1.
#
# Globals:   START_CORE_INDEX, DATA_WORKER_COUNT (written)
# Arguments: the script's command line
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --core-index-start | --data-worker-count)
        # Without this check a trailing option with no value makes `shift 2`
        # fail without shifting anything, and the loop would spin forever.
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value"
          exit 1
        fi
        if [[ "$1" == "--core-index-start" ]]; then
          START_CORE_INDEX="$2"
        else
          DATA_WORKER_COUNT="$2"
        fi
        shift 2
        ;;
      *)
        echo "Unknown option: $1"
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# Validate START_CORE_INDEX
if ! [[ "$START_CORE_INDEX" =~ ^[0-9]+$ ]]; then
  echo "Error: --core-index-start must be a non-negative integer"
  exit 1
fi
# The pattern above accepts zero-padded input such as "010" or "08". Bash
# arithmetic reads a leading 0 as octal, so later core-number calculations
# would yield the wrong value (010 -> 8) or fail outright (08). Force base 10.
START_CORE_INDEX=$((10#$START_CORE_INDEX))

# Validate DATA_WORKER_COUNT
# The pattern requires a leading non-zero digit, so no octal ambiguity here.
if ! [[ "$DATA_WORKER_COUNT" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: --data-worker-count must be a positive integer"
  exit 1
fi
# Print the number of logical CPUs available on this machine.
# Globals: RELEASE_OS (read)
# Outputs: the CPU count on stdout
detect_max_cores() {
  case "$RELEASE_OS" in
    darwin) sysctl -n hw.logicalcpu ;;
    linux) nproc ;;
    # Unrecognised OS value: previously MAX_CORES was left empty, which made
    # the clamp below misbehave. Fall back to getconf, then to a single core.
    *) getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1 ;;
  esac
}
MAX_CORES=$(detect_max_cores)

# When the start index is 1 this machine also runs the master process, so one
# core is held back from the worker budget.
if [ "$START_CORE_INDEX" -eq 1 ]; then
  echo "Adjusting max cores available to $((MAX_CORES - 1)) (from $MAX_CORES) due to starting the master node on core 0"
  MAX_CORES=$((MAX_CORES - 1))
fi

# If DATA_WORKER_COUNT is greater than MAX_CORES, set it to MAX_CORES
if [ "$DATA_WORKER_COUNT" -gt "$MAX_CORES" ]; then
  DATA_WORKER_COUNT=$MAX_CORES
  echo "DATA_WORKER_COUNT adjusted down to maximum: $DATA_WORKER_COUNT"
fi
# PID of the master process; 0 means no master has been started yet.
MASTER_PID=0

# kill off any stragglers
# The pattern is quoted so the shell cannot expand it against files in the
# current directory (the node directory holds files named node-*), and it is
# anchored because pkill treats it as a regular expression: an unanchored
# "node-*" matches every process whose name merely contains "node".
pkill '^node-'
# Start the master node process in the background and remember its PID.
# Globals: QUIL_NODE_PATH, NODE_BINARY (read); MASTER_PID (written)
start_master() {
  # Quoted so a node path containing spaces is passed as a single word.
  "$QUIL_NODE_PATH/$NODE_BINARY" &
  MASTER_PID=$!
}

# Only the machine whose start index is 1 runs the master process.
if [ "$START_CORE_INDEX" -eq 1 ]; then
  start_master
fi
# Start the data workers in the background, one per core index.
# Core indices run from START_CORE_INDEX up to
# START_CORE_INDEX + DATA_WORKER_COUNT - 1; each worker is told the PID of
# this script through --parent-process.
# Globals: START_CORE_INDEX, DATA_WORKER_COUNT, QUIL_NODE_PATH, NODE_BINARY,
#          PARENT_PID (all read)
# Outputs: one "Starting core N" line per worker on stdout
start_workers() {
  local i core
  for ((i = 0; i < DATA_WORKER_COUNT; i++)); do
    core=$((START_CORE_INDEX + i))
    echo "Starting core $core"
    # Quoted so a node path containing spaces is passed as a single word.
    "$QUIL_NODE_PATH/$NODE_BINARY" --core "$core" --parent-process "$PARENT_PID" &
  done
}
# Report whether a process with the PID stored in MASTER_PID exists.
# Globals: MASTER_PID (read)
# Returns: the exit status of `ps -p`, i.e. 0 when the process was found.
is_master_process_running() {
  ps -p "$MASTER_PID" &>/dev/null
}
start_workers

# Supervision loop. Only the master process is watched: the workers are
# expected to stay alive for as long as this script runs, and the check is
# only relevant on the machine whose start index is 1.
while :; do
  if [ "$START_CORE_INDEX" -eq 1 ]; then
    if ! is_master_process_running; then
      echo "Process crashed or stopped. restarting..."
      start_master
    fi
  fi
  sleep 440
done