Skip to content

Commit

Permalink
Merge pull request #287 from MinecraftServerControl/automatic-restarts
Browse files Browse the repository at this point in the history
Add automatic restart on crash
  • Loading branch information
sandain authored Jun 3, 2021
2 parents 21cdd21 + 8bfef14 commit ee00f9a
Showing 1 changed file with 157 additions and 3 deletions.
160 changes: 157 additions & 3 deletions msctl
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ WGET=$(which wget)
RDIFF_BACKUP=$(which rdiff-backup)
RSYNC=$(which rsync)
SOCAT=$(which socat)
FLOCK=$(which flock)

# Script Usage
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -276,6 +277,9 @@ mscs_defaults() {
; for the world server selected.
# mscs-default-server-command=\$JAVA -Xms\$INITIAL_MEMORY -Xmx\$MAXIMUM_MEMORY \$JVM_ARGS -jar \$SERVER_LOCATION/\$SERVER_JAR \$SERVER_ARGS
; Default behavior if to restart the server after crash is detected (default disabled).
# mscs-default-restart-after-crash=false
; Location to store backup files.
# mscs-backup-location=/opt/mscs/backups
Expand Down Expand Up @@ -1315,6 +1319,123 @@ serverConsole() {
done
}

# ---------------------------------------------------------------------------
# Produce a timestamp string for log entries.
#
# @return The current date and time, formatted as YYYY-MM-DD_HH-MM-SS.
# ---------------------------------------------------------------------------
timestamp() {
  local FORMAT
  FORMAT='+%Y-%m-%d_%H-%M-%S'
  date "$FORMAT"
}

# ---------------------------------------------------------------------------
# Stop the server monitor.
#
# A non-blocking flock is attempted on the world's monitor.lock file.  If
# flock reports that the lock is already held (exit status 1) a monitor is
# considered to be running: the PID recorded in monitor.pid is killed and the
# PID file is removed.  Progress is appended to logs/mscs.monitor.log.
#
# @param 1 The world server to stop.
# @return 0 if no monitor was running or it was killed, 1 if the monitor
#         could not be killed.
# ---------------------------------------------------------------------------
stopServerMonitor() {
  local WORLD_DIR MONITOR_LOG MONITOR_PID MONITOR_PID_FILE MONITOR_LOCK_FILE ACQUIRED_LOCK
  WORLD_DIR="$WORLDS_LOCATION/$1"
  MONITOR_LOG="$WORLD_DIR/logs/mscs.monitor.log"
  MONITOR_PID_FILE="$WORLD_DIR/monitor.pid"
  MONITOR_LOCK_FILE="$WORLD_DIR/monitor.lock"
  # Without a world directory there is nowhere to open the lock file, and no
  # monitor could have been started for it.
  [ -d "$WORLD_DIR" ] || return 0
  # Check if server monitor instance currently running.
  (
    "$FLOCK" -n 9
    ACQUIRED_LOCK=$?
    # Only status 1 (lock held elsewhere) means a monitor is running.
    if [ "$ACQUIRED_LOCK" -ne 1 ]; then
      exit 0
    fi
    printf '[%s] [INFO]: Stop command received for server monitor. Attempting to kill server monitor...\n' "$(timestamp)" >> "$MONITOR_LOG"
    # Read the PID only now: the PID file does not exist for worlds that
    # never had a monitor, and reading it eagerly would print a cat error.
    MONITOR_PID=""
    if [ -s "$MONITOR_PID_FILE" ]; then
      MONITOR_PID=$(cat "$MONITOR_PID_FILE")
    fi
    if [ -z "$MONITOR_PID" ]; then
      printf '[%s] [ERROR]: Monitor PID file is missing or empty. Unable to kill monitor process.\n' "$(timestamp)" >> "$MONITOR_LOG"
      exit 1
    fi
    # Kill the server monitor and verify it was actually killed.
    if kill -9 "$MONITOR_PID"; then
      printf '[%s] [INFO]: Server monitor process killed successfully.\n' "$(timestamp)" >> "$MONITOR_LOG"
      # Remove the monitor PID file.
      rm -f "$MONITOR_PID_FILE"
    else
      printf '[%s] [ERROR]: Unable to kill monitor process.\n' "$(timestamp)" >> "$MONITOR_LOG"
      exit 1
    fi
  ) 9>"$MONITOR_LOCK_FILE"
}
# ---------------------------------------------------------------------------
# Run the server monitor.
#
# Polls the world server until it is stopped and its PID file has been
# removed (i.e. clean shutdown).  If the server is not running while its PID
# file still exists, a crash is assumed and the server is restarted; a
# notice is then written to logs/last-start-status.log.  All messages are
# written to stdout.
#
# @param 1 The world server to monitor.
# @return 0 after a clean shutdown, 1 if monitoring was aborted because the
#         restart failed or the server PID file went missing.
# ---------------------------------------------------------------------------
serverMonitor() {
  local WORLD_DIR MONITOR_LOG SERVER_PID_FILE LAST_START_STATUS_LOG MONITOR_PID POLL_INTERVAL
  WORLD_DIR="$WORLDS_LOCATION/$1"
  MONITOR_LOG="$WORLD_DIR/logs/mscs.monitor.log"
  SERVER_PID_FILE="$WORLDS_LOCATION/$1.pid"
  LAST_START_STATUS_LOG="$WORLD_DIR/logs/last-start-status.log"
  # Seconds to wait between checks so the loop does not spin the CPU.
  POLL_INTERVAL=1
  MONITOR_PID=""
  if [ -s "$WORLD_DIR/monitor.pid" ]; then
    MONITOR_PID=$(cat "$WORLD_DIR/monitor.pid")
  fi
  touch "$LAST_START_STATUS_LOG"

  printf '[%s] [INFO]: Server monitoring started for %s. Server PID: %s. Monitor PID: %s.\n' \
    "$(timestamp)" "$1" "$(getJavaPID "$1")" "$MONITOR_PID"
  # Run monitor until the server is stopped and the PID file is removed.
  while true; do
    if serverRunning "$1"; then
      # If server is running and server PID file doesn't exist, error occurred.
      if [ ! -f "$SERVER_PID_FILE" ]; then
        printf '[%s] [ERROR]: PID file doesn'"'"'t exist.\n' "$(timestamp)"
        stopServerMonitor "$1"
        # stopServerMonitor normally kills this process; bail out if it did not.
        return 1
      fi
    elif [ -f "$SERVER_PID_FILE" ]; then
      # If server isn't running and server PID file exists, server crashed.
      printf '[%s] [WARN]: Server crash detected. Attempting to restart %s...\n' "$(timestamp)" "$1"
      # Verify that the server restarted successfully.
      if start "$1"; then
        printf '[%s] [INFO]: Server monitoring resumed for %s. Server PID: %s. Monitor PID: %s.\n' \
          "$(timestamp)" "$1" "$(getJavaPID "$1")" "$MONITOR_PID"
        {
          printf ' %s automatically restarted from a crash (or in-game stop command)\n' "$1"
          printf ' on %s. See\n' "$(timestamp)"
          printf ' %s and\n' "$MONITOR_LOG"
          printf ' %s/crash_reports/ and\n' "$WORLD_DIR"
          printf ' %s/logs/ for more information.\n' "$WORLD_DIR"
        } > "$LAST_START_STATUS_LOG"
      else
        printf '[%s] [ERROR]: Failed to restart %s.\n' "$(timestamp)" "$1"
        stopServerMonitor "$1"
        # Do not retry forever if the monitor could not kill itself.
        return 1
      fi
    else
      # Server stopped and PID file removed: clean shutdown.
      return 0
    fi
    sleep "$POLL_INTERVAL"
  done
}

# ---------------------------------------------------------------------------
# Start the server monitor.
#
# Does nothing unless mscs-restart-after-crash is enabled for the world.  A
# non-blocking flock on the world's monitor.lock file is used to make sure
# only one monitor is started; the monitor runs in the background with its
# output appended to logs/mscs.monitor.log and its PID stored in monitor.pid.
#
# @param 1 The world server to monitor.
# ---------------------------------------------------------------------------
startServerMonitor() {
  local WORLD_DIR MONITOR_LOG MONITOR_PID_FILE MONITOR_LOCK_FILE ACQUIRED_LOCK RESTART_AFTER_CRASH
  WORLD_DIR="$WORLDS_LOCATION/$1"
  MONITOR_LOG="$WORLD_DIR/logs/mscs.monitor.log"
  MONITOR_PID_FILE="$WORLD_DIR/monitor.pid"
  MONITOR_LOCK_FILE="$WORLD_DIR/monitor.lock"
  # Kept local so the per-world value does not overwrite the global setting.
  RESTART_AFTER_CRASH=$(getMSCSValue "$1" "mscs-restart-after-crash" "$DEFAULT_RESTART_AFTER_CRASH")

  # Verify option is enabled.
  true_value "$RESTART_AFTER_CRASH" || return 0
  # The log directory may not exist yet; without it the redirection below
  # fails and the monitor never starts.
  mkdir -p "$WORLD_DIR/logs"
  # Verify that there is no monitor instance currently running.
  (
    "$FLOCK" -n 9
    ACQUIRED_LOCK=$?
    # Anything but 0 means the lock was not acquired: leave the existing
    # monitor alone.
    if [ "$ACQUIRED_LOCK" -ne 0 ]; then
      exit 0
    fi
    # Delete old log file greater than $LOG_DURATION, if it exists.
    if [ -f "$MONITOR_LOG" ] && [ "${LOG_DURATION:-0}" -gt 0 ]; then
      find "$MONITOR_LOG" -type f -mtime +"$LOG_DURATION" -delete
    fi
    # Run the server monitor.
    # Nohup does not allow you to pass functions. However, the code below
    # mimics nohup behavior by doing the following: start subshell, ignore
    # HUP signal, redirect stdin from /dev/null, redirect stdout and stderr
    # to the log file, run in background.
    (
      trap "" HUP
      # The child sh reports its parent, i.e. this background subshell,
      # whose PID is stored for later use.
      sh -c 'echo "$PPID"' > "$MONITOR_PID_FILE"
      serverMonitor "$1"
    ) </dev/null >>"$MONITOR_LOG" 2>&1 &
  ) 9>"$MONITOR_LOCK_FILE"
}

# ---------------------------------------------------------------------------
# Start the world server. Generate the appropriate environment for the
# server if it doesn't already exist.
Expand Down Expand Up @@ -1418,6 +1539,8 @@ start() {
fi
# Create a PID file for the world server.
echo $PID >"$WORLDS_LOCATION/$1.pid"
# Start the server crash monitor, if enabled.
startServerMonitor $1
}

# ---------------------------------------------------------------------------
Expand All @@ -1426,6 +1549,8 @@ start() {
# @param 1 The world server to stop.
# ---------------------------------------------------------------------------
stop() {
# Stop the server monitor if it is running.
stopServerMonitor $1
# Tell the server to stop.
sendCommand $1 "stop"
sendCommand $1 "end"
Expand Down Expand Up @@ -1454,6 +1579,8 @@ stop() {
# ---------------------------------------------------------------------------
forceStop() {
local WAIT
# Stop the server monitor if it is running.
stopServerMonitor $1
# Try to stop the server cleanly first.
sendCommand $1 "stop"
sendCommand $1 "end"
Expand Down Expand Up @@ -2039,7 +2166,10 @@ queryNumUsers() {
# @param 1 The world server of interest.
# ---------------------------------------------------------------------------
worldStatus() {
local STATUS NUM MAX PLAYERS COUNTER VERSION
local WORLD_DIR LAST_START_STATUS_LOG MONITOR_PID STATUS NUM MAX PLAYERS COUNTER VERSION
WORLD_DIR="$WORLDS_LOCATION/$1"
MONITOR_PID="$WORLD_DIR/monitor.pid"
LAST_START_STATUS_LOG="$WORLD_DIR/logs/last-start-status.log"
if serverRunning $1; then
STATUS=$(queryDetailedStatus $1)
if [ -n "$STATUS" ]; then
Expand Down Expand Up @@ -2069,6 +2199,17 @@ worldStatus() {
printf " Memory used: $(getJavaMemory "$1" | awk '{$1=int(100 * $1/1024/1024)/100"GB";}{ print;}')"
printf " ($(getMSCSValue "$1" "mscs-maximum-memory" "$DEFAULT_MAXIMUM_MEMORY" | rev | cut -c 2- | rev | awk '{$1=int($1/1024)"GB";}{ print;}') allocated).\n"
printf " Process ID: %d.\n" $(getJavaPID "$1")
# Display crash monitor PID if it's running (i.e. monitor.pid file exists and not empty).
if [ -f "$MONITOR_PID" ] && [ -s "$MONITOR_PID" ]; then
printf " Crash Monitor PID: $(cat $MONITOR_PID)\n"
fi
# If the last-status log exists and not empty, then last restart was from a crash.
# Display notice once.
if [ -f "$LAST_START_STATUS_LOG" ] && [ -s "$LAST_START_STATUS_LOG" ]; then
printf "$(cat $LAST_START_STATUS_LOG)\n"
# Remove it so user doesn't see it next time they run the status command.
rm -f $LAST_START_STATUS_LOG
fi
elif ! true_value "$(getMSCSValue $1 'mscs-enabled')"; then
printf "disabled.\n"
else
Expand Down Expand Up @@ -2103,7 +2244,7 @@ worldStatusJSON() {
# ---------------------------------------------------------------------------

# Make sure that Java, Perl, libjson-perl, libwww-perl, Python, Wget,
# Rdiff-backup, Rsync, and Socat are installed.
# Rdiff-backup, Rsync, Socat and flock are installed.
# ---------------------------------------------------------------------------
if [ ! -e "$JAVA" ]; then
echo "ERROR: Java not found!"
Expand Down Expand Up @@ -2168,6 +2309,12 @@ if [ ! -e "$SOCAT" ]; then
echo "sudo apt-get install socat"
exit 1
fi
# flock is needed by the server crash monitor, which takes a non-blocking
# lock on each world's monitor.lock file; abort early if it is unavailable.
if [ ! -e "$FLOCK" ]; then
echo "ERROR: flock not found!"
echo "Try installing this with:"
echo "sudo apt-get install util-linux"
exit 1
fi

# Parse command-line options
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -2259,8 +2406,9 @@ fi
# mscs-default-maximum-memory - Default maximum amount of memory for a world server.
# mscs-default-server-location - Default location of the server .jar file.
# mscs-default-server-command - Default command to run for a world server.
# mscs-default-restart-after-crash - Whether to restart the server after a crash is detected (default disabled).
# mscs-backup-location - Location to store backup files.
# mscs-backup-log - Lcation of the backup log file.
# mscs-backup-log - Location of the backup log file.
# mscs-backup-excluded-files - Comma separated list of files and directories excluded from backups.
# mscs-backup-duration - Length in days that backups survive.
# mscs-log-duration - Length in days that logs survive.
Expand Down Expand Up @@ -2309,6 +2457,7 @@ fi
# mscs-default-maximum-memory=2048M
# mscs-default-server-location=/opt/mscs/server
# mscs-default-server-command=$JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS
# mscs-default-restart-after-crash=false
# mscs-backup-location=/opt/mscs/backups
# mscs-backup-log=/opt/mscs/backups/backup.log
# mscs-backup-excluded_files=
Expand Down Expand Up @@ -2356,6 +2505,7 @@ DEFAULT_INITIAL_MEMORY=$(getDefaultsValue 'mscs-default-initial-memory' '128M')
DEFAULT_MAXIMUM_MEMORY=$(getDefaultsValue 'mscs-default-maximum-memory' '2048M')
DEFAULT_SERVER_LOCATION=$(getDefaultsValue 'mscs-default-server-location' $LOCATION'/server')
DEFAULT_SERVER_COMMAND=$(getDefaultsValue 'mscs-default-server-command' '$JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS')
DEFAULT_RESTART_AFTER_CRASH=$(getDefaultsValue 'mscs-default-restart-after-crash' 'false')
# Each world server can override the default values in a similar manner by
# adding certain key/value pairs to the world's mscs.properties file.
#
Expand All @@ -2375,6 +2525,7 @@ DEFAULT_SERVER_COMMAND=$(getDefaultsValue 'mscs-default-server-command' '$JAVA -
# mscs-maximum-memory - Assign the maximum amount of memory for the server.
# mscs-server-location - Assign the location of the server .jar file.
# mscs-server-command - Assign the command to run for the server.
# mscs-restart-after-crash - Restart the server after a crash (default disabled).
#
# Like above, the following variables may be used in some of the key values:
# $JAVA - The Java virtual machine.
Expand Down Expand Up @@ -2404,6 +2555,7 @@ DEFAULT_SERVER_COMMAND=$(getDefaultsValue 'mscs-default-server-command' '$JAVA -
# mscs-maximum-memory=2048M
# mscs-server-location=/opt/mscs/server
# mscs-server-command=$JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS
# mscs-restart-after-crash=false

# World (Server Instance) Configuration
# ---------------------------------------------------------------------------
Expand All @@ -2415,6 +2567,8 @@ VERSIONS_JSON=$(getDefaultsValue 'mscs-versions-json' $LOCATION'/version_manifes
VERSIONS_DURATION=$(getDefaultsValue 'mscs-versions-duration' '30')
# The duration (in minutes) to keep lock files before removing.
LOCKFILE_DURATION=$(getDefaultsValue 'mscs-lockfile-duration' '1440')
# Enable the option to restart the server after a crash is detected (default disabled).
RESTART_AFTER_CRASH=$(getDefaultsValue 'mscs-restart-after-crash' 'false')

# Backup Configuration
# ---------------------------------------------------------------------------
Expand Down

0 comments on commit ee00f9a

Please sign in to comment.