Skip to content

Commit

Permalink
update: graceful shutdown for recording sidecar container in K8s
Browse files Browse the repository at this point in the history
Signed-off-by: Viet Nguyen Duc <nguyenducviet4496@gmail.com>
  • Loading branch information
VietND96 committed Aug 26, 2024
1 parent 75fb697 commit 918765f
Show file tree
Hide file tree
Showing 21 changed files with 229 additions and 65 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/docker-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ jobs:
uses: nick-invision/retry@master
if: matrix.build-all != true
with:
timeout_minutes: 20
timeout_minutes: 30
max_attempts: 3
retry_wait_seconds: 60
command: |
Expand All @@ -156,7 +156,7 @@ jobs:
- name: Run Docker Compose to ${{ matrix.test-strategy }}
uses: nick-invision/retry@master
with:
timeout_minutes: 20
timeout_minutes: 40
max_attempts: 2
retry_wait_seconds: 60
command: |
Expand Down
2 changes: 1 addition & 1 deletion Base/check-grid.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ do
esac
done

curl -sSL http://${HOST}:${PORT}/wd/hub/status | jq -r '.value.ready' | grep -q "true" || exit 1
curl -skSL ${SE_SERVER_PROTOCOL:-"http"}://${HOST}:${PORT}/wd/hub/status | jq -r '.value.ready' | grep -q "true" || exit 1
5 changes: 5 additions & 0 deletions Base/entry_point.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ function shutdown {
echo "Waiting for Selenium Node to shutdown gracefully..."
bash ${NODE_CONFIG_DIRECTORY}/nodePreStop.sh
fi
if [ -n "${SE_VIDEO_CONTAINER_NAME}" ]; then
# For K8s, when video sidecar container and shareProcessNamespace are enabled in pod spec
echo "Shutting down ${SE_VIDEO_CONTAINER_NAME} container..."
pkill -f "${SE_VIDEO_CONTAINER_NAME}"
fi
kill -s SIGTERM ${SUPERVISOR_PID}
wait ${SUPERVISOR_PID}
echo "Shutdown complete"
Expand Down
3 changes: 3 additions & 0 deletions NodeBase/selenium.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ priority=0
command=/opt/bin/start-xvfb.sh
autostart=true
autorestart=true
killasgroup=true

;Logs
redirect_stderr=false
Expand All @@ -24,6 +25,7 @@ priority=5
command=/opt/bin/start-vnc.sh
autostart=true
autorestart=true
killasgroup=true

;Logs
redirect_stderr=false
Expand All @@ -41,6 +43,7 @@ priority=10
command=/opt/bin/start-novnc.sh
autostart=true
autorestart=true
killasgroup=true

;Logs
redirect_stderr=false
Expand Down
2 changes: 0 additions & 2 deletions Video/entry_point.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ SUPERVISOR_PID=$!
# Stops supervisord and blocks until it has exited.
# Judging by the log message below, this is meant to run as the handler for
# SIGTERM/SIGINT; the trap registration itself is outside this view.
function shutdown {
echo "Trapped SIGTERM/SIGINT/x so shutting down supervisord..."
# Ask supervisord (PID captured in SUPERVISOR_PID) to terminate.
kill -s SIGTERM ${SUPERVISOR_PID}
# Wait on every PID pgrep reports for ffmpeg / rclone; tr flattens the
# newline-separated PID list into a single space-separated argument list.
# NOTE(review): `wait` only accepts children of this shell. These processes are
# presumably started under supervisord rather than by this script, in which case
# these calls return immediately with an error - confirm before relying on them.
wait `pgrep -f ffmpeg | tr '\n' ' '`
wait `pgrep -f rclone | tr '\n' ' '`
# Block until supervisord itself is gone.
wait ${SUPERVISOR_PID}
echo "Shutdown complete"
}
Expand Down
3 changes: 0 additions & 3 deletions Video/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ minprocs=200 ; (min. avail process descriptors;
[program:video-recording]
priority=0
command=/opt/bin/video.sh
stopasgroup = true
killasgroup=true
autostart=true
startsecs=0
Expand All @@ -28,7 +27,6 @@ stdout_logfile_maxbytes=0
[program:video-ready]
priority=5
command=python3 /opt/bin/video_ready.py
stopasgroup = true
killasgroup=true
autostart=true
autorestart=true
Expand All @@ -41,7 +39,6 @@ stdout_logfile_maxbytes=0
[program:video-upload]
priority=10
command=/opt/bin/upload.sh
stopasgroup = true
killasgroup=true
autostart=%(ENV_SE_VIDEO_INTERNAL_UPLOAD)s
autorestart=%(ENV_SE_VIDEO_INTERNAL_UPLOAD)s
Expand Down
41 changes: 21 additions & 20 deletions Video/upload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ UPLOAD_OPTS=${SE_UPLOAD_OPTS:-"-P --cutoff-mode SOFT --metadata"}
UPLOAD_RETAIN_LOCAL_FILE=${SE_UPLOAD_RETAIN_LOCAL_FILE:-"false"}
UPLOAD_PIPE_FILE_NAME=${SE_UPLOAD_PIPE_FILE_NAME:-"uploadpipe"}
SE_VIDEO_INTERNAL_UPLOAD=${SE_VIDEO_INTERNAL_UPLOAD:-"false"}
VIDEO_UPLOAD_ENABLED=${SE_VIDEO_UPLOAD_ENABLED:-"false"}
VIDEO_UPLOAD_BATCH_CHECK=${SE_VIDEO_UPLOAD_BATCH_CHECK:-"10"}
process_name="video.uploader"

Expand Down Expand Up @@ -44,24 +43,6 @@ function rename_rclone_env() {
done
}

# Consume upload requests from ${UPLOAD_PIPE_FILE} until told to stop.
#
# Every line read is split into FILE (first word) and DESTINATION (the rest):
#   - "exit"                -> set FORCE_EXIT=true and terminate the script
#   - "<file> <destination>" -> hand the pair to rclone_upload
#   - anything else          -> terminate only if ${FORCE_EXIT_FILE} exists
#
# Globals read:    UPLOAD_PIPE_FILE, FORCE_EXIT_FILE, process_name
# Globals written: FILE, DESTINATION, FORCE_EXIT
# Does not return normally: it leaves via `exit`, or when `read` fails.
function consume_pipe_file() {
  # The redirect is part of the loop condition, so the pipe is re-opened for
  # every line. The target is quoted so a path containing whitespace does not
  # fail with "ambiguous redirect"; -r keeps backslashes in paths intact.
  while read -r FILE DESTINATION < "${UPLOAD_PIPE_FILE}"; do
    if [ "${FILE}" = "exit" ]; then
      FORCE_EXIT=true
      exit
    elif [ -n "${FILE}" ] && [ -n "${DESTINATION}" ]; then
      rclone_upload "${FILE}" "${DESTINATION}"
    # Quoted on purpose: an empty FORCE_EXIT_FILE would otherwise collapse the
    # test to `[ -f ]`, which is always true.
    elif [ -f "${FORCE_EXIT_FILE}" ]; then
      echo "$(date +%FT%T%Z) [${process_name}] - Force exit signal detected"
      exit
    fi
  done
}

list_rclone_pid=()
function check_and_clear_background() {
# Wait for a batch rclone processes to finish
Expand All @@ -79,7 +60,27 @@ function rclone_upload() {
local source=$1
local target=$2
echo "$(date +%FT%T%Z) [${process_name}] - Uploading ${source} to ${target}"
exec rclone --config ${UPLOAD_CONFIG_DIRECTORY}/${UPLOAD_CONFIG_FILE_NAME} ${UPLOAD_COMMAND} ${UPLOAD_OPTS} "${source}" "${target}" &
rclone --config ${UPLOAD_CONFIG_DIRECTORY}/${UPLOAD_CONFIG_FILE_NAME} ${UPLOAD_COMMAND} ${UPLOAD_OPTS} "${source}" "${target}" &
list_rclone_pid+=($!)
check_and_clear_background
}

# Consume upload requests from ${UPLOAD_PIPE_FILE} until told to stop.
#
# Every line read is split into FILE (first word) and DESTINATION (the rest):
#   - "exit"                -> set FORCE_EXIT=true and terminate the script
#   - "<file> <destination>" -> hand the pair to rclone_upload
#   - anything else          -> terminate only if ${FORCE_EXIT_FILE} exists
#
# Globals read:    UPLOAD_PIPE_FILE, FORCE_EXIT_FILE, process_name
# Globals written: FILE, DESTINATION, FORCE_EXIT
# Does not return normally: it leaves via `exit`, or when `read` fails.
function consume_pipe_file() {
  # The redirect is part of the loop condition, so the pipe is re-opened for
  # every line. The target is quoted so a path containing whitespace does not
  # fail with "ambiguous redirect"; -r keeps backslashes in paths intact.
  while read -r FILE DESTINATION < "${UPLOAD_PIPE_FILE}"; do
    if [ "${FILE}" = "exit" ]; then
      FORCE_EXIT=true
      exit
    elif [ -n "${FILE}" ] && [ -n "${DESTINATION}" ]; then
      rclone_upload "${FILE}" "${DESTINATION}"
    # Quoted on purpose: an empty FORCE_EXIT_FILE would otherwise collapse the
    # test to `[ -f ]`, which is always true.
    elif [ -f "${FORCE_EXIT_FILE}" ]; then
      echo "$(date +%FT%T%Z) [${process_name}] - Force exit signal detected"
      exit
    fi
  done
}

function graceful_exit() {
Expand Down
64 changes: 37 additions & 27 deletions Video/video.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ VIDEO_FOLDER=${VIDEO_FOLDER}
VIDEO_UPLOAD_ENABLED=${VIDEO_UPLOAD_ENABLED:-$SE_VIDEO_UPLOAD_ENABLED}
VIDEO_CONFIG_DIRECTORY=${VIDEO_CONFIG_DIRECTORY:-"/opt/bin"}
UPLOAD_DESTINATION_PREFIX=${UPLOAD_DESTINATION_PREFIX:-$SE_UPLOAD_DESTINATION_PREFIX}
UPLOAD_PIPE_FILE_NAME=${UPLOAD_PIPE_FILE_NAME:-"uploadpipe"}
UPLOAD_PIPE_FILE_NAME=${SE_UPLOAD_PIPE_FILE_NAME:-"uploadpipe"}
SE_VIDEO_INTERNAL_UPLOAD=${SE_VIDEO_INTERNAL_UPLOAD:-"false"}
SE_SERVER_PROTOCOL=${SE_SERVER_PROTOCOL:-"http"}
max_attempts=${SE_VIDEO_WAIT_ATTEMPTS:-50}
Expand Down Expand Up @@ -57,13 +57,34 @@ function create_pipe() {
fi
}

# Block until the X display of the browser container accepts connections,
# then record its resolution.
#
# Probes the display with `xset b off` every 0.5s. After ${max_attempts} failed
# retries the script terminates via `exit`.
#
# Globals read:    DISPLAY_CONTAINER_NAME, DISPLAY_NUM, max_attempts, process_name
# Globals written: DISPLAY (exported), attempts, VIDEO_SIZE
function wait_for_display() {
  export DISPLAY=${DISPLAY_CONTAINER_NAME}:${DISPLAY_NUM}.0
  attempts=0

  echo "$(date +%FT%T%Z) [${process_name}] - Checking if the display is open"
  # Give up only when a probe has actually failed with the retry budget spent.
  # Deciding from the counter after the loop would also discard a probe that
  # succeeds on the very last permitted attempt.
  until xset b off; do
    if [[ $attempts = "$max_attempts" ]]; then
      echo "$(date +%FT%T%Z) [${process_name}] - Can not open display, exiting."
      exit
    fi
    echo "$(date +%FT%T%Z) [${process_name}] - Waiting before next display check"
    sleep 0.5
    attempts=$((attempts+1))
  done

  # Second field of xdpyinfo's "dimensions:" line, e.g. 1920x1080.
  VIDEO_SIZE=$(xdpyinfo | grep 'dimensions:' | awk '{print $2}')
}

function wait_util_uploader_shutdown() {
max_wait=5
wait=0
if [[ "${VIDEO_UPLOAD_ENABLED}" != "false" ]] && [[ -n "${UPLOAD_DESTINATION_PREFIX}" ]];
then
while [[ -f ${FORCE_EXIT_FILE} ]] && [[ ${wait} -lt ${max_wait} ]];
do
echo "exit" >> ${UPLOAD_PIPE_FILE} &
echo "$(date +%FT%T%Z) [${process_name}] - Waiting for force exit file to be consumed by external upload container"
sleep 1
wait=$((wait+1))
Expand All @@ -73,6 +94,7 @@ function wait_util_uploader_shutdown() {
then
while [[ $(pgrep rclone | wc -l) -gt 0 ]]
do
echo "exit" >> ${UPLOAD_PIPE_FILE} &
echo "$(date +%FT%T%Z) [${process_name}] - Recorder is waiting for RCLONE to finish"
sleep 1
done
Expand Down Expand Up @@ -134,6 +156,12 @@ function check_if_recording_inprogress() {
fi
}

# Pretty-print /tmp/graphQL_<session_id>.json through jq when that file exists;
# do nothing (and succeed) when it does not.
# Globals read: session_id
function log_node_response() {
  local response_file="/tmp/graphQL_$session_id.json"
  [[ -f "$response_file" ]] || return 0
  jq '.' "$response_file"
}

function graceful_exit() {
check_if_recording_inprogress
send_exit_signal_to_uploader
Expand All @@ -160,27 +188,9 @@ if [[ "${VIDEO_UPLOAD_ENABLED}" != "true" ]] && [[ "${VIDEO_FILE_NAME}" != "auto
-video_size ${VIDEO_SIZE} -r ${FRAME_RATE} -i ${DISPLAY_CONTAINER_NAME}:${DISPLAY_NUM}.0 -codec:v ${CODEC} ${PRESET} -pix_fmt yuv420p "$VIDEO_FOLDER/$VIDEO_FILE_NAME"

else
create_pipe
trap graceful_exit SIGTERM SIGINT EXIT
export DISPLAY=${DISPLAY_CONTAINER_NAME}:${DISPLAY_NUM}.0

attempts=0

echo "$(date +%FT%T%Z) [${process_name}] - Checking if the display is open"
until xset b off || [[ $attempts = "$max_attempts" ]]
do
echo "$(date +%FT%T%Z) [${process_name}] - Waiting before next display check"
sleep 0.5
attempts=$((attempts+1))
done
if [[ $attempts = "$max_attempts" ]];
then
echo "$(date +%FT%T%Z) [${process_name}] - Can not open display, exiting."
exit
fi

VIDEO_SIZE=$(xdpyinfo | grep 'dimensions:' | awk '{print $2}')

create_pipe
wait_for_display
recording_started="false"
video_file_name=""
video_file=""
Expand All @@ -201,7 +211,7 @@ else
done
if [[ $attempts = "$max_attempts" ]];
then
echo "$(date +%FT%T%Z) [${process_name}] - Can not reach node API, exiting."
echo "$(date +%FT%T%Z) [${process_name}] - Can not reach node API, reach the max attempts $max_attempts, exiting."
exit
fi
while curl --noproxy "*" -sk --request GET ${NODE_STATUS_ENDPOINT} > /tmp/status.json
Expand All @@ -214,15 +224,13 @@ else
caps_se_video_record=${return_list[0]}
video_file_name="${return_list[1]}.mp4"
echo "$(date +%FT%T%Z) [${process_name}] - Start recording: $caps_se_video_record, video file name: $video_file_name"
if [[ -f "/tmp/graphQL_$session_id.json" ]]; then
jq '.' "/tmp/graphQL_$session_id.json";
fi
log_node_response
fi
if [[ "$session_id" != "null" && "$session_id" != "" && "$session_id" != "reserved" && "$recording_started" = "false" && "$caps_se_video_record" = "true" ]];
then
video_file="${VIDEO_FOLDER}/$video_file_name"
echo "$(date +%FT%T%Z) [${process_name}] - Starting to record video"
exec ffmpeg -hide_banner -loglevel warning -flags low_delay -threads 2 -fflags nobuffer+genpts -strict experimental -y -f x11grab \
ffmpeg -hide_banner -loglevel warning -flags low_delay -threads 2 -fflags nobuffer+genpts -strict experimental -y -f x11grab \
-video_size ${VIDEO_SIZE} -r ${FRAME_RATE} -i ${DISPLAY} -codec:v ${CODEC} ${PRESET} -pix_fmt yuv420p "$video_file" &
recording_started="true"
echo "$(date +%FT%T%Z) [${process_name}] - Video recording started"
Expand All @@ -244,6 +252,8 @@ else
fi
prev_session_id=$session_id
done
echo "$(date +%FT%T%Z) [${process_name}] - Node API is not responding, exiting."
echo "$(date +%FT%T%Z) [${process_name}] - Last response from node API..."
log_node_response
echo "$(date +%FT%T%Z) [${process_name}] - Node API is not responding now, exiting..."
exit
fi
7 changes: 5 additions & 2 deletions charts/selenium-grid/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ template:
{{- toYaml . | nindent 6 }}
{{- end }}
spec:
shareProcessNamespace: {{ $.Values.global.seleniumGrid.stdoutProbeLog | ternary "false" .node.shareProcessNamespace }}
serviceAccountName: {{ template "seleniumGrid.serviceAccount.fullname" . }}
serviceAccount: {{ template "seleniumGrid.serviceAccount.fullname" . }}
restartPolicy: {{ template "seleniumGrid.node.restartPolicy" . }}
Expand Down Expand Up @@ -422,7 +423,7 @@ template:
{{- toYaml .node.sidecars | nindent 6 }}
{{- end }}
{{- if $.Values.videoRecorder.enabled }}
- name: video
- name: {{ $.Values.videoRecorder.name }}
{{- $imageTag := default $.Values.global.seleniumGrid.videoImageTag $.Values.videoRecorder.imageTag }}
{{- $imageRegistry := default $.Values.global.seleniumGrid.imageRegistry $.Values.videoRecorder.imageRegistry }}
image: {{ printf "%s/%s:%s" $imageRegistry $.Values.videoRecorder.imageName $imageTag }}
Expand Down Expand Up @@ -482,7 +483,7 @@ template:
lifecycle: {{- toYaml . | nindent 10 }}
{{- end }}
{{- if and $.Values.videoRecorder.uploader.enabled (not (empty $.Values.videoRecorder.uploader.name)) }}
- name: uploader
- name: {{ default "uploader" $.Values.videoRecorder.uploader.name }}
{{- $imageTag := .uploader.imageTag }}
{{- $imageRegistry := .uploader.imageRegistry }}
image: {{ printf "%s/%s:%s" $imageRegistry .uploader.imageName $imageTag }}
Expand Down Expand Up @@ -725,6 +726,8 @@ Define terminationGracePeriodSeconds of the node pod.
{{- $period := $nodePeriod -}}
{{- if and (eq .Values.autoscaling.scalingType "deployment") (eq (include "seleniumGrid.useKEDA" $) "true") -}}
{{- $period = ternary $nodePeriod $autoscalingPeriod (gt $nodePeriod $autoscalingPeriod) -}}
{{- else if and (eq .Values.autoscaling.scalingType "job") (eq (include "seleniumGrid.useKEDA" $) "true") }}
{{- $period = 30 -}}
{{- end -}}
{{- $period -}}
{{- end -}}
Expand Down
3 changes: 3 additions & 0 deletions charts/selenium-grid/templates/node-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ data:
SE_DRAIN_AFTER_SESSION_COUNT: '{{- and (eq (include "seleniumGrid.useKEDA" .) "true") (eq .Values.autoscaling.scalingType "job") | ternary "1" "0" -}}'
SE_NODE_GRID_URL: '{{ include "seleniumGrid.url" $ }}'
SE_NODE_GRID_GRAPHQL_URL: '{{ include "seleniumGrid.graphqlURL" $ }}'
{{- if $.Values.videoRecorder.enabled }}
SE_VIDEO_CONTAINER_NAME: {{ $.Values.videoRecorder.name | quote }}
{{- end }}
{{- if $.Values.nodeConfigMap.leftoversCleanup.enabled }}
SE_ENABLE_BROWSER_LEFTOVERS_CLEANUP: 'true'
{{- with $.Values.nodeConfigMap.leftoversCleanup.jobIntervalInSecs }}
Expand Down
7 changes: 6 additions & 1 deletion charts/selenium-grid/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,7 @@ chromeNode:
annotations: {}
# Labels for chrome-node pods
labels: {}
shareProcessNamespace: true
# Resources for chrome-node container
resources:
requests:
Expand Down Expand Up @@ -909,6 +910,7 @@ firefoxNode:
tolerations: []
# Node selector for firefox-node pods
nodeSelector: {}
shareProcessNamespace: true
# Resources for firefox-node container
resources:
requests:
Expand Down Expand Up @@ -1068,6 +1070,7 @@ edgeNode:
tolerations: []
# Node selector for edge-node pods
nodeSelector: {}
shareProcessNamespace: true
# Resources for edge-node container
resources:
requests:
Expand Down Expand Up @@ -1191,6 +1194,8 @@ edgeNode:

videoRecorder:
enabled: false
# Name of the video recorder container in the pod spec
name: video
# imageRegistry: selenium
# Image of video recorder
imageName: video
Expand All @@ -1204,7 +1209,7 @@ videoRecorder:
enabled: false
# Where to upload the video file e.g. remoteName://bucketName/path. Refer to destination syntax of rclone https://rclone.org/docs/
destinationPrefix:
# What uploader to use. See .videoRecorder.rclone for how to create a new one.
# What uploader to use. See .videoRecorder.s3 for how to create a new one.
name:
configFileName: upload.conf
entryPointFileName: upload.sh
Expand Down
4 changes: 2 additions & 2 deletions tests/charts/ci/JobAutoscaling-values.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
autoscaling:
scalingType: job
scaledJobOptions:
successfulJobsHistoryLimit: 0
failedJobsHistoryLimit: 0
successfulJobsHistoryLimit: 100
failedJobsHistoryLimit: 100
scalingStrategy:
strategy: default
scaledOptions:
Expand Down
4 changes: 2 additions & 2 deletions tests/charts/ci/base-auth-ingress-values.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
global:
seleniumGrid:
logLevel: INFO
stdoutProbeLog: true
stdoutProbeLog: false

serverConfigMap:
env:
SE_SUPERVISORD_LOG_LEVEL: "error"
SE_SUPERVISORD_LOG_LEVEL: "info"

ingress:
enabled: true
Expand Down
Loading

0 comments on commit 918765f

Please sign in to comment.