Skip to content

Commit

Permalink
[Service] Enable/disable container auto-restart based on configuratio…
Browse files Browse the repository at this point in the history
  • Loading branch information
yozhao101 authored and pphuchar committed Mar 9, 2020
1 parent 2a6c1dd commit 7924df6
Showing 20 changed files with 72 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
2 changes: 1 addition & 1 deletion dockers/docker-lldp-sv2/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name lldp
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
2 changes: 1 addition & 1 deletion dockers/docker-orchagent/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name swss
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name pmon
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-script]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name radv
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
2 changes: 1 addition & 1 deletion dockers/docker-sflow/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name sflow
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
2 changes: 1 addition & 1 deletion dockers/docker-snmp-sv2/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name snmp
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
2 changes: 1 addition & 1 deletion dockers/docker-sonic-telemetry/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name telemetry
events=PROCESS_STATE_EXITED
autostart=true
autorestart=false
2 changes: 1 addition & 1 deletion dockers/docker-teamd/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name teamd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
42 changes: 38 additions & 4 deletions files/scripts/supervisor-proc-exit-listener
Original file line number Diff line number Diff line change
@@ -1,17 +1,34 @@
#!/usr/bin/env python

import getopt
import os
import signal
import sys
import syslog

import swsssdk

from supervisor import childutils

# Contents of file should be the names of critical processes (as defined in
# supervisor.conf file), one per line
CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes'

def main():
# This table in the database lists the features of each container; each
# feature in a row is configured with a state or a number.
CONTAINER_FEATURE_TABLE_NAME = 'CONTAINER_FEATURE'

def main(argv):
container_name = None
opts, args = getopt.getopt(argv, "c:", ["container-name="])
for opt, arg in opts:
if opt in ("-c", "--container-name"):
container_name = arg

if not container_name:
syslog.syslog(syslog.LOG_ERR, "Container name not specified. Exiting...")
sys.exit(1)

# Read the list of critical processes from a file
with open(CRITICAL_PROCESSES_FILE, 'r') as f:
critical_processes = [line.rstrip('\n') for line in f]
@@ -35,12 +52,29 @@ def main():
processname = payload_headers['processname']
groupname = payload_headers['groupname']

# If a critical process exited unexpectedly, terminate supervisor
if expected == 0 and processname in critical_processes or groupname in critical_processes:
config_db = swsssdk.ConfigDBConnector()
config_db.connect()
container_features_table = config_db.get_table(CONTAINER_FEATURE_TABLE_NAME)
if not container_features_table:
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve container features table from Config DB. Exiting...")
sys.exit(2)

if not container_features_table.has_key(container_name):
syslog.syslog(syslog.LOG_ERR, "Unable to retrieve features for container '{}'. Exiting...".format(container_name))
sys.exit(3)

restart_feature = container_features_table[container_name].get('auto_restart')
if not restart_feature:
syslog.syslog(syslog.LOG_ERR, "Unable to determine auto-restart feature status for container '{}'. Exiting...".format(container_name))
sys.exit(4)

# If auto-restart feature is enabled and a critical process exited unexpectedly, terminate supervisor
if restart_feature == 'enabled' and expected == 0 and (processname in critical_processes or groupname in critical_processes):
MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..."
msg = MSG_FORMAT_STR.format(payload_headers['processname'])
syslog.syslog(syslog.LOG_INFO, msg)
os.kill(os.getppid(), signal.SIGTERM)


if __name__ == "__main__":
main()
main(sys.argv[1:])
6 changes: 6 additions & 0 deletions platform/barefoot/docker-syncd-bfn/supervisord.conf
Original file line number Diff line number Diff line change
@@ -3,6 +3,12 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

[program:start.sh]
command=/usr/bin/start.sh
priority=1
2 changes: 1 addition & 1 deletion platform/broadcom/docker-syncd-brcm/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
2 changes: 1 addition & 1 deletion platform/cavium/docker-syncd-cavm/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
2 changes: 1 addition & 1 deletion platform/centec/docker-syncd-centec/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
6 changes: 6 additions & 0 deletions platform/marvell-arm64/docker-syncd-mrvl/supervisord.conf
Original file line number Diff line number Diff line change
@@ -3,6 +3,12 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

[program:start.sh]
command=/usr/bin/start.sh
priority=1
6 changes: 6 additions & 0 deletions platform/marvell-armhf/docker-syncd-mrvl/supervisord.conf
Original file line number Diff line number Diff line change
@@ -3,6 +3,12 @@ logfile_maxbytes=1MB
logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

[program:start.sh]
command=/usr/bin/start.sh
priority=1
2 changes: 1 addition & 1 deletion platform/marvell/docker-syncd-mrvl/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
2 changes: 1 addition & 1 deletion platform/mellanox/docker-syncd-mlnx/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
2 changes: 1 addition & 1 deletion platform/nephos/docker-syncd-nephos/supervisord.conf
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name syncd
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ logfile_backups=2
nodaemon=true

[eventlistener:supervisor-proc-exit-listener]
command=/usr/bin/supervisor-proc-exit-listener
command=/usr/bin/supervisor-proc-exit-listener --container-name dhcp_relay
events=PROCESS_STATE_EXITED
autostart=true
autorestart=unexpected

0 comments on commit 7924df6

Please sign in to comment.