-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
Copy pathswss.sh
executable file
·551 lines (477 loc) · 17.3 KB
/
swss.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
#!/bin/bash
DEV=$2
SERVICE="swss"
PEER="syncd"
DEBUGLOG="/tmp/swss-syncd-debug$DEV.log"
LOCKFILE="/tmp/swss-syncd-lock$DEV"
NAMESPACE_PREFIX="asic"
ETC_SONIC_PATH="/etc/sonic/"
. /usr/local/bin/asic_status.sh
function debug()
{
/usr/bin/logger $1
/bin/echo `date` "- $1" >> ${DEBUGLOG}
}
function read_dependent_services()
{
# Update dependent list based on other packages requirements
if [[ -f ${ETC_SONIC_PATH}/${SERVICE}_dependent ]]; then
DEPENDENT="${DEPENDENT} $(cat ${ETC_SONIC_PATH}/${SERVICE}_dependent)"
fi
if [[ -f ${ETC_SONIC_PATH}/${SERVICE}_multi_inst_dependent ]]; then
MULTI_INST_DEPENDENT="${MULTI_INST_DEPENDENT} $(cat ${ETC_SONIC_PATH}/${SERVICE}_multi_inst_dependent)"
fi
}
function lock_service_state_change()
{
debug "Locking ${LOCKFILE} from ${SERVICE}$DEV service"
exec {LOCKFD}>${LOCKFILE}
/usr/bin/flock -x ${LOCKFD}
trap "/usr/bin/flock -u ${LOCKFD}" EXIT
debug "Locked ${LOCKFILE} (${LOCKFD}) from ${SERVICE}$DEV service"
}
function unlock_service_state_change()
{
debug "Unlocking ${LOCKFILE} (${LOCKFD}) from ${SERVICE}$DEV service"
/usr/bin/flock -u ${LOCKFD}
}
function check_warm_boot()
{
SYSTEM_WARM_START=`$SONIC_DB_CLI STATE_DB hget "WARM_RESTART_ENABLE_TABLE|system" enable`
SERVICE_WARM_START=`$SONIC_DB_CLI STATE_DB hget "WARM_RESTART_ENABLE_TABLE|${SERVICE}" enable`
if [[ x"$SYSTEM_WARM_START" == x"true" ]] || [[ x"$SERVICE_WARM_START" == x"true" ]]; then
WARM_BOOT="true"
else
WARM_BOOT="false"
fi
}
function check_fast_boot()
{
SYSTEM_FAST_REBOOT=`sonic-db-cli STATE_DB hget "FAST_RESTART_ENABLE_TABLE|system" enable`
if [[ x"${SYSTEM_FAST_REBOOT}" == x"true" ]]; then
FAST_BOOT="true"
else
FAST_BOOT="false"
fi
}
function validate_restore_count()
{
if [[ x"$WARM_BOOT" == x"true" ]]; then
RESTORE_COUNT=`$SONIC_DB_CLI STATE_DB hget "WARM_RESTART_TABLE|orchagent" restore_count`
# We have to make sure db data has not been flushed.
if [[ -z "$RESTORE_COUNT" ]]; then
WARM_BOOT="false"
fi
fi
}
function wait_for_database_service()
{
# Wait for redis server start before database clean
until [[ $($SONIC_DB_CLI PING | grep -c PONG) -gt 0 ]]; do
sleep 1;
done
# Wait for configDB initialization
until [[ $($SONIC_DB_CLI CONFIG_DB GET "CONFIG_DB_INITIALIZED") -eq 1 ]];
do sleep 1;
done
}
# This function cleans up the tables with specific prefixes from the database
# $1 the index of the database
# $2 the string of a list of table prefixes
function clean_up_tables()
{
$SONIC_DB_CLI $1 EVAL "
local tables = {$2}
for i = 1, table.getn(tables) do
local matches = redis.call('KEYS', tables[i])
for j,name in ipairs(matches) do
redis.call('DEL', name)
end
end" 0
}
# This function cleans up the chassis db table entries created ONLY by this asic
# This is used to do the clean up operation when the line card / asic reboots
# When the asic/lc is RE-booting, the chassis db server is supposed to be running
# in the supervisor. So the clean up is done when only the chassis db connectable.
# Otherwise no need to do the clean up since both the supervisor and line card may be
# rebooting (the whole chassis scenario)
# The clean up operation is required to delete only those entries created by
# the asic that is rebooted. Entries from the following tables are deleted in the order
# given below
# (1) SYSTEM_NEIGH
# (2) SYSTEM_INTERFACE
# (3) SYSTEM_LAG_MEMBER_TABLE
# (4) SYSTEM_LAG_TABLE
# (5) The corresponding LAG IDs of the entries from SYSTEM_LAG_TABLE
# SYSTEM_LAG_ID_TABLE and SYSTEM_LAG_ID_SET are adjusted appropriately
function clean_up_chassis_db_tables()
{
switch_type=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'switch_type'`
# Run clean up only in swss running for voq switches
if is_chassis_supervisor || [[ $switch_type != 'voq' ]]; then
return
fi
until [[ $($SONIC_DB_CLI CHASSIS_APP_DB PING | grep -c True) -gt 0 ]]; do
sleep 1
done
lc=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'hostname'`
until [[ -n "${lc}" ]]; do
lc=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'hostname'`
sleep 1
done
debug "Chassis db clean up for ${SERVICE}$DEV. hostname=$lc"
asic=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'asic_name'`
until [[ -n "${asic}" ]]; do
asic=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' 'asic_name'`
sleep 1
done
debug "Chassis db clean up for ${SERVICE}$DEV. asic=$asic"
# First, delete SYSTEM_NEIGH entries
num_neigh=`$SONIC_DB_CLI CHASSIS_APP_DB EVAL "
local nn = 0
local host = string.gsub(ARGV[1], '%-', '%%-')
local dev = ARGV[2]
local ps = 'SYSTEM_NEIGH*|' .. host .. '|' .. dev
local keylist = redis.call('KEYS', 'SYSTEM_NEIGH*')
for j,key in ipairs(keylist) do
if string.match(key, ps) ~= nil then
redis.call('DEL', key)
nn = nn + 1
end
end
return nn" 0 $lc $asic`
debug "Chassis db clean up for ${SERVICE}$DEV. Number of SYSTEM_NEIGH entries deleted: $num_neigh"
# Wait for some time before deleting system interface so that the system interface's "object in use"
# is cleared in both orchangent and in syncd. Without this delay, the orchagent clears the refcount
# but the syncd (meta) still has no-zero refcount. Because of this, orchagent gets "object still in use"
# error and aborts.
# This delay is needed only if some system neighbors were deleted.
if [[ $num_neigh > 0 ]]; then
sleep 30
fi
# Next, delete SYSTEM_INTERFACE entries
num_sys_intf=`$SONIC_DB_CLI CHASSIS_APP_DB EVAL "
local nsi = 0
local host = string.gsub(ARGV[1], '%-', '%%-')
local dev = ARGV[2]
local ps = 'SYSTEM_INTERFACE*|' .. host .. '|' .. dev
local keylist = redis.call('KEYS', 'SYSTEM_INTERFACE*')
for j,key in ipairs(keylist) do
if string.match(key, ps) ~= nil then
redis.call('DEL', key)
nsi = nsi + 1
end
end
return nsi" 0 $lc $asic`
debug "Chassis db clean up for ${SERVICE}$DEV. Number of SYSTEM_INTERFACE entries deleted: $num_sys_intf"
# Next, delete SYSTEM_LAG_MEMBER_TABLE entries
num_lag_mem=`$SONIC_DB_CLI CHASSIS_APP_DB EVAL "
local nlm = 0
local host = string.gsub(ARGV[1], '%-', '%%-')
local dev = ARGV[2]
local ps = 'SYSTEM_LAG_MEMBER_TABLE*|' .. host .. '|' .. dev
local keylist = redis.call('KEYS', 'SYSTEM_LAG_MEMBER_TABLE*')
for j,key in ipairs(keylist) do
if string.match(key, ps) ~= nil then
redis.call('DEL', key)
nlm = nlm + 1
end
end
return nlm" 0 $lc $asic`
debug "Chassis db clean up for ${SERVICE}$DEV. Number of SYSTEM_LAG_MEMBER_TABLE entries deleted: $num_lag_mem"
# Wait for some time before deleting system lag so that the all the memebers of the
# system lag will be cleared.
# This delay is needed only if some system lag members were deleted
if [[ $num_lag_mem > 0 ]]; then
sleep 15
fi
# Finally, delete SYSTEM_LAG_TABLE entries and deallot LAG IDs
num_sys_lag=`$SONIC_DB_CLI CHASSIS_APP_DB EVAL "
local nsl = 0
local host = string.gsub(ARGV[1], '%-', '%%-')
local dev = ARGV[2]
local ps = 'SYSTEM_LAG_TABLE*|' .. '(' .. host .. '|' .. dev ..'.*' .. ')'
local keylist = redis.call('KEYS', 'SYSTEM_LAG_TABLE*')
for j,key in ipairs(keylist) do
local lagname = string.match(key, ps)
if lagname ~= nil then
redis.call('DEL', key)
local lagid = redis.call('HGET', 'SYSTEM_LAG_ID_TABLE', lagname)
redis.call('SREM', 'SYSTEM_LAG_ID_SET', lagid)
redis.call('HDEL', 'SYSTEM_LAG_ID_TABLE', lagname)
redis.call('rpush', 'SYSTEM_LAG_IDS_FREE_LIST', lagid)
nsl = nsl + 1
end
end
return nsl" 0 $lc $asic`
debug "Chassis db clean up for ${SERVICE}$DEV. Number of SYSTEM_LAG_TABLE entries deleted: $num_sys_lag"
}
is_feature_enabled()
{
s=$1
service=${s%@*}
state=$(sonic-db-cli CONFIG_DB hget "FEATURE|${service}" "state")
if [[ $state == "enabled" ]]; then
echo "true"
else
echo "false"
fi
}
start_peer_and_dependent_services() {
check_warm_boot
if [[ x"$WARM_BOOT" != x"true" ]]; then
for peer in ${PEER}; do
if [[ ! -z $DEV ]]; then
/bin/systemctl start ${peer}@$DEV
else
/bin/systemctl start ${peer}
fi
done
for dep in ${DEPENDENT}; do
if [[ $dep == "dhcp_relay" ]]; then
state=$(is_feature_enabled $dep)
if [[ $state == "true" ]]; then
/bin/systemctl start ${dep}
fi
else
/bin/systemctl start ${dep}
fi
done
for dep in ${MULTI_INST_DEPENDENT}; do
if [[ ! -z $DEV ]]; then
/bin/systemctl start ${dep}@$DEV
else
/bin/systemctl start ${dep}
fi
done
fi
}
stop_peer_and_dependent_services() {
# if warm/fast start enabled or peer lock exists, don't stop peer service docker
if [[ x"$WARM_BOOT" != x"true" ]] && [[ x"$FAST_BOOT" != x"true" ]]; then
for dep in ${MULTI_INST_DEPENDENT}; do
if [[ ! -z $DEV ]]; then
/bin/systemctl stop ${dep}@$DEV
else
/bin/systemctl stop ${dep}
fi
done
for dep in ${DEPENDENT}; do
/bin/systemctl stop ${dep}
done
for peer in ${PEER}; do
if [[ ! -z $DEV ]]; then
/bin/systemctl stop ${peer}@$DEV
else
/bin/systemctl stop ${peer}
fi
done
fi
}
start() {
debug "Starting ${SERVICE}$DEV service..."
lock_service_state_change
wait_for_database_service
check_warm_boot
validate_restore_count
debug "Warm boot flag: ${SERVICE}$DEV ${WARM_BOOT}."
# Don't flush DB during warm boot
if [[ x"$WARM_BOOT" != x"true" ]]; then
debug "Flushing APP, ASIC, COUNTER, CONFIG, and partial STATE databases ..."
$SONIC_DB_CLI APPL_DB FLUSHDB
$SONIC_DB_CLI ASIC_DB FLUSHDB
$SONIC_DB_CLI COUNTERS_DB FLUSHDB
$SONIC_DB_CLI FLEX_COUNTER_DB FLUSHDB
$SONIC_DB_CLI GB_ASIC_DB FLUSHDB
$SONIC_DB_CLI GB_COUNTERS_DB FLUSHDB
$SONIC_DB_CLI RESTAPI_DB FLUSHDB
clean_up_tables STATE_DB "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'LAG_TABLE*', 'LAG_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*', 'VRF_TABLE*', 'FDB_TABLE*', 'FG_ROUTE_TABLE*', 'BUFFER_POOL*', 'BUFFER_PROFILE*', 'MUX_CABLE_TABLE*', 'ADVERTISE_NETWORK_TABLE*', 'VXLAN_TUNNEL_TABLE*', 'VNET_ROUTE*', 'MACSEC_PORT_TABLE*', 'MACSEC_INGRESS_SA_TABLE*', 'MACSEC_EGRESS_SA_TABLE*', 'MACSEC_INGRESS_SC_TABLE*', 'MACSEC_EGRESS_SC_TABLE*', 'VRF_OBJECT_TABLE*', 'VNET_MONITOR_TABLE*', 'BFD_SESSION_TABLE*', 'SYSTEM_NEIGH_TABLE*', 'FABRIC_PORT_TABLE*', 'TUNNEL_DECAP_TABLE*', 'TUNNEL_DECAP_TERM_TABLE*'"
$SONIC_DB_CLI APPL_STATE_DB FLUSHDB
clean_up_chassis_db_tables
rm -rf /tmp/cache
fi
# On supervisor card, skip starting asic related services here. In wait(),
# wait until the asic is detected by pmon and published via database.
if ! is_chassis_supervisor; then
# start service docker
/usr/bin/${SERVICE}.sh start $DEV
debug "Started ${SERVICE}$DEV service..."
fi
# Unlock has to happen before reaching out to peer service
unlock_service_state_change
}
wait() {
# On supervisor card, wait for asic to be online before starting the docker.
if is_chassis_supervisor; then
check_asic_status
ASIC_STATUS=$?
# start service docker
if [[ $ASIC_STATUS == 0 ]]; then
/usr/bin/${SERVICE}.sh start $DEV
debug "Started ${SERVICE}$DEV service..."
fi
fi
start_peer_and_dependent_services
# Allow some time for peer container to start
# NOTE: This assumes Docker containers share the same names as their
# corresponding services
for SECS in {1..60}; do
ALL_PEERS_RUNNING=true
for peer in ${PEER}; do
if [[ ! -z $DEV ]]; then
RUNNING=$(docker inspect -f '{{.State.Running}}' ${peer}$DEV)
else
RUNNING=$(docker inspect -f '{{.State.Running}}' ${peer})
fi
if [[ x"$RUNNING" != x"true" ]]; then
ALL_PEERS_RUNNING=false
break
fi
done
ALL_DEPS_RUNNING=true
for dep in ${MULTI_INST_DEPENDENT}; do
if [[ ! -z $DEV ]]; then
DEP_RUNNING=$(docker inspect -f '{{.State.Running}}' ${dep}$DEV)
else
DEP_RUNNING=$(docker inspect -f '{{.State.Running}}' ${dep})
fi
if [[ x"$DEP_RUNNING" != x"true" ]]; then
ALL_DEPS_RUNNING=false
break
fi
done
if [[ x"$ALL_PEERS_RUNNING" == x"true" && x"$ALL_DEPS_RUNNING" == x"true" ]]; then
break
else
sleep 1
fi
done
# NOTE: This assumes Docker containers share the same names as their
# corresponding services
for dep in ${MULTI_INST_DEPENDENT}; do
if [[ ! -z $DEV ]]; then
ALL_DEPS="$ALL_DEPS ${dep}$DEV"
else
ALL_DEPS="$ALL_DEPS ${dep}"
fi
done
if [[ ! -z $DEV ]]; then
/usr/bin/docker-wait-any -s ${SERVICE}$DEV -d `printf "%s$DEV " ${PEER}` ${ALL_DEPS}
else
/usr/bin/docker-wait-any -s ${SERVICE} -d ${PEER} ${ALL_DEPS}
fi
}
stop() {
debug "Stopping ${SERVICE}$DEV service..."
[[ -f ${LOCKFILE} ]] || /usr/bin/touch ${LOCKFILE}
lock_service_state_change
check_warm_boot
debug "Warm boot flag: ${SERVICE}$DEV ${WARM_BOOT}."
check_fast_boot
debug "Fast boot flag: ${SERVICE}$DEV ${FAST_BOOT}."
# For WARM/FAST boot do not perform service stop
if [[ x"$WARM_BOOT" != x"true" ]] && [[ x"$FAST_BOOT" != x"true" ]]; then
/usr/bin/${SERVICE}.sh stop $DEV
debug "Stopped ${SERVICE}$DEV service..."
$SONIC_DB_CLI APPL_DB DEL PORT_TABLE:PortInitDone
debug "Cleared PortInitDone from APPL_DB for ${SERVICE}$DEV..."
clean_up_tables STATE_DB "'FABRIC_PORT_TABLE*'"
debug "Cleared FABRIC_PORT_TABLE from STATE_DB for ${SERVICE}$DEV..."
else
debug "Killing Docker swss..."
/usr/bin/docker kill swss &> /dev/null || debug "Docker swss is not running ($?) ..."
fi
# Flush FAST_REBOOT table when swss needs to stop. The only
# time when this would take effect is when fast-reboot
# encountered error, e.g. syncd crashed. And swss needs to
# be restarted.
if [[ x"$FAST_BOOT" != x"true" ]]; then
debug "Clearing FAST_RESTART_ENABLE_TABLE flag..."
sonic-db-cli STATE_DB hset "FAST_RESTART_ENABLE_TABLE|system" "enable" "false"
fi
# Unlock has to happen before reaching out to peer service
unlock_service_state_change
stop_peer_and_dependent_services
}
function check_peer_gbsyncd()
{
GEARBOX_CONFIG=/usr/share/sonic/device/$PLATFORM/$HWSKU/$DEV/gearbox_config.json
if [ -f $GEARBOX_CONFIG ]; then
PEER="$PEER gbsyncd"
fi
}
function check_macsec()
{
MACSEC_STATE=`$SONIC_DB_CLI CONFIG_DB hget 'FEATURE|macsec' state`
if [[ ${MACSEC_STATE} == 'enabled' ]]; then
if [ "$DEV" ]; then
DEPENDENT="${DEPENDENT} macsec@${DEV}"
else
DEPENDENT="${DEPENDENT} macsec"
fi
fi
}
function check_add_bgp_dependency()
{
if ! is_chassis_supervisor; then
if [ "$DEV" ]; then
DEPENDENT="${DEPENDENT} bgp@${DEV}"
else
DEPENDENT="${DEPENDENT} bgp"
fi
fi
}
function check_ports_present()
{
PORT_CONFIG_INI=/usr/share/sonic/device/$PLATFORM/$HWSKU/$DEV/port_config.ini
HWSKU_JSON=/usr/share/sonic/device/$PLATFORM/$HWSKU/$DEV/hwsku.json
if [[ -f $PORT_CONFIG_INI ]] || [[ -f $HWSKU_JSON ]]; then
return 0
fi
return 1
}
function check_service_exists()
{
systemctl list-units --full -all 2>/dev/null | grep -Fq $1
if [[ $? -eq 0 ]]; then
echo true
return
else
echo false
return
fi
}
# DEPENDENT initially contains namespace independent services
# namespace specific services are added later in this script.
DEPENDENT=""
MULTI_INST_DEPENDENT=""
if [[ $(check_service_exists radv) == "true" ]]; then
DEPENDENT="$DEPENDENT radv"
fi
if [ "$DEV" ]; then
NET_NS="$NAMESPACE_PREFIX$DEV" #name of the network namespace
SONIC_DB_CLI="sonic-db-cli -n $NET_NS"
else
NET_NS=""
SONIC_DB_CLI="sonic-db-cli"
fi
PLATFORM=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' platform`
HWSKU=`$SONIC_DB_CLI CONFIG_DB hget 'DEVICE_METADATA|localhost' hwsku`
check_peer_gbsyncd
check_macsec
check_add_bgp_dependency
check_ports_present
PORTS_PRESENT=$?
if [[ $PORTS_PRESENT == 0 ]] && [[ $(check_service_exists teamd) == "true" ]]; then
MULTI_INST_DEPENDENT="teamd"
fi
read_dependent_services
case "$1" in
start|wait|stop)
$1
;;
*)
echo "Usage: $0 {start|wait|stop}"
exit 1
;;
esac