Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FastReboot]: Update FR to make it work with 1.0.3 #95

Merged
merged 2 commits into from
Aug 14, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions scripts/fast-reboot
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ then
exit
fi

# Unload previous kernel if any loaded
if [ $(cat /sys/kernel/kexec_loaded) -eq 1 ]

# Unload the previously loaded kernel if any loaded
if [ "$(cat /sys/kernel/kexec_loaded)" -eq 1 ]
then
/sbin/kexec -u
fi
Expand All @@ -27,10 +28,12 @@ case "$BOOT_OPTIONS" in
;;
esac

# Load kernel into memory
sonic_asic_type=$(sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)

# Load kernel into the memory
/sbin/kexec -l "$KERNEL_IMAGE" --initrd="$INITRD" --append="$BOOT_OPTIONS"

# Save ARP and FDB table to files
# Dump the ARP and FDB tables to files
/usr/bin/fast-reboot-dump.py
docker cp /tmp/fdb.json swss:/
docker cp /tmp/arp.json swss:/
Expand All @@ -40,19 +43,27 @@ docker exec -ti bgp killall -9 watchquagga
docker exec -ti bgp killall -9 zebra
docker exec -ti bgp killall -9 bgpd

# Stop docker container engine. Otherwise will have broken docker storage
# Kill lldp, otherwise it sends information about the reboot
docker kill lldp

# Kill teamd, otherwise it brings down all LAGs
docker kill teamd

# Kill other containers to make reboot faster
docker ps -qa | xargs docker kill

# Stop the docker container engine. Otherwise we will have a broken docker storage
systemctl stop docker.service

# Stop opennsl modules for Broadcom platform
sonic_asic_type=$(sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)
if [ $sonic_asic_type = 'broadcom' ];
if [ "$sonic_asic_type" = 'broadcom' ];
then
systemctl stop opennsl-modules-3.16.0-4-amd64.service
fi

# Wait until all buffers synced with disk
sync
sleep 3
sleep 1
sync

# Reboot
Expand Down
66 changes: 54 additions & 12 deletions scripts/fast-reboot-dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pprint import pprint


def generate_arp_entries(filename):
def generate_arp_entries(filename, all_available_macs):
db = swsssdk.SonicV2Connector()
db.connect(db.APPL_DB, False) # Make one attempt only

Expand All @@ -15,9 +15,13 @@ def generate_arp_entries(filename):
keys = db.keys(db.APPL_DB, 'NEIGH_TABLE:*')
keys = [] if keys is None else keys
for key in keys:
entry = db.get_all(db.APPL_DB, key)
if entry['neigh'].lower() not in all_available_macs:
# print me to log
continue
obj = {
'OP': 'SET',
key: db.get_all(db.APPL_DB, key)
key: entry,
'OP': 'SET'
}
arp_output.append(obj)

Expand Down Expand Up @@ -49,24 +53,56 @@ def get_vlan_ifaces():

return vlans

def get_bridge_port_id_2_port_id(db):
bridge_port_id_2_port_id = {}
keys = db.keys(db.ASIC_DB, 'ASIC_STATE:SAI_OBJECT_TYPE_BRIDGE_PORT:oid:*')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can we make sure the ASIC_STATE:SAI_OBJECT_TYPE_BRIDGE_PORT table is available by the time you run this function?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible that the ASIC DB is not completely initialized and you do not have all of the bridge port ID to port ID mappings?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I run this command before the switch goes into FR. I think external tools should check the state of the switch before they initiate FR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's possible, but if the device is not initialized we shouldn't reboot it.

keys = [] if keys is None else keys
for key in keys:
value = db.get_all(db.ASIC_DB, key)
port_type = value['SAI_BRIDGE_PORT_ATTR_TYPE']
if port_type != 'SAI_BRIDGE_PORT_TYPE_PORT':
continue
port_id = value['SAI_BRIDGE_PORT_ATTR_PORT_ID']
# ignore admin status
bridge_id = key.replace('ASIC_STATE:SAI_OBJECT_TYPE_BRIDGE_PORT:', '')
bridge_port_id_2_port_id[bridge_id] = port_id

return bridge_port_id_2_port_id

def get_map_port_id_2_iface_name(db):
port_id_2_iface = {}
keys = db.keys(db.ASIC_DB, 'ASIC_STATE:SAI_OBJECT_TYPE_HOSTIF:oid:*')
keys = [] if keys is None else keys
for key in keys:
value = db.get_all(db.ASIC_DB, key)
port_id = value['SAI_HOSTIF_ATTR_RIF_OR_PORT_ID']
port_id = value['SAI_HOSTIF_ATTR_OBJ_ID']
iface_name = value['SAI_HOSTIF_ATTR_NAME']
port_id_2_iface[port_id] = iface_name

return port_id_2_iface

def get_fdb(db, vlan_id, port_id_2_iface):
def get_map_bridge_port_id_2_iface_name(db):
    """Map each bridge port id to the name of its host interface.

    Joins the bridge-port-id -> port-id mapping with the
    port-id -> interface-name mapping; both are obtained from the
    sibling lookup helpers using the same ``db`` connector.

    Args:
        db: connector object, passed through unchanged to
            get_bridge_port_id_2_port_id() and
            get_map_port_id_2_iface_name().

    Returns:
        dict: bridge port id -> interface name. Bridge ports whose port id
        has no entry in the port-id -> interface-name mapping are left out
        (a diagnostic line is printed for each of them).
    """
    bridge_port_id_2_port_id = get_bridge_port_id_2_port_id(db)
    port_id_2_iface = get_map_port_id_2_iface_name(db)

    bridge_port_id_2_iface_name = {}

    for bridge_port_id, port_id in bridge_port_id_2_port_id.items():
        if port_id in port_id_2_iface:
            bridge_port_id_2_iface_name[bridge_port_id] = port_id_2_iface[port_id]
        else:
            # Single-argument parenthesised form works as both the Python 2
            # print statement and the Python 3 print function. Name the ids
            # so the operator can tell which mapping is missing.
            print("Not found: no interface name for port %s (bridge port %s)"
                  % (port_id, bridge_port_id))

    return bridge_port_id_2_iface_name

def get_fdb(db, vlan_id, bridge_id_2_iface):
fdb_types = {
'SAI_FDB_ENTRY_TYPE_DYNAMIC': 'dynamic',
'SAI_FDB_ENTRY_TYPE_STATIC' : 'static'
}

available_macs = set()

entries = []
keys = db.keys(db.ASIC_DB, 'ASIC_STATE:SAI_OBJECT_TYPE_FDB_ENTRY:{*\"vlan\":\"%d\"}' % vlan_id)
keys = [] if keys is None else keys
Expand All @@ -76,12 +112,15 @@ def get_fdb(db, vlan_id, port_id_2_iface):
mac = str(key_obj['mac'])
if not is_mac_unicast(mac):
continue
available_macs.add(mac.lower())
mac = mac.replace(':', '-')
# FIXME: mac is unicast
# get attributes
value = db.get_all(db.ASIC_DB, key)
type = fdb_types[value['SAI_FDB_ENTRY_ATTR_TYPE']]
port = port_id_2_iface[value['SAI_FDB_ENTRY_ATTR_PORT_ID']]
if value['SAI_FDB_ENTRY_ATTR_BRIDGE_PORT_ID'] not in bridge_id_2_iface:
continue
port = bridge_id_2_iface[value['SAI_FDB_ENTRY_ATTR_BRIDGE_PORT_ID']]

obj = {
'FDB_TABLE:Vlan%d:%s' % (vlan_id, mac) : {
Expand All @@ -93,7 +132,7 @@ def get_fdb(db, vlan_id, port_id_2_iface):

entries.append(obj)

return entries
return entries, available_macs


def generate_fdb_entries(filename):
Expand All @@ -102,24 +141,27 @@ def generate_fdb_entries(filename):
db = swsssdk.SonicV2Connector()
db.connect(db.ASIC_DB, False) # Make one attempt only

port_id_2_iface = get_map_port_id_2_iface_name(db)
bridge_id_2_iface = get_map_bridge_port_id_2_iface_name(db)

vlan_ifaces = get_vlan_ifaces()

all_available_macs = set()
for vlan in vlan_ifaces:
vlan_id = int(vlan.replace('Vlan', ''))
fdb_entries.extend(get_fdb(db, vlan_id, port_id_2_iface))
fdb_entry, available_macs = get_fdb(db, vlan_id, bridge_id_2_iface)
all_available_macs |= available_macs
fdb_entries.extend(fdb_entry)

db.close(db.ASIC_DB)

with open(filename, 'w') as fp:
json.dump(fdb_entries, fp, indent=2, separators=(',', ': '))

return
return all_available_macs

def main():
generate_arp_entries('/tmp/arp.json')
generate_fdb_entries('/tmp/fdb.json')
all_available_macs = generate_fdb_entries('/tmp/fdb.json')
generate_arp_entries('/tmp/arp.json', all_available_macs)

return

Expand Down