Incremental QOS config updater test update #5028

Merged: 6 commits, Feb 14, 2022

Changes from 2 commits
2 changes: 1 addition & 1 deletion tests/generic_config_updater/gu_utils.py
@@ -10,7 +10,7 @@

CONTAINER_SERVICES_LIST = ["swss", "syncd", "radv", "lldp", "dhcp_relay", "teamd", "bgp", "pmon", "telemetry", "acms"]
DEFAULT_CHECKPOINT_NAME = "test"
YANG_IGNORED_OPTIONS = "-i /FEATURE -i /QUEUE -i /SCHEDULER"
YANG_IGNORED_OPTIONS = "-i /FEATURE -i /QUEUE -i /SCHEDULER -i /BUFFER_PORT_INGRESS_PROFILE_LIST -i /BUFFER_PORT_EGRESS_PROFILE_LIST"

def generate_tmpfile(duthost):
    """Generate temp file
209 changes: 169 additions & 40 deletions tests/generic_config_updater/test_incremental_qos.py
@@ -8,14 +8,27 @@
from tests.common.helpers.dut_utils import verify_orchagent_running_or_assert
from tests.generic_config_updater.gu_utils import apply_patch, expect_op_success, expect_res_success, expect_op_failure
from tests.generic_config_updater.gu_utils import generate_tmpfile, delete_tmpfile
from tests.generic_config_updater.gu_utils import create_checkpoint, delete_checkpoint, rollback_or_reload

pytestmark = [
    pytest.mark.topology('t0'),
    pytest.mark.topology('t0', 't1'),
    pytest.mark.asic('mellanox')
[Review thread on pytest.mark.asic('mellanox'); see the pytest_require sketch after the pytestmark list below]
@qiluo-msft (Contributor, Jan 29, 2022): Is it safe to run this test on a Mellanox switch that is not a 2700? #Closed
Author (Contributor): These formulas are only applicable to the 2700.
Author (Contributor): @wangxin Is there a marker we can add for 2700?
Collaborator: Do you only want this test to run on 2700? If so, you can add a pytest_require condition at the entrance of your test code to only allow 2700. But why?
Author (Contributor): Yes, the Mellanox SPC1 buffer calculations only apply to 2700 devices. @neethajohn may confirm/correct me. And thanks, I added pytest_require to check for 2700 in hwsku.
]
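
As a rough illustration of the pytest_require suggestion in the thread above (the exact guard the author says they added is not shown in this two-commit revision), a check along the following lines would restrict the test to 2700 hwskus; the import path and the hwsku substring check are assumptions, not code from this PR:

    # Sketch only: pytest_require skips the test when the condition is False, and this
    # assumes duthost.facts["hwsku"] contains "2700" on Mellanox SPC1 2700 platforms.
    from tests.common.helpers.assertions import pytest_require

    def skip_unless_mellanox_2700(duthost):
        pytest_require("2700" in duthost.facts["hwsku"],
                       "Incremental QoS buffer calculations only apply to Mellanox 2700 devices")
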

logger = logging.getLogger(__name__)

LOSSLESS_PGS = 2
LOSSY_PGS = 1
MGMT_POOL = 256
EGRESS_MIRRORING = 10
MIN_LOSSY_BUFFER_THRESHOLD = 19
EGRESS_POOL_THRESHOLD = 9
OPER_HEADROOM_SIZE = 19
INGRESS_POOL_THRESHOLD = 10
HEADROOM_POOL_OVERSUB = 2
MMU_SIZE = 13619
READ_ASICDB_TIMEOUT = 480
READ_ASICDB_INTERVAL = 20

@pytest.fixture(scope="module")
def ensure_dut_readiness(duthost):
@@ -25,69 +38,185 @@ def ensure_dut_readiness(duthost):
    Args:
        duthost: DUT host object
    """
    verify_orchagent_running_or_assert(duthost)
    create_checkpoint(duthost)

    yield

    try:
        verify_orchagent_running_or_assert(duthost)
        logger.info("Rolled back to original checkpoint")
        rollback_or_reload(duthost)
    finally:
        delete_checkpoint(duthost)

def get_uplink_downlink_count(duthost, tbinfo):
    """
    Retrieves uplink and downlink count from DEVICE_NEIGHBOR_METADATA based on topology

    Args:
        duthost: DUT host object
        tbinfo: information about the running testbed

    Returns:
        uplink count, downlink count

    """
    config_db_json = json.loads(duthost.shell("show runningconfig all")["stdout"])
    device_neighbor_metadata = config_db_json["DEVICE_NEIGHBOR_METADATA"]

[@qiluo-msft commented on the lines below (Jan 29, 2022); isabelmsft marked this conversation as resolved.]

    topo = tbinfo['topo']['name']

    if "t1" in topo:
        spine_router_count = 0
        tor_router_count = 0
        for neighbor in device_neighbor_metadata:
            neighbor_data = device_neighbor_metadata[neighbor]
            if neighbor_data['type'] == "SpineRouter":
                spine_router_count += 1
            elif neighbor_data['type'] == "ToRRouter":
                tor_router_count += 1
        return spine_router_count, tor_router_count

    elif "t0" in topo:
        leaf_router_count = 0
        server_count = 0
        for neighbor in device_neighbor_metadata:
            neighbor_data = device_neighbor_metadata[neighbor]
            if neighbor_data['type'] == "LeafRouter":
                leaf_router_count += 1
            elif neighbor_data['type'] == "Server":
                server_count += 1
        return leaf_router_count, server_count


def get_neighbor_type_to_pg_headroom_map(duthost):
    """
    Calculates pg headroom based on the present neighbor types

    Args:
        duthost: DUT host object

    Returns:
        A map of neighbor type to its corresponding pg headroom value
    """
    config_db_json = json.loads(duthost.shell("show runningconfig all")["stdout"])
    device_neighbor_metadata = config_db_json["DEVICE_NEIGHBOR_METADATA"]
    interfaces_data = config_db_json["PORT"]
    neighbor_set = set()
    neighbor_to_interface_map = {}
    neighbor_to_type_map = {}
    neighbor_type_to_pg_headroom_map = {}

    for neighbor in device_neighbor_metadata:
        neighbor_set.add(neighbor)
        neighbor_data = device_neighbor_metadata[neighbor]
        neighbor_to_type_map[neighbor] = neighbor_data['type']

    for interface in interfaces_data:
        for neighbor in neighbor_set:
            if neighbor in json.dumps(interfaces_data[interface]):
                neighbor_to_interface_map[neighbor] = interface
                break

    for neighbor in neighbor_set:
        interface = neighbor_to_interface_map[neighbor]

        cable_length = duthost.shell('sonic-db-cli CONFIG_DB hget "CABLE_LENGTH|AZURE" {}'.format(interface))['stdout']
        port_speed = duthost.shell('sonic-db-cli CONFIG_DB hget "PORT|{}" speed'.format(interface))['stdout']

        expected_profile = 'pg_lossless_{}_{}_profile'.format(port_speed, cable_length)

        xoff = int(duthost.shell('redis-cli hget "BUFFER_PROFILE_TABLE:{}" xoff'.format(expected_profile))['stdout'])
[Review comment] @qiluo-msft (Contributor, Jan 29, 2022) on "redis-cli": redis-cli will read AppDB (database 0) by default. I think you need sonic-db-cli CONFIG_DB. (See the sketch after this function.) #Closed

        xon = int(duthost.shell('redis-cli hget "BUFFER_PROFILE_TABLE:{}" xon'.format(expected_profile))['stdout'])
        pg_headroom = (xoff + xon) / 1024
[Review comment] @qiluo-msft (Contributor, Feb 1, 2022) on "/": LGTM reports a warning here. The main concern is that the behavior of / differs between python3 and python2, and we can improve the code to make it behave the same way. You may find some solutions at https://stackoverflow.com/a/15633893/2514803. I prefer from __future__ import division with an int() conversion. (See the sketch after this function.) #Closed


        neighbor_type = neighbor_to_type_map[neighbor]
        neighbor_type_to_pg_headroom_map[neighbor_type] = pg_headroom

    return neighbor_type_to_pg_headroom_map
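
Taking the two review comments above together, the sketch below shows one possible direction; it is not the code merged in this PR. It assumes the lossless profile can be read from CONFIG_DB's BUFFER_PROFILE table (under the dynamic buffer model the values may instead live in APPL_DB's BUFFER_PROFILE_TABLE), and it forces integer division so python2 and python3 agree.

    # Sketch only; in a real module the __future__ import must sit at the top of the file.
    from __future__ import division   # make / behave consistently across python2 and python3

    def get_pg_headroom_kb(duthost, expected_profile):
        # Read xoff/xon through sonic-db-cli rather than bare redis-cli, per the review comment.
        xoff = int(duthost.shell('sonic-db-cli CONFIG_DB hget "BUFFER_PROFILE|{}" xoff'.format(expected_profile))['stdout'])
        xon = int(duthost.shell('sonic-db-cli CONFIG_DB hget "BUFFER_PROFILE|{}" xon'.format(expected_profile))['stdout'])
        return int((xoff + xon) / 1024)
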


def calculate_field_value(duthost, tbinfo, field):
    """
    Calculates value of specified field

    Args:
        duthost: DUT host object
        tbinfo: information about the running testbed
        field: xoff, ingress_lossless_pool_size, or egress_lossy_pool_size

    """
    uplink, downlink = get_uplink_downlink_count(duthost, tbinfo)
    uplink_downlink_sum = uplink + downlink
    system_reserved = uplink_downlink_sum * EGRESS_MIRRORING + MGMT_POOL
    user_reserved = uplink_downlink_sum * LOSSY_PGS * MIN_LOSSY_BUFFER_THRESHOLD + uplink_downlink_sum * EGRESS_POOL_THRESHOLD
    private_headroom = uplink_downlink_sum * LOSSLESS_PGS * OPER_HEADROOM_SIZE + uplink_downlink_sum * INGRESS_POOL_THRESHOLD

    config_headroom_int_sum = 0
    neighbor_type_to_pg_headroom_map = get_neighbor_type_to_pg_headroom_map(duthost)
    for neighbor_type in neighbor_type_to_pg_headroom_map:
        if neighbor_type in ("SpineRouter", "LeafRouter"):
            config_headroom_uplink_multiplier = neighbor_type_to_pg_headroom_map[neighbor_type]
            config_headroom_int_sum = uplink * config_headroom_uplink_multiplier + config_headroom_int_sum
        elif neighbor_type in ("ToRRouter", "Server"):
            config_headroom_downlink_multiplier = neighbor_type_to_pg_headroom_map[neighbor_type]
            config_headroom_int_sum = downlink * config_headroom_downlink_multiplier + config_headroom_int_sum
    config_headroom = LOSSLESS_PGS * config_headroom_int_sum

    headroom_pool = (config_headroom - private_headroom) / HEADROOM_POOL_OVERSUB

if ("xoff" in field):
return headroom_pool
else:
cmd = "sonic-db-cli CONFIG_DB del \"BUFFER_POOL|{}\" \"{}\" ".format(key, field)

verify_orchagent_running_or_assert(duthost)
operational_headroom = headroom_pool + private_headroom
ingress_lossless_egress_lossy = MMU_SIZE - operational_headroom - user_reserved - system_reserved
return ingress_lossless_egress_lossy
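
To make the arithmetic above concrete, here is a small worked example that is not taken from the PR: it assumes a t0 testbed with 4 LeafRouter uplinks and 8 Server downlinks, an assumed pg headroom of 50 for every neighbor type, and treats the constants as KB values.

    # Worked example with illustrative numbers only.
    uplink, downlink = 4, 8
    total = uplink + downlink                                                                          # 12
    system_reserved = total * EGRESS_MIRRORING + MGMT_POOL                                             # 12 * 10 + 256 = 376
    user_reserved = total * LOSSY_PGS * MIN_LOSSY_BUFFER_THRESHOLD + total * EGRESS_POOL_THRESHOLD     # 228 + 108 = 336
    private_headroom = total * LOSSLESS_PGS * OPER_HEADROOM_SIZE + total * INGRESS_POOL_THRESHOLD      # 456 + 120 = 576
    config_headroom = LOSSLESS_PGS * (uplink * 50 + downlink * 50)                                     # 2 * 600 = 1200
    headroom_pool = (config_headroom - private_headroom) // HEADROOM_POOL_OVERSUB                      # (1200 - 576) // 2 = 312 -> "xoff"
    pool_size = MMU_SIZE - (headroom_pool + private_headroom) - user_reserved - system_reserved        # 13619 - 888 - 336 - 376 = 12019 -> pool "size"
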


def ensure_application_of_updated_config(duthost, configdb_field, value):
    """
    Ensures application of the JSON patch config update

    Args:
        duthost: DUT host object
        configdb_field: config db field under test
        value: expected value of configdb_field
    """
    def _confirm_value_in_asic_db():
        if "ingress_lossless_pool" in configdb_field:
            buffer_pool = "ingress_lossless_pool"
        elif "egress_lossy_pool" in configdb_field:
            buffer_pool = "egress_lossy_pool"
        oid = duthost.shell('redis-cli -n 2 HGET COUNTERS_BUFFER_POOL_NAME_MAP {}'.format(buffer_pool))["stdout"]
[Review comment] @qiluo-msft (Contributor, Jan 29, 2022) on "redis-cli -n 2": redis-cli is deprecated. Suggest using sonic-db-cli ASIC_DB. (See the sketch after this function.) #Closed

        buffer_pool_data = duthost.shell('redis-cli -n 1 hgetall ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL:{}'.format(oid))["stdout"]
        return str(value) in buffer_pool_data

    pytest_assert(
        wait_until(READ_ASICDB_TIMEOUT, READ_ASICDB_INTERVAL, 0, _confirm_value_in_asic_db),
        "ASIC DB does not properly reflect newly configured field: {} expected value: {}".format(configdb_field, value)
    )
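
Following the review comment above, an equivalent check via sonic-db-cli would look roughly like the sketch below; the COUNTERS_DB and ASIC_DB database names and the pass-through HGET/HGETALL usage are assumptions about sonic-db-cli, not code from this PR.

    # Sketch of the reviewer's sonic-db-cli suggestion (assumed command forms).
    def confirm_value_in_asic_db_via_sonic_db_cli(duthost, buffer_pool, value):
        oid = duthost.shell('sonic-db-cli COUNTERS_DB HGET COUNTERS_BUFFER_POOL_NAME_MAP {}'.format(buffer_pool))["stdout"]
        buffer_pool_data = duthost.shell('sonic-db-cli ASIC_DB HGETALL "ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL:{}"'.format(oid))["stdout"]
        return str(value) in buffer_pool_data
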


@pytest.mark.parametrize("configdb_field", ["ingress_lossless_pool/xoff", "ingress_lossless_pool/size", "egress_lossy_pool/size"])
@pytest.mark.parametrize("operation", ["add", "replace", "remove"])
@pytest.mark.parametrize("field_pre_status", ["existing", "nonexistent"])
def test_incremental_qos_config_updates(duthost, ensure_dut_readiness, configdb_field, operation, field_pre_status):
operation_to_new_value_map = {"add": "678", "replace": "789", "remove": ""}
field_pre_status_to_value_map = {"existing": "567", "nonexistent": ""}

prepare_configdb_field(duthost, configdb_field, field_pre_status_to_value_map[field_pre_status])

def test_incremental_qos_config_updates(duthost, tbinfo, ensure_dut_readiness, configdb_field, operation):
tmpfile = generate_tmpfile(duthost)
logger.info("tmpfile {} created for json patch of field: {} and operation: {}".format(tmpfile, configdb_field, operation))

if operation == "remove":
value= ""
else:
value = calculate_field_value(duthost, tbinfo, configdb_field)
logger.info("value to be added to json patch: {} operation: {} field: {}".format(value, operation, configdb_field))

json_patch = [
{
"op": "{}".format(operation),
"path": "/BUFFER_POOL/{}".format(configdb_field),
"value": "{}".format(operation_to_new_value_map[operation])
}
]
"op": "{}".format(operation),
"path": "/BUFFER_POOL/{}".format(configdb_field),
"value": "{}".format(value)
}]

    output = apply_patch(duthost, json_data=json_patch, dest_file=tmpfile)
    expect_op_success(duthost, output)

    delete_tmpfile(duthost, tmpfile)
    ensure_application_of_updated_config(duthost, configdb_field, value)
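
For reference, the generated patch for one parametrization would look like the following hypothetical example; the value shown is made up, since in a real run it comes from calculate_field_value.

    # Hypothetical patch for configdb_field="ingress_lossless_pool/xoff", operation="add".
    json_patch = [
        {
            "op": "add",
            "path": "/BUFFER_POOL/ingress_lossless_pool/xoff",
            "value": "312"
        }
    ]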