Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[auto-ts] add memory check #10433

Merged
merged 7 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion files/build_templates/init_cfg.json.j2
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
"rate_limit_interval" : "180",
"max_techsupport_limit" : "10.0",
"max_core_limit" : "5.0",
"available_mem_threshold": "10.0",
"min_available_mem": "200",
"since" : "2 days ago"
}
},
Expand All @@ -81,7 +83,8 @@
{%- if enable_auto_tech_support == "y" %}
"state" : "enabled", {% else %}
"state" : "disabled", {% endif %}
"rate_limit_interval" : "600"
"rate_limit_interval" : "600",
"available_mem_threshold": "10.0"
}{%if not loop.last %},{% endif -%}
{% endfor %}
}
Expand Down
3 changes: 3 additions & 0 deletions files/image_config/monit/conf.d/sonic-host
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,6 @@ check program vnetRouteCheck with path "/usr/local/bin/vnet_route_check.py"
every 5 cycles
if status != 0 for 3 cycle then alert repeat every 1 cycles

# memory_check tool that verifies that memory usage does not cross the threshold or invokes techsupport.
check program memory_check with path "/usr/local/bin/memory_threshold_check.py"
if status == 2 for 10 times within 20 cycles then exec "/usr/local/bin/memory_threshold_check_handler.py"
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"AUTO_TECHSUPPORT_INVALID_RATE_LIMIT_FORMAT": {
"desc" : "Configure cooloff with a value of invalid format",
"eStrKey": "InvalidValue"
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
"desc" : "Configure a value for core-uage outside the range [0, 100)",
Expand All @@ -19,9 +19,23 @@
},
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
"desc" : "Configure a value for max_techsupport_size inside the range [0, 100) but with 3 fractional digits",
"eStrKey": "InvalidValue"
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_RATE_LIMIT_INTERVAL_TEST": {
"desc" : "Configure and test the valid configuration"
},
"AUTO_TECHSUPPORT_AVAILABLE_MEM_THRESHOLD": {
"desc" : "Configure and test the valid configuration"
},
"AUTO_TECHSUPPORT_INVALID_AVAILABLE_MEM_THRESHOLD": {
"desc" : "Configure a value for available_mem_threshold inside the range [0, 100) but with 3 fractional digits",
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_VALID": {
"desc" : "Configure and test the valid configuration"
},
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_INVALID_THRESHOLD": {
"desc" : "Configure a value for available_mem_threshold inside the range [0, 100) but with 3 fractional digits",
"eStrKey": "InvalidValue"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"max_techsupport_limit" : "10.0",
"max_core_limit" : "5.0",
"since" : "2 days ago"
}
}
}
}
},
Expand All @@ -20,8 +20,8 @@
"rate_limit_interval" : "180",
"max_techsupport_limit" : "10.0",
"max_core_limit" : "5.0",
"since" : "2 days ago"
}
"since" : "2 days ago"
}
}
}
},
Expand All @@ -30,7 +30,7 @@
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"rate_limit_interval" : "whatever"
}
}
}
}
},
Expand All @@ -40,7 +40,7 @@
"sonic-auto_techsupport:GLOBAL": {
"max_core_limit" : "100.00",
"rate_limit_interval" : "180"
}
}
}
}
},
Expand All @@ -50,7 +50,7 @@
"sonic-auto_techsupport:GLOBAL": {
"max_techsupport_limit" : "11.23",
"max_core_limit" : "99.99"
}
}
}
}
},
Expand All @@ -60,7 +60,7 @@
"sonic-auto_techsupport:GLOBAL": {
"max_techsupport_limit" : "11.111",
"max_core_limit" : "99.99"
}
}
}
}
},
Expand All @@ -81,5 +81,60 @@
]
}
}
},
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_VALID": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"available_mem_threshold": "10.0",
"min_available_mem": "900"
}
}
}
},
"AUTO_TECHSUPPORT_GLOBAL_MEM_THRESHOLD_INVALID_THRESHOLD": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"available_mem_threshold": "11.111"
}
}
}
},
"AUTO_TECHSUPPORT_AVAILABLE_MEM_THRESHOLD": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT_FEATURE": {
"AUTO_TECHSUPPORT_FEATURE_LIST": [
{
"feature_name" : "bgp",
"state" : "enabled",
"available_mem_threshold": "10.0"
},
{
"feature_name" : "swss",
"state" : "disabled",
"available_mem_threshold": "10.0"
}
]
}
}
},
"AUTO_TECHSUPPORT_INVALID_AVAILABLE_MEM_THRESHOLD": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT_FEATURE": {
"AUTO_TECHSUPPORT_FEATURE_LIST": [
{
"feature_name" : "bgp",
"state" : "enabled",
"available_mem_threshold": "11.111"
},
{
"feature_name" : "swss",
"state" : "disabled",
"available_mem_threshold": "10.0"
}
]
}
}
}
}
18 changes: 18 additions & 0 deletions src/sonic-yang-models/yang-models/sonic-auto_techsupport.yang
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,18 @@ module sonic-auto_techsupport {
description "Max Limit in percentage for the cummulative size of core dumps. No cleanup is performed if the value isn't congiured or is 0.0";
type decimal-repr;
}

leaf available_mem_threshold {
description "Memory threshold; 0 to disable techsupport invocation on memory usage threshold crossing";
type decimal-repr;
default 10.0;
}

leaf min_available_mem {
description "Minimum Free memory (in MB) that should be available for the techsupport execution to start";
type uint32;
default 200;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add unit test for these two new fields?


leaf since {
/*
Expand Down Expand Up @@ -96,6 +108,12 @@ module sonic-auto_techsupport {
type stypes:admin_mode;
}

leaf available_mem_threshold {
description "Memory threshold; 0 to disable techsupport invocation on memory usage threshold crossing";
type decimal-repr;
default 10.0;
}

leaf rate_limit_interval {
description "Rate limit interval for the corresponding feature. Configure 0 to explicitly disable";
type uint16;
Expand Down