Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce the frequency of firewall telemetry #3124

Merged
merged 3 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions azurelinuxagent/common/osutil/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,27 +266,27 @@ def remove_legacy_firewall_rule(self, dst_ip):

def enable_firewall(self, dst_ip, uid):
"""
It checks if every iptable rule exists and add them if not present. It returns a tuple(enable firewall success status, update rules flag)
It checks if every iptable rule exists and add them if not present. It returns a tuple(enable firewall success status, missing rules array)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of returning a boolean indicating that the rules were updated, now this method returns an array containing the rules that were missing when the firewall was updated.

enable firewall success status: Returns True if every firewall rule exists otherwise False
update rules flag: Returns True if rules are updated otherwise False
missirg rules: array with names of the missing rules ("ACCEPT DNS", "ACCEPT", "DROP")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should clarify in a comment here that these are the missing rules before reapply operation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any suggestions? "It checks if every iptable rule exists" above seems enough to me

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

something like?

Suggested change
missirg rules: array with names of the missing rules ("ACCEPT DNS", "ACCEPT", "DROP")
missing rules: array with names of the missing rules before attempt to add them ("ACCEPT DNS", "ACCEPT", "DROP")

I was just thinking of case where we attempt to re-add the rules and some get added but agent fails to add others. Then we should clarify if the missing rules array was created before or after that attempt.

It's clear from the code though, so might be overkill to add the comment.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I'll leave it as is. Anyways, all this code is already changed in the fix for nftables and this return value is no longer needed.

"""
# This is to send telemetry when iptable rules updated
is_firewall_rules_updated = False
# If a previous attempt failed, do not retry
global _enable_firewall # pylint: disable=W0603
maddieford marked this conversation as resolved.
Show resolved Hide resolved
if not _enable_firewall:
return False, is_firewall_rules_updated
return False, []

missing_rules = []

try:
wait = self.get_firewall_will_wait()

# check every iptable rule and delete others if any rule is missing
# and append every iptable rule to the end of the chain.
try:
if not AddFirewallRules.verify_iptables_rules_exist(wait, dst_ip, uid):
missing_rules.extend(AddFirewallRules.get_missing_iptables_rules(wait, dst_ip, uid))
if len(missing_rules) > 0:
self.remove_firewall(dst_ip, uid, wait)
AddFirewallRules.add_iptables_rules(wait, dst_ip, uid)
is_firewall_rules_updated = True
except CommandError as e:
if e.returncode == 2:
self.remove_firewall(dst_ip, uid, wait)
Expand All @@ -297,14 +297,14 @@ def enable_firewall(self, dst_ip, uid):
logger.warn(ustr(error))
raise

return True, is_firewall_rules_updated
return True, missing_rules

except Exception as e:
_enable_firewall = False
logger.info("Unable to establish firewall -- "
"no further attempts will be made: "
"{0}".format(ustr(e)))
return False, is_firewall_rules_updated
return False, missing_rules

def get_firewall_list(self, wait=None):
try:
Expand Down
18 changes: 13 additions & 5 deletions azurelinuxagent/common/utils/networkutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,22 @@ def __execute_check_command(cmd):
return False

@staticmethod
def verify_iptables_rules_exist(wait, dst_ip, uid):
def get_missing_iptables_rules(wait, dst_ip, uid):
missing = []

check_cmd_tcp_rule = AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait)
check_cmd_accept_rule = AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, uid,
wait=wait)
if not AddFirewallRules.__execute_check_command(check_cmd_tcp_rule):
missing.append("ACCEPT DNS")

check_cmd_accept_rule = AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, uid, wait=wait)
if not AddFirewallRules.__execute_check_command(check_cmd_accept_rule):
missing.append("ACCEPT")

check_cmd_drop_rule = AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait)
if not AddFirewallRules.__execute_check_command(check_cmd_drop_rule):
missing.append("DROP")

return AddFirewallRules.__execute_check_command(check_cmd_tcp_rule) and AddFirewallRules.__execute_check_command(check_cmd_accept_rule) \
and AddFirewallRules.__execute_check_command(check_cmd_drop_rule)
return missing

@staticmethod
def __execute_firewall_commands(dst_ip, uid, command=APPEND_COMMAND, firewalld_command="", wait=""):
Expand Down
34 changes: 25 additions & 9 deletions azurelinuxagent/ga/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#
# Requires Python 2.6+ and Openssl 1.0+
#

import datetime
import re
import os
import socket
Expand Down Expand Up @@ -105,6 +105,9 @@ def __init__(self, osutil, protocol):
self._protocol = protocol
self._try_remove_legacy_firewall_rule = False
self._is_first_setup = True
self._reset_count = 0
self._report_after = datetime.datetime.min
self._report_period = datetime.timedelta(seconds=0) # 0 indicates "report immediately"

def _operation(self):
# If the rules ever change we must reset all rules and start over again.
Expand All @@ -118,19 +121,32 @@ def _operation(self):
self._osutil.remove_legacy_firewall_rule(dst_ip=self._protocol.get_endpoint())
self._try_remove_legacy_firewall_rule = True

firewall_state = self._get_firewall_state()

success, is_firewall_rules_updated = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid())
success, missing_firewall_rules = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid())

if is_firewall_rules_updated:
if len(missing_firewall_rules) > 0:
if self._is_first_setup:
msg = "Created Azure fabric firewall rules:\n{0}".format(self._get_firewall_state())
msg = "Created firewall rules for the Azure Fabric:\n{0}".format(self._get_firewall_state())
logger.info(msg)
add_event(op=WALAEventOperation.Firewall, message=msg)
else:
msg = "Reset Azure fabric firewall rules.\nInitial state:\n{0}\nCurrent state:\n{1}".format(firewall_state, self._get_firewall_state())
logger.info(msg)
add_event(op=WALAEventOperation.ResetFirewall, message=msg)
self._reset_count += 1
# We report immediately (period == 0) the first 5 instances, then we switch the period to every few hours
if self._report_period.total_seconds() == 0:
msg = "Some firewall rules were missing: {0}. Re-created all the rules:\n{1}".format(missing_firewall_rules, self._get_firewall_state())
if self._reset_count >= 5:
self._report_period = datetime.timedelta(hours=3)
self._reset_count = 0
self._report_after = datetime.datetime.now() + self._report_period
elif datetime.datetime.now() >= self._report_after:
msg = "Some firewall rules were missing: {0}. This has happened {1} time(s) since the last report. Re-created all the rules:\n{2}".format(
missing_firewall_rules, self._reset_count, self._get_firewall_state())
self._reset_count = 0
self._report_after = datetime.datetime.now() + self._report_period
else:
msg = ""
if msg != "":
logger.info(msg)
add_event(op=WALAEventOperation.ResetFirewall, message=msg)

add_periodic(
logger.EVERY_HOUR,
Expand Down
Loading