diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index c52146ca7c..f763d5f5cb 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -266,16 +266,16 @@ def remove_legacy_firewall_rule(self, dst_ip): def enable_firewall(self, dst_ip, uid): """ - It checks if every iptable rule exists and add them if not present. It returns a tuple(enable firewall success status, update rules flag) + It checks if every iptable rule exists and add them if not present. It returns a tuple(enable firewall success status, missing rules array) enable firewall success status: Returns True if every firewall rule exists otherwise False - update rules flag: Returns True if rules are updated otherwise False + missing rules: array with names of the missing rules ("ACCEPT DNS", "ACCEPT", "DROP") """ - # This is to send telemetry when iptable rules updated - is_firewall_rules_updated = False # If a previous attempt failed, do not retry global _enable_firewall # pylint: disable=W0603 if not _enable_firewall: - return False, is_firewall_rules_updated + return False, [] + + missing_rules = [] try: wait = self.get_firewall_will_wait() @@ -283,10 +283,10 @@ def enable_firewall(self, dst_ip, uid): # check every iptable rule and delete others if any rule is missing # and append every iptable rule to the end of the chain. try: - if not AddFirewallRules.verify_iptables_rules_exist(wait, dst_ip, uid): + missing_rules.extend(AddFirewallRules.get_missing_iptables_rules(wait, dst_ip, uid)) + if len(missing_rules) > 0: self.remove_firewall(dst_ip, uid, wait) AddFirewallRules.add_iptables_rules(wait, dst_ip, uid) - is_firewall_rules_updated = True except CommandError as e: if e.returncode == 2: self.remove_firewall(dst_ip, uid, wait) @@ -297,14 +297,14 @@ def enable_firewall(self, dst_ip, uid): logger.warn(ustr(error)) raise - return True, is_firewall_rules_updated + return True, missing_rules except Exception as e: _enable_firewall = False logger.info("Unable to establish firewall -- " "no further attempts will be made: " "{0}".format(ustr(e))) - return False, is_firewall_rules_updated + return False, missing_rules def get_firewall_list(self, wait=None): try: diff --git a/azurelinuxagent/common/utils/networkutil.py b/azurelinuxagent/common/utils/networkutil.py index 160f17514b..7f69801441 100644 --- a/azurelinuxagent/common/utils/networkutil.py +++ b/azurelinuxagent/common/utils/networkutil.py @@ -217,14 +217,22 @@ def __execute_check_command(cmd): return False @staticmethod - def verify_iptables_rules_exist(wait, dst_ip, uid): + def get_missing_iptables_rules(wait, dst_ip, uid): + missing = [] + check_cmd_tcp_rule = AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait) - check_cmd_accept_rule = AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, uid, - wait=wait) + if not AddFirewallRules.__execute_check_command(check_cmd_tcp_rule): + missing.append("ACCEPT DNS") + + check_cmd_accept_rule = AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, uid, wait=wait) + if not AddFirewallRules.__execute_check_command(check_cmd_accept_rule): + missing.append("ACCEPT") + check_cmd_drop_rule = AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait) + if not AddFirewallRules.__execute_check_command(check_cmd_drop_rule): + missing.append("DROP") - return AddFirewallRules.__execute_check_command(check_cmd_tcp_rule) and AddFirewallRules.__execute_check_command(check_cmd_accept_rule) \ - and AddFirewallRules.__execute_check_command(check_cmd_drop_rule) + return missing @staticmethod def __execute_firewall_commands(dst_ip, uid, command=APPEND_COMMAND, firewalld_command="", wait=""): diff --git a/azurelinuxagent/ga/env.py b/azurelinuxagent/ga/env.py index 4c55377d52..9e7e7285ef 100644 --- a/azurelinuxagent/ga/env.py +++ b/azurelinuxagent/ga/env.py @@ -16,7 +16,7 @@ # # Requires Python 2.6+ and Openssl 1.0+ # - +import datetime import re import os import socket @@ -105,6 +105,9 @@ def __init__(self, osutil, protocol): self._protocol = protocol self._try_remove_legacy_firewall_rule = False self._is_first_setup = True + self._reset_count = 0 + self._report_after = datetime.datetime.min + self._report_period = None # None indicates "report immediately" def _operation(self): # If the rules ever change we must reset all rules and start over again. @@ -118,19 +121,32 @@ def _operation(self): self._osutil.remove_legacy_firewall_rule(dst_ip=self._protocol.get_endpoint()) self._try_remove_legacy_firewall_rule = True - firewall_state = self._get_firewall_state() - - success, is_firewall_rules_updated = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid()) + success, missing_firewall_rules = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid()) - if is_firewall_rules_updated: + if len(missing_firewall_rules) > 0: if self._is_first_setup: - msg = "Created Azure fabric firewall rules:\n{0}".format(self._get_firewall_state()) + msg = "Created firewall rules for the Azure Fabric:\n{0}".format(self._get_firewall_state()) logger.info(msg) add_event(op=WALAEventOperation.Firewall, message=msg) else: - msg = "Reset Azure fabric firewall rules.\nInitial state:\n{0}\nCurrent state:\n{1}".format(firewall_state, self._get_firewall_state()) - logger.info(msg) - add_event(op=WALAEventOperation.ResetFirewall, message=msg) + self._reset_count += 1 + # We report immediately (when period is None) the first 5 instances, then we switch the period to every few hours + if self._report_period is None: + msg = "Some firewall rules were missing: {0}. Re-created all the rules:\n{1}".format(missing_firewall_rules, self._get_firewall_state()) + if self._reset_count >= 5: + self._report_period = datetime.timedelta(hours=3) + self._reset_count = 0 + self._report_after = datetime.datetime.now() + self._report_period + elif datetime.datetime.now() >= self._report_after: + msg = "Some firewall rules were missing: {0}. This has happened {1} time(s) since the last report. Re-created all the rules:\n{2}".format( + missing_firewall_rules, self._reset_count, self._get_firewall_state()) + self._reset_count = 0 + self._report_after = datetime.datetime.now() + self._report_period + else: + msg = "" + if msg != "": + logger.info(msg) + add_event(op=WALAEventOperation.ResetFirewall, message=msg) add_periodic( logger.EVERY_HOUR, diff --git a/tests/common/osutil/test_default.py b/tests/common/osutil/test_default.py index 68bd282d70..afe27ae595 100644 --- a/tests/common/osutil/test_default.py +++ b/tests/common/osutil/test_default.py @@ -822,13 +822,13 @@ def test_enable_firewall_should_not_use_wait_when_iptables_does_not_support_it(s success, _ = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid) self.assertTrue(success, "Enabling the firewall was not successful") - # Exactly 8 calls have to be made. - # First check rule, delete 4 rules, + # Exactly 10 calls have to be made. + # First check 3 rules, delete 4 rules, # and Append the IPTable 3 rules. - self.assertEqual(len(mock_iptables.command_calls), 8, + self.assertEqual(len(mock_iptables.command_calls), 10, "Incorrect number of calls to iptables: [{0}]".format(mock_iptables.command_calls)) for command in mock_iptables.command_calls: - self.assertNotIn("-w", command, "The -w option should have been used in {0}".format(command)) + self.assertNotIn("-w", command, "The -w option sh ould have been used in {0}".format(command)) self.assertTrue(osutil._enable_firewall, "The firewall should not have been disabled")