Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip downgrade if requested version below daemon version #2850

Merged
merged 9 commits into from
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions azurelinuxagent/common/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,15 @@ def __init__(self, msg=None, inner=None):
super(AgentNetworkError, self).__init__(msg, inner)


class AgentUpdateError(AgentError):
    """
    Raised when the agent fails to update itself.

    Both constructor arguments are passed through unchanged to AgentError.
    """

    def __init__(self, msg=None, inner=None):
        # msg and inner are forwarded as-is; presumably inner is the underlying exception, if any (confirm against AgentError).
        super(AgentUpdateError, self).__init__(msg, inner)


class CGroupsException(AgentError):
"""
Exception to classify any cgroups related issue.
Expand Down
48 changes: 34 additions & 14 deletions azurelinuxagent/ga/agent_update_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

from azurelinuxagent.common import conf, logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.exception import AgentUpgradeExitException
from azurelinuxagent.common.exception import AgentUpgradeExitException, AgentUpdateError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.logger import LogLevel
from azurelinuxagent.common.protocol.extensions_goal_state import GoalStateSource
from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatuses, VMAgentUpdateStatus
from azurelinuxagent.common.protocol.restapi import VERSION_0, VMAgentUpdateStatuses, VMAgentUpdateStatus
from azurelinuxagent.common.utils import fileutil, textutil
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.version import CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN
from azurelinuxagent.common.version import get_daemon_version, CURRENT_VERSION, AGENT_NAME, AGENT_DIR_PATTERN
from azurelinuxagent.ga.guestagent import GuestAgent, GAUpdateReportState


Expand All @@ -37,8 +37,6 @@ def __init__(self):
self.last_attempted_requested_version_update_time = datetime.datetime.min
self.last_attempted_hotfix_update_time = datetime.datetime.min
self.last_attempted_normal_update_time = datetime.datetime.min
self.last_warning = ""
self.last_warning_time = datetime.datetime.min


class AgentUpdateHandler(object):
Expand Down Expand Up @@ -130,10 +128,10 @@ def __get_agent_family_manifests(self, goal_state):
agent_family_manifests.append(m)

if not family_found:
raise Exception(u"Agent family: {0} not found in the goal state, skipping agent update".format(family))
raise AgentUpdateError(u"Agent family: {0} not found in the goal state, skipping agent update".format(family))

if len(agent_family_manifests) == 0:
raise Exception(
raise AgentUpdateError(
u"No manifest links found for agent family: {0} for incarnation: {1}, skipping agent update".format(
self._ga_family, self._gs_id))
return agent_family_manifests[0]
Expand Down Expand Up @@ -179,7 +177,7 @@ def __get_agent_package_to_download(self, agent_manifest, version):
# Found a matching package, only download that one
return pkg

raise Exception("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, "
raise AgentUpdateError("No matching package found in the agent manifest for requested version: {0} in goal state incarnation: {1}, "
"skipping agent update".format(str(version), self._gs_id))

@staticmethod
Expand Down Expand Up @@ -245,6 +243,15 @@ def __get_all_agents_on_disk():
path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)]

@staticmethod
def __get_daemon_version_for_update():
    """
    Return the daemon version that a requested agent version is compared against.

    get_daemon_version() yields 0.0.0.0 when the daemon version is not specified. The daemon
    version has only been set starting with 2.2.53, so that release is used as the minimum
    version in that case.
    """
    reported_version = get_daemon_version()
    unspecified_version = FlexibleVersion(VERSION_0)
    return reported_version if reported_version != unspecified_version else FlexibleVersion("2.2.53")

@staticmethod
def __log_event(level, msg, success=True):
if level == LogLevel.INFO:
Expand Down Expand Up @@ -291,11 +298,20 @@ def run(self, goal_state):
if warn_msg != "":
self.__log_event(LogLevel.WARNING, warn_msg)

msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format(
self._gs_id, str(requested_version))
self.__log_event(LogLevel.INFO, msg)

try:
daemon_version = self.__get_daemon_version_for_update()
if requested_version < daemon_version:
# Don't process the update if the requested version is lower than the daemon version: historically, downgrades below
# the daemon version are not supported. The daemon would not pick up the requested version and would instead start the latest installed
# version again, so the agent would end up in a loop of downloading the requested version, exiting, and starting again with the same version.
#
raise AgentUpdateError("The Agent received a request to downgrade to version {0}, but downgrading to a version less than "
"the Agent installed on the image ({1}) is not supported. Skipping downgrade.".format(requested_version, daemon_version))

msg = "Goal state {0} is requesting a new agent version {1}, will update the agent before processing the goal state.".format(
self._gs_id, str(requested_version))
self.__log_event(LogLevel.INFO, msg)

agent = self.__download_and_get_agent(goal_state, agent_family, agent_manifest, requested_version)

if agent.is_blacklisted or not agent.is_downloaded:
Expand All @@ -314,9 +330,13 @@ def run(self, goal_state):
except Exception as err:
if isinstance(err, AgentUpgradeExitException):
raise err
elif isinstance(err, AgentUpdateError):
error_msg = ustr(err)
else:
error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err))
self.__log_event(LogLevel.WARNING, error_msg, success=False)
if "Missing requested version" not in GAUpdateReportState.report_error_msg:
GAUpdateReportState.report_error_msg = "Unable to update Agent: {0}".format(textutil.format_exception(err))
self.__log_event(LogLevel.WARNING, GAUpdateReportState.report_error_msg, success=False)
GAUpdateReportState.report_error_msg = error_msg

def get_vmagent_update_status(self):
"""
Expand Down
3 changes: 3 additions & 0 deletions tests/data/wire/ga_manifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
<Plugin>
<Version>2.1.0</Version><Uris><Uri>http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.1.0</Uri></Uris>
</Plugin>
<Plugin>
<Version>2.5.0</Version><Uris><Uri>http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.5.0</Uri></Uris>
</Plugin>
<Plugin>
<Version>9.9.9.10</Version>
<Uris>
Expand Down
26 changes: 24 additions & 2 deletions tests/ga/test_agent_update_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def __assert_agent_requested_version_in_goal_state(self, mock_telemetry, inc=1,

def __assert_no_agent_package_telemetry_emitted(self, mock_telemetry, version="9.9.9.10"):
upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
'Unable to update Agent: No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[
'No matching package found in the agent manifest for requested version: {0}'.format(version) in kwarg['message'] and kwarg[
'op'] == WALAEventOperation.AgentUpgrade]
self.assertEqual(1, len(upgrade_event_msgs),
"Did not find the event indicating that the agent package not found. Got: {0}".format(
Expand Down Expand Up @@ -217,7 +217,7 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c
self.prepare_agents()
self.assertEqual(20, self.agent_count(), "Agent directories not set properly")

downgraded_version = "1.2.0"
downgraded_version = "2.5.0"

with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry):
agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version)
Expand All @@ -230,6 +230,28 @@ def test_it_should_downgrade_agent_if_requested_version_is_available_less_than_c
versions=[downgraded_version, str(CURRENT_VERSION)])
self.assertIn("Agent update found, Exiting current process", ustr(context.exception.reason))

def test_it_should_not_downgrade_below_daemon_version(self):
    """
    A goal state requesting version 1.2.0 must not trigger a downgrade: no agent directory is created
    for the requested version, and exactly one AgentUpgrade telemetry event reports the rejected request.
    """
    data_file = DATA_FILE.copy()
    data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"

    # Set the test environment by adding 20 random agents to the agent directory
    self.prepare_agents()
    self.assertEqual(20, self.agent_count(), "Agent directories not set properly")

    downgraded_version = "1.2.0"

    with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry):
        agent_update_handler._protocol.mock_wire_data.set_extension_config_requested_version(downgraded_version)
        agent_update_handler._protocol.mock_wire_data.set_incarnation(2)
        agent_update_handler._protocol.client.update_goal_state()
        agent_update_handler.run(agent_update_handler._protocol.get_goal_state())
        self.assertFalse(os.path.exists(self.agent_dir(downgraded_version)),
                         "New agent directory should not be found")
        # Collect the telemetry events emitted when the handler refuses the downgrade request
        skipped_downgrade_events = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
                                    "The Agent received a request to downgrade to version" in kwarg['message'] and
                                    kwarg['op'] == WALAEventOperation.AgentUpgrade]
        self.assertEqual(1, len(skipped_downgrade_events),
                         "Expected exactly one event reporting that the downgrade below the daemon version was skipped")

def test_handles_if_requested_version_not_found_in_pkgs_to_download(self):
data_file = DATA_FILE.copy()
data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
Expand Down
2 changes: 1 addition & 1 deletion tests/ga/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -1818,7 +1818,7 @@ def test_it_should_mark_current_agent_as_bad_version_on_downgrade(self):
self.assertTrue(os.path.exists(self.agent_dir(CURRENT_VERSION)))
self.assertFalse(next(agent for agent in self.agents() if agent.version == CURRENT_VERSION).is_blacklisted,
"The current agent should not be blacklisted")
downgraded_version = "1.2.0"
downgraded_version = "2.5.0"

data_file = mockwiredata.DATA_FILE.copy()
data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
Expand Down
19 changes: 19 additions & 0 deletions tests_e2e/tests/agent_update/rsm_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from typing import List, Dict, Any

import requests
from assertpy import assert_that
from azure.identity import DefaultAzureCredential
from azure.mgmt.compute.models import VirtualMachine
from msrestazure.azure_cloud import Cloud
Expand Down Expand Up @@ -96,8 +97,19 @@ def run(self) -> None:
version: str = "1.3.1.0"
log.info("Attempting update version same as current version %s", upgrade_version)
self._request_rsm_update(version)
self._check_rsm_gs(version)
self._verify_guest_agent_update(version)

# verify requested version below daemon version
log.info("*******Verifying requested version below daemon version scenario*******")
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
log.info("Current agent version running on the vm before update \n%s", stdout)
version: str = "0.5.0"
log.info("Attempting requested version %s", version)
self._request_rsm_update(version)
self._check_rsm_gs(version)
self._verify_no_guest_agent_update(stdout)

def _check_rsm_gs(self, requested_version: str) -> None:
# This checks if RSM GS available to the agent after we mock the rsm update request
output = self._ssh_client.run_command(f"wait_for_rsm_goal_state.py --version {requested_version}", use_sudo=True)
Expand Down Expand Up @@ -184,6 +196,13 @@ def _check_agent_version(requested_version: str) -> bool:
stdout: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
log.info(f"Verified agent updated to requested version. Current agent version running:\n {stdout}")

def _verify_no_guest_agent_update(self, previous_agent: str) -> None:
    """
    Verify that the current agent version is the same as before the update attempt.

    :param previous_agent: output of "waagent-version" captured before the update was requested
    :raises AssertionError: if the current "waagent-version" output differs from previous_agent
    """
    current_agent: str = self._ssh_client.run_command("waagent-version", use_sudo=True)
    # described_as() must come before the assertion: is_equal_to() raises on a mismatch,
    # so a description chained after it would never be applied to the failure message.
    assert_that(current_agent).described_as(
        f"Agent version changed.\n Previous Agent {previous_agent} \n Current agent {current_agent}"
    ).is_equal_to(previous_agent)

def _verify_agent_reported_supported_feature_flag(self):
"""
RSM update rely on supported flag that agent sends to CRP.So, checking if GA reports feature flag from the agent log
Expand Down
Loading