From 787be769648da118dfa084b61a0e94b7714db74b Mon Sep 17 00:00:00 2001 From: narrieta Date: Mon, 24 Jan 2022 15:49:19 -0800 Subject: [PATCH 1/5] Move get_vm_settings() to HostPluginProtocol --- azurelinuxagent/common/protocol/hostplugin.py | 182 ++++++++++++++- azurelinuxagent/common/protocol/wire.py | 209 ++---------------- tests/protocol/test_extensions_goal_state.py | 4 +- tests/protocol/test_hostplugin.py | 125 ++++++++++- tests/protocol/test_wire.py | 120 +--------- 5 files changed, 322 insertions(+), 318 deletions(-) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 3198161275..5da5b73134 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -25,9 +25,11 @@ from azurelinuxagent.common import logger from azurelinuxagent.common.errorstate import ErrorState, ERROR_STATE_HOST_PLUGIN_FAILURE from azurelinuxagent.common.event import WALAEventOperation, add_event -from azurelinuxagent.common.exception import HttpError, ProtocolError -from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.exception import HttpError, ProtocolError, ResourceGoneError +from azurelinuxagent.common.utils.flexible_version import FlexibleVersion +from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.protocol.healthservice import HealthService +from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.utils import restutil from azurelinuxagent.common.utils import textutil from azurelinuxagent.common.utils.textutil import remove_bom @@ -79,6 +81,11 @@ def __init__(self, endpoint, container_id, role_config_name): self.status_error_state = ErrorState(min_timedelta=ERROR_STATE_HOST_PLUGIN_FAILURE) self.fetch_last_timestamp = None self.status_last_timestamp = None + self._host_plugin_version = FlexibleVersion("0.0.0.0") # Version 0 means "unknown" + self._host_plugin_supports_vm_settings = False + self._host_plugin_supports_vm_settings_next_check = datetime.datetime.now() + self._vm_settings_error_reporter = _VmSettingsErrorReporter() + self._cached_vm_settings = None # Cached value of the most recent ExtensionsGoalStateFromVmSettings @staticmethod def _extract_deployment_id(role_config_name): @@ -383,3 +390,174 @@ def _base64_encode(self, data): if PY_VERSION_MAJOR > 2: return s.decode('utf-8') return s + + def fetch_vm_settings(self, force_update): + """ + Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and + a boolean indicating if they are an updated (True) or a cached value (False). + + Raises TypeError if the HostGAPlugin does not support the vmSettings API, ResourceGoneError if the container ID and roleconfig name + need to be refreshed, or ProtocolError if the request fails for any other reason (e.g. not supported, time out, server error). + """ + def raise_not_supported(reset_state=False): + if reset_state: + self._host_plugin_supports_vm_settings = False + self._host_plugin_supports_vm_settings_next_check = datetime.datetime.now() + datetime.timedelta(hours=6) # check again in 6 hours + # "Not supported" is not considered an error, so don't use self._vm_settings_error_reporter to report it + logger.info("vmSettings is not supported") + add_event(op=WALAEventOperation.HostPlugin, message="vmSettings is not supported", is_success=True) + raise VmSettingsNotSupported() + + try: + # Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check + if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.datetime.now(): + raise_not_supported() + + etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag + correlation_id = str(uuid.uuid4()) + + def format_message(msg): + return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg) + + def get_vm_settings(): + url, headers = self.get_vm_settings_request(correlation_id) + if etag is not None: + headers['if-none-match'] = etag + return restutil.http_get(url, headers=headers, use_proxy=False, max_retry=1, return_raw_response=True) + + self._vm_settings_error_reporter.report_request() + + response = get_vm_settings() + + if response.status == httpclient.GONE: + raise ResourceGoneError() + + if response.status == httpclient.NOT_FOUND: # the HostGAPlugin does not support FastTrack + raise_not_supported(reset_state=True) + + if response.status == httpclient.NOT_MODIFIED: # The goal state hasn't changed, return the current instance + return self._cached_vm_settings, False + + if response.status != httpclient.OK: + error_description = restutil.read_response_error(response) + # For historical reasons the HostGAPlugin returns 502 (BAD_GATEWAY) for internal errors instead of using + # 500 (INTERNAL_SERVER_ERROR). We add a short prefix to the error message in the hope that it will help + # clear any confusion produced by the poor choice of status code. + if response.status == httpclient.BAD_GATEWAY: + error_description = "[Internal error in HostGAPlugin] {0}".format(error_description) + error_description = format_message(error_description) + + if 400 <= response.status <= 499: + self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ClientError) + elif 500 <= response.status <= 599: + self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ServerError) + else: + self._vm_settings_error_reporter.report_error(error_description) + + raise ProtocolError(error_description) + + for h in response.getheaders(): + if h[0].lower() == 'etag': + response_etag = h[1] + break + else: # since the vmSettings were updated, the response must include an etag + message = format_message("The vmSettings response does not include an Etag header") + self._vm_settings_error_reporter.report_error(message) + raise ProtocolError(message) + + response_content = ustr(response.read(), encoding='utf-8') + + vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings(response_etag, response_content) + + # log the HostGAPlugin version + if vm_settings.host_ga_plugin_version != self._host_plugin_version: + self._host_plugin_version = vm_settings.host_ga_plugin_version + message = "HostGAPlugin version: {0}".format(vm_settings.host_ga_plugin_version) + logger.info(message) + add_event(op=WALAEventOperation.HostPlugin, message=message, is_success=True) + + # Don't support HostGAPlugin versions older than 115 + if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.115"): + raise_not_supported(reset_state=True) + + logger.info("Fetched new vmSettings [correlation ID: {0} New eTag: {1}]", correlation_id, vm_settings.etag) + self._host_plugin_supports_vm_settings = True + self._cached_vm_settings = vm_settings + return vm_settings, True + + except (ProtocolError, ResourceGoneError, VmSettingsNotSupported): + raise + except Exception as exception: + if isinstance(exception, IOError) and "timed out" in ustr(exception): + message = format_message("Timeout") + self._vm_settings_error_reporter.report_error(message, _VmSettingsError.Timeout) + else: + message = format_message("Request failed: {0}".format(textutil.format_exception(exception))) + self._vm_settings_error_reporter.report_error(message, _VmSettingsError.RequestFailed) + raise ProtocolError(message) + finally: + self._vm_settings_error_reporter.report_summary() + + +class VmSettingsNotSupported(TypeError): + pass + + +class _VmSettingsError(object): + ServerError = 'ServerError' + ClientError = 'ClientError' + Timeout = 'Timeout' + RequestFailed = 'RequestFailed' + + +class _VmSettingsErrorReporter(object): + _MaxErrors = 5 # Max number of error reported by period + _Period = datetime.timedelta(hours=1) # How often to report the summary + + def __init__(self): + self._reset() + + def _reset(self): + self._request_count = 0 # Total number of vmSettings HTTP requests + self._error_count = 0 # Total number of errors issuing vmSettings requests (includes all kinds of errors) + self._server_error_count = 0 # Count of server side errors (HTTP status in the 500s) + self._client_error_count = 0 # Count of client side errors (HTTP status in the 400s) + self._timeout_count = 0 # Count of timeouts on vmSettings requests + self._request_failure_count = 0 # Total count of requests that could not be issued (does not include timeouts or requests that were actually issued and failed, for example, with 500 or 400 statuses) + self._next_period = datetime.datetime.now() + _VmSettingsErrorReporter._Period + + def report_request(self): + self._request_count += 1 + + def report_error(self, error, category=None): + self._error_count += 1 + + if self._error_count <= _VmSettingsErrorReporter._MaxErrors: + add_event(op=WALAEventOperation.VmSettings, message=error, is_success=False, log_event=False) + + if category == _VmSettingsError.ServerError: + self._server_error_count += 1 + elif category == _VmSettingsError.ClientError: + self._client_error_count += 1 + elif category == _VmSettingsError.Timeout: + self._timeout_count += 1 + elif category == _VmSettingsError.RequestFailed: + self._request_failure_count += 1 + + def report_summary(self): + if datetime.datetime.now() >= self._next_period: + summary = { + "requests": self._request_count, + "errors": self._error_count, + "serverErrors": self._server_error_count, + "clientErrors": self._client_error_count, + "timeouts": self._timeout_count, + "failedRequests": self._request_failure_count + } + # always send telemetry, but log errors only + message = json.dumps(summary) + add_event(op=WALAEventOperation.VmSettingsSummary, message=message, is_success=False, log_event=False) + if self._error_count > 0: + logger.info("[VmSettingsSummary] {0}", message) + + self._reset() diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index c5c19a8f60..f614e8c815 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -20,7 +20,6 @@ import os import random import time -import uuid import xml.sax.saxutils as saxutils from collections import defaultdict from datetime import datetime, timedelta @@ -35,16 +34,14 @@ from azurelinuxagent.common.exception import ProtocolNotFoundError, \ ResourceGoneError, ExtensionDownloadError, InvalidContainerError, ProtocolError, HttpError from azurelinuxagent.common.future import httpclient, bytebuffer, ustr -from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory from azurelinuxagent.common.protocol.goal_state import GoalState, TRANSPORT_CERT_FILE_NAME, TRANSPORT_PRV_FILE_NAME -from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol +from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol, VmSettingsNotSupported from azurelinuxagent.common.protocol.restapi import DataContract, ExtHandlerPackage, \ ExtHandlerPackageList, ProvisionStatus, VMInfo, VMStatus from azurelinuxagent.common.telemetryevent import GuestAgentExtensionEventsSchema from azurelinuxagent.common.utils import fileutil, restutil from azurelinuxagent.common.utils.archive import StateFlusher from azurelinuxagent.common.utils.cryptutil import CryptUtil -from azurelinuxagent.common.utils.flexible_version import FlexibleVersion from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, \ findtext, gettext, remove_bom, get_bytes_from_pem, parse_json from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION @@ -576,14 +573,9 @@ def __init__(self, endpoint): self._endpoint = endpoint self._goal_state = None self._extensions_goal_state = None # The goal state to use for extensions; can be an ExtensionsGoalStateFromVmSettings or ExtensionsGoalStateFromExtensionsConfig - self._cached_vm_settings = None # Cached value of the most recent ExtensionsGoalStateFromVmSettings self._host_plugin = None - self._host_plugin_version = FlexibleVersion("0.0.0.0") # Version 0 means "unknown" - self._host_plugin_supports_vm_settings = False - self._host_plugin_supports_vm_settings_next_check = datetime.now() self.status_blob = StatusBlob(self) self.goal_state_flusher = StateFlusher(conf.get_lib_dir()) - self._vm_settings_error_reporter = _VmSettingsErrorReporter() def get_endpoint(self): return self._endpoint @@ -799,18 +791,23 @@ def update_goal_state(self, force_update=False): # goal_state = GoalState(self) - self._update_host_plugin(goal_state.container_id, goal_state.role_config_name) + host_ga_plugin = self.get_host_plugin() + host_ga_plugin.update_container_id(goal_state.container_id) + host_ga_plugin.update_role_config_name(goal_state.role_config_name) # # Then we fetch the vmSettings from the HostGAPlugin; the response will include the goal state for extensions. # - vm_settings_goal_state, vm_settings_goal_state_updated = (None, False) + vm_settings, vm_settings_updated = (None, False) if conf.get_enable_fast_track(): try: - vm_settings_goal_state, vm_settings_goal_state_updated = self._fetch_vm_settings_goal_state(force_update=force_update) + vm_settings, vm_settings_updated = host_ga_plugin.fetch_vm_settings(force_update=force_update) except VmSettingsNotSupported: pass # if vmSettings are not supported we use extensionsConfig below + except ResourceGoneError: + self.update_host_plugin_from_goal_state() + vm_settings, vm_settings_updated = host_ga_plugin.fetch_vm_settings(force_update=force_update) # # Now we fetch the rest of the goal state from the WireServer (but ony if needed: initialization, a "forced" update, or @@ -831,135 +828,28 @@ def update_goal_state(self, force_update=False): # # And, lastly, we use extensionsConfig if we don't have the vmSettings (Fast Track may be disabled or not supported). # - if vm_settings_goal_state is not None: - self._extensions_goal_state = vm_settings_goal_state + if vm_settings is not None: + self._extensions_goal_state = vm_settings else: self._extensions_goal_state = self._goal_state.extensions_config # # If either goal state changed (goal_state or vm_settings_goal_state) save them # - if goal_state_updated or vm_settings_goal_state_updated: - self._save_goal_state() + if goal_state_updated or vm_settings_updated: + self._save_goal_state(vm_settings) except ProtocolError: raise except Exception as exception: raise ProtocolError("Error fetching goal state: {0}".format(ustr(exception))) - def _fetch_vm_settings_goal_state(self, force_update): - """ - Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and - a boolean indicating if they are an updated (True) or a cached value (False). - - Raises VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API, or ProtocolError if the request fails for any other reason - (e.g. not supported, time out, server error). - """ - def raise_not_supported(reset_state=False): - if reset_state: - self._host_plugin_supports_vm_settings = False - self._host_plugin_supports_vm_settings_next_check = datetime.now() + timedelta(hours=6) # check again in 6 hours - # "Not supported" is not considered an error, so don't use self._vm_settings_error_reporter to report it - logger.info("vmSettings is not supported") - add_event(op=WALAEventOperation.HostPlugin, message="vmSettings is not supported", is_success=True) - raise VmSettingsNotSupported() - - try: - # Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check - if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.now(): - raise_not_supported() - - etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag - correlation_id = str(uuid.uuid4()) - - def format_message(msg): - return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg) - - def get_vm_settings(): - url, headers = self.get_host_plugin().get_vm_settings_request(correlation_id) - if etag is not None: - headers['if-none-match'] = etag - return restutil.http_get(url, headers=headers, use_proxy=False, max_retry=1, return_raw_response=True) - - self._vm_settings_error_reporter.report_request() - - response = get_vm_settings() - - if response.status == httpclient.GONE: # retry after refreshing the HostGAPlugin - self.update_host_plugin_from_goal_state() - response = get_vm_settings() - - if response.status == httpclient.NOT_FOUND: # the HostGAPlugin does not support FastTrack - raise_not_supported(reset_state=True) - - if response.status == httpclient.NOT_MODIFIED: # The goal state hasn't changed, return the current instance - return self._cached_vm_settings, False - - if response.status != httpclient.OK: - error_description = restutil.read_response_error(response) - # For historical reasons the HostGAPlugin returns 502 (BAD_GATEWAY) for internal errors instead of using - # 500 (INTERNAL_SERVER_ERROR). We add a short prefix to the error message in the hope that it will help - # clear any confusion produced by the poor choice of status code. - if response.status == httpclient.BAD_GATEWAY: - error_description = "[Internal error in HostGAPlugin] {0}".format(error_description) - error_description = format_message(error_description) - - if 400 <= response.status <= 499: - self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ClientError) - elif 500 <= response.status <= 599: - self._vm_settings_error_reporter.report_error(error_description, _VmSettingsError.ServerError) - else: - self._vm_settings_error_reporter.report_error(error_description) - - raise ProtocolError(error_description) - - for h in response.getheaders(): - if h[0].lower() == 'etag': - response_etag = h[1] - break - else: # since the vmSettings were updated, the response must include an etag - message = format_message("The vmSettings response does not include an Etag header") - self._vm_settings_error_reporter.report_error(message) - raise ProtocolError(message) - - response_content = self.decode_config(response.read()) - vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings(response_etag, response_content) - - # log the HostGAPlugin version - if vm_settings.host_ga_plugin_version != self._host_plugin_version: - self._host_plugin_version = vm_settings.host_ga_plugin_version - message = "HostGAPlugin version: {0}".format(vm_settings.host_ga_plugin_version) - logger.info(message) - add_event(op=WALAEventOperation.HostPlugin, message=message, is_success=True) - - # Don't support HostGAPlugin versions older than 115 - if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.115"): - raise_not_supported(reset_state=True) - - logger.info("Fetched new vmSettings [correlation ID: {0} New eTag: {1}]", correlation_id, vm_settings.etag) - self._host_plugin_supports_vm_settings = True - self._cached_vm_settings = vm_settings - return vm_settings, True - - except (ProtocolError, VmSettingsNotSupported): - raise - except Exception as exception: - if isinstance(exception, IOError) and "timed out" in ustr(exception): - message = format_message("Timeout") - self._vm_settings_error_reporter.report_error(message, _VmSettingsError.Timeout) - else: - message = format_message("Request failed: {0}".format(textutil.format_exception(exception))) - self._vm_settings_error_reporter.report_error(message, _VmSettingsError.RequestFailed) - raise ProtocolError(message) - finally: - self._vm_settings_error_reporter.report_summary() - def _update_host_plugin(self, container_id, role_config_name): if self._host_plugin is not None: self._host_plugin.update_container_id(container_id) self._host_plugin.update_role_config_name(role_config_name) - def _save_goal_state(self): + def _save_goal_state(self, vm_settings): try: self.goal_state_flusher.flush() except Exception as e: @@ -980,11 +870,10 @@ def save_if_not_none(goal_state_property, file_name): text = self._goal_state.extensions_config.get_redacted_text() if text != '': self._save_cache(text, EXT_CONF_FILE_NAME.format(self._goal_state.extensions_config.incarnation)) - # TODO: When Fast Track is fully enabled self._cached_vm_settings will go away and this can be deleted - if self._cached_vm_settings is not None: - text = self._cached_vm_settings.get_redacted_text() + if vm_settings is not None: + text = vm_settings.get_redacted_text() if text != '': - self._save_cache(text, VM_SETTINGS_FILE_NAME.format(self._cached_vm_settings.id)) + self._save_cache(text, VM_SETTINGS_FILE_NAME.format(vm_settings.id)) # END TODO except Exception as e: @@ -1507,67 +1396,3 @@ def is_on_hold(self): if 'onHold' in self.__dict__: return str(self.onHold).lower() == 'true' # pylint: disable=E1101 return False - - -class _VmSettingsError(object): - ServerError = 'ServerError' - ClientError = 'ClientError' - Timeout = 'Timeout' - RequestFailed = 'RequestFailed' - - -class _VmSettingsErrorReporter(object): - _MaxErrors = 5 # Max number of error reported by period - _Period = timedelta(hours=1) # How often to report the summary - - def __init__(self): - self._reset() - - def _reset(self): - self._request_count = 0 # Total number of vmSettings HTTP requests - self._error_count = 0 # Total number of errors issuing vmSettings requests (includes all kinds of errors) - self._server_error_count = 0 # Count of server side errors (HTTP status in the 500s) - self._client_error_count = 0 # Count of client side errors (HTTP status in the 400s) - self._timeout_count = 0 # Count of timeouts on vmSettings requests - self._request_failure_count = 0 # Total count of requests that could not be issued (does not include timeouts or requests that were actually issued and failed, for example, with 500 or 400 statuses) - self._next_period = datetime.now() + _VmSettingsErrorReporter._Period - - def report_request(self): - self._request_count += 1 - - def report_error(self, error, category=None): - self._error_count += 1 - - if self._error_count <= _VmSettingsErrorReporter._MaxErrors: - add_event(op=WALAEventOperation.VmSettings, message=error, is_success=False, log_event=False) - - if category == _VmSettingsError.ServerError: - self._server_error_count += 1 - elif category == _VmSettingsError.ClientError: - self._client_error_count += 1 - elif category == _VmSettingsError.Timeout: - self._timeout_count += 1 - elif category == _VmSettingsError.RequestFailed: - self._request_failure_count += 1 - - def report_summary(self): - if datetime.now() >= self._next_period: - summary = { - "requests": self._request_count, - "errors": self._error_count, - "serverErrors": self._server_error_count, - "clientErrors": self._client_error_count, - "timeouts": self._timeout_count, - "failedRequests": self._request_failure_count - } - # always send telemetry, but log errors only - message = json.dumps(summary) - add_event(op=WALAEventOperation.VmSettingsSummary, message=message, is_success=False, log_event=False) - if self._error_count > 0: - logger.info("[VmSettingsSummary] {0}", message) - - self._reset() - - -class VmSettingsNotSupported(TypeError): - pass diff --git a/tests/protocol/test_extensions_goal_state.py b/tests/protocol/test_extensions_goal_state.py index dcc0c2c79a..279d4193fa 100644 --- a/tests/protocol/test_extensions_goal_state.py +++ b/tests/protocol/test_extensions_goal_state.py @@ -31,7 +31,7 @@ def test_extension_goal_state_should_parse_requested_version_properly(self): for manifest in fabric_manifests: self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") - vm_settings_ga_manifests = protocol.client._cached_vm_settings.agent_manifests + vm_settings_ga_manifests = protocol.client._host_plugin._cached_vm_settings.agent_manifests for manifest in vm_settings_ga_manifests: self.assertEqual(manifest.requested_version_string, "0.0.0.0", "Version should be None") @@ -43,6 +43,6 @@ def test_extension_goal_state_should_parse_requested_version_properly(self): for manifest in fabric_manifests: self.assertEqual(manifest.requested_version_string, "9.9.9.10", "Version should be 9.9.9.10") - vm_settings_ga_manifests = protocol.client._cached_vm_settings.agent_manifests + vm_settings_ga_manifests = protocol.client._host_plugin._cached_vm_settings.agent_manifests for manifest in vm_settings_ga_manifests: self.assertEqual(manifest.requested_version_string, "9.9.9.9", "Version should be 9.9.9.9") diff --git a/tests/protocol/test_hostplugin.py b/tests/protocol/test_hostplugin.py index 2a18e48a27..a55ae41bb7 100644 --- a/tests/protocol/test_hostplugin.py +++ b/tests/protocol/test_hostplugin.py @@ -26,13 +26,13 @@ import azurelinuxagent.common.protocol.restapi as restapi import azurelinuxagent.common.protocol.wire as wire from azurelinuxagent.common.errorstate import ErrorState -from azurelinuxagent.common.exception import HttpError, ResourceGoneError +from azurelinuxagent.common.exception import HttpError, ResourceGoneError, ProtocolError from azurelinuxagent.common.future import ustr, httpclient from azurelinuxagent.common.osutil.default import UUID_PATTERN -from azurelinuxagent.common.protocol.hostplugin import API_VERSION +from azurelinuxagent.common.protocol.hostplugin import API_VERSION, _VmSettingsErrorReporter, VmSettingsNotSupported from azurelinuxagent.common.utils import restutil from azurelinuxagent.common.version import AGENT_VERSION, AGENT_NAME -from tests.protocol.mocks import mock_wire_protocol, MockHttpResponse +from tests.protocol.mocks import mock_wire_protocol, mockwiredata, MockHttpResponse from tests.protocol.HttpRequestPredicates import HttpRequestPredicates from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_NO_EXT from tests.tools import AgentTestCase, PY_VERSION_MAJOR, Mock, PropertyMock, patch @@ -862,6 +862,125 @@ def test_should_report(self): self.assertEqual(False, actual) +class TestHostPluginVmSettings(HttpRequestPredicates, AgentTestCase): + def test_it_should_raise_protocol_error_when_the_vm_settings_request_fails(self): + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR, body="TEST ERROR") + return None + + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + protocol.set_http_handlers(http_get_handler=http_get_handler) + with self.assertRaisesRegexCM(ProtocolError, r'GET vmSettings \[correlation ID: .* eTag: .*\]: \[HTTP Failed\] \[500: None].*TEST ERROR.*'): + protocol.client.get_host_plugin().fetch_vm_settings(False) + + @staticmethod + def _fetch_vm_settings_ignoring_errors(protocol): + try: + protocol.client.get_host_plugin().fetch_vm_settings(False) + except (ProtocolError, VmSettingsNotSupported): + pass + + def test_it_should_keep_track_of_errors_in_vm_settings_requests(self): + mock_response = None + + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + if isinstance(mock_response, Exception): + # E0702: Raising NoneType while only classes or instances are allowed (raising-bad-type) - Disabled: we never raise None + raise mock_response # pylint: disable=raising-bad-type + return mock_response + return None + + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: + mock_response = MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR) + self._fetch_vm_settings_ignoring_errors(protocol) + + mock_response = MockHttpResponse(httpclient.BAD_REQUEST) + self._fetch_vm_settings_ignoring_errors(protocol) + self._fetch_vm_settings_ignoring_errors(protocol) + + mock_response = IOError("timed out") + self._fetch_vm_settings_ignoring_errors(protocol) + + mock_response = httpclient.HTTPException() + self._fetch_vm_settings_ignoring_errors(protocol) + self._fetch_vm_settings_ignoring_errors(protocol) + + # force the summary by resetting its period and calling update_goal_state + with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: + mock_response = None # stop producing errors + protocol.client._host_plugin._vm_settings_error_reporter._next_period = datetime.datetime.now() + self._fetch_vm_settings_ignoring_errors(protocol) + summary_text = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettingsSummary"] + + self.assertEqual(1, len(summary_text), "Exactly 1 summary should have been produced. Got: {0} ".format(summary_text)) + + summary = json.loads(summary_text[0]) + + expected = { + "requests": 6 + 2, # two extra calls to update_goal_state (when creating the mock protocol and when forcing the summary) + "errors": 6, + "serverErrors": 1, + "clientErrors": 2, + "timeouts": 1, + "failedRequests": 2 + } + + self.assertEqual(expected, summary, "The count of errors is incorrect") + + def test_it_should_limit_the_number_of_errors_it_reports(self): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.BAD_GATEWAY) # HostGAPlugin returns 502 for internal errors + return None + protocol.set_http_handlers(http_get_handler=http_get_handler) + + with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: + for _ in range(_VmSettingsErrorReporter._MaxErrors + 3): + self._fetch_vm_settings_ignoring_errors(protocol) + + messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + + self.assertEqual(_VmSettingsErrorReporter._MaxErrors, len(messages), "The number of errors reported is not the max allowed (got: {0})".format(messages)) + + # Reset the error reporter and verify that additional errors are reported + protocol.client._host_plugin._vm_settings_error_reporter._next_period = datetime.datetime.now() + self._fetch_vm_settings_ignoring_errors(protocol) # this triggers the reset + + with patch("azurelinuxagent.common.protocol.hostplugin.add_event") as add_event: + for _ in range(3): + self._fetch_vm_settings_ignoring_errors(protocol) + + messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] + + self.assertEqual(3, len(messages), "Expected additional errors to be reported in the next period (got: {0})".format(messages)) + + def test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self): + with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: + def http_get_handler(url, *_, **__): + if self.is_host_plugin_vm_settings_request(url): + return MockHttpResponse(httpclient.NOT_FOUND) # HostGAPlugin returns 404 if the API is not supported + return None + protocol.set_http_handlers(http_get_handler=http_get_handler) + + def get_vm_settings_call_count(): + return len([url for url in protocol.get_tracked_urls() if "vmSettings" in url]) + + self._fetch_vm_settings_ignoring_errors(protocol) + self.assertEqual(1, get_vm_settings_call_count(), "There should have been an initial call to vmSettings.") + + protocol.client.update_goal_state() + protocol.client.update_goal_state() + self.assertEqual(1, get_vm_settings_call_count(), "Additional calls to update_goal_state should not have produced extra calls to vmSettings.") + + # reset the vmSettings check period; this should restart the calls to the API + protocol.client._host_plugin._host_plugin_supports_vm_settings_next_check = datetime.datetime.now() + protocol.client.update_goal_state() + self.assertEqual(2, get_vm_settings_call_count(), "A second call to vmSettings was expecting after the check period has elapsed.") + + class MockResponse: def __init__(self, body, status_code, reason=''): self.body = body diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 0d505710f0..198d5c3375 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -39,7 +39,7 @@ from azurelinuxagent.common.protocol.extensions_goal_state_from_vm_settings import ExtensionsGoalStateFromVmSettings from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol from azurelinuxagent.common.protocol.wire import WireProtocol, WireClient, \ - StatusBlob, VMStatus, EXT_CONF_FILE_NAME, _VmSettingsErrorReporter + StatusBlob, VMStatus, EXT_CONF_FILE_NAME from azurelinuxagent.common.telemetryevent import GuestAgentExtensionEventsSchema, \ TelemetryEventParam, TelemetryEvent from azurelinuxagent.common.utils import restutil @@ -1241,42 +1241,6 @@ def http_get_vm_settings(_method, _host, _relative_url, **kwargs): self.assertEqual("GET_VM_SETTINGS_TEST_CONTAINER_ID", request_headers[1][hostplugin._HEADER_CONTAINER_ID], "The retry request did not include the expected header for the ContainerId") self.assertEqual("GET_VM_SETTINGS_TEST_ROLE_CONFIG_NAME", request_headers[1][hostplugin._HEADER_HOST_CONFIG_NAME], "The retry request did not include the expected header for the RoleConfigName") - - @staticmethod - def _update_goal_state_ignoring_errors(protocol): - try: - protocol.client.update_goal_state() - except ProtocolError: - pass - - def test_it_should_limit_the_number_of_errors_it_reports(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - return MockHttpResponse(httpclient.BAD_GATEWAY) # HostGAPlugin returns 502 for internal errors - return None - protocol.set_http_handlers(http_get_handler=http_get_handler) - - with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: - for _ in range(_VmSettingsErrorReporter._MaxErrors + 3): - self._update_goal_state_ignoring_errors(protocol) - - messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] - - self.assertEqual(_VmSettingsErrorReporter._MaxErrors, len(messages), "The number of errors reported is not the max allowed (got: {0})".format(messages)) - - # Reset the error reporter and verify that additional errors are reported - protocol.client._vm_settings_error_reporter._next_period = datetime.now() - self._update_goal_state_ignoring_errors(protocol) # this triggers the reset - - with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: - for _ in range(3): - self._update_goal_state_ignoring_errors(protocol) - - messages = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettings"] - - self.assertEqual(3, len(messages), "Expected additional errors to be reported in the next period (got: {0})".format(messages)) - def test_it_should_use_vm_settings_by_default(self): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: extensions_goal_state = protocol.get_extensions_goal_state() @@ -1303,17 +1267,6 @@ def http_get_handler(url, *_, **__): with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) - def test_it_should_raise_protocol_error_when_the_vm_settings_request_fails(self): - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - return MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR, body="TEST ERROR") - return None - - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - protocol.set_http_handlers(http_get_handler=http_get_handler) - with self.assertRaisesRegexCM(ProtocolError, r'GET vmSettings \[correlation ID: .* eTag: .*\]: \[HTTP Failed\] \[500: None].*TEST ERROR.*'): - protocol.update_goal_state() - def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_supported(self): data_file = mockwiredata.DATA_FILE_VM_SETTINGS.copy() data_file["vm_settings"] = "hostgaplugin/vm_settings-unsupported_version.json" @@ -1321,77 +1274,6 @@ def test_it_should_use_extensions_config_when_the_host_ga_plugin_version_is_not_ with mock_wire_protocol(data_file) as protocol: self._assert_is_extensions_goal_state_from_extensions_config(protocol.get_extensions_goal_state()) - def test_it_should_keep_track_of_errors_in_vm_settings_requests(self): - mock_response = None - - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - if isinstance(mock_response, Exception): - # E0702: Raising NoneType while only classes or instances are allowed (raising-bad-type) - Disabled: we never raise None - raise mock_response # pylint: disable=raising-bad-type - return mock_response - return None - - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS, http_get_handler=http_get_handler) as protocol: - mock_response = MockHttpResponse(httpclient.INTERNAL_SERVER_ERROR) - self._update_goal_state_ignoring_errors(protocol) - - mock_response = MockHttpResponse(httpclient.BAD_REQUEST) - self._update_goal_state_ignoring_errors(protocol) - self._update_goal_state_ignoring_errors(protocol) - - mock_response = IOError("timed out") - self._update_goal_state_ignoring_errors(protocol) - - mock_response = httpclient.HTTPException() - self._update_goal_state_ignoring_errors(protocol) - self._update_goal_state_ignoring_errors(protocol) - - # force the summary by resetting its period and calling update_goal_state - with patch("azurelinuxagent.common.protocol.wire.add_event") as add_event: - mock_response = None # stop producing errors - protocol.client._vm_settings_error_reporter._next_period = datetime.now() - self._update_goal_state_ignoring_errors(protocol) - summary_text = [kwargs["message"] for _, kwargs in add_event.call_args_list if kwargs["op"] == "VmSettingsSummary"] - - self.assertEqual(1, len(summary_text), "Exactly 1 summary should have been produced. Got: {0} ".format(summary_text)) - - summary = json.loads(summary_text[0]) - - expected = { - "requests": 6 + 2, # two extra calls to update_goal_state (when creating the mock protocol and when forcing the summary) - "errors": 6, - "serverErrors": 1, - "clientErrors": 2, - "timeouts": 1, - "failedRequests": 2 - } - - self.assertEqual(expected, summary, "The count of errors is incorrect") - - def test_it_should_stop_issuing_vm_settings_requests_when_api_is_not_supported(self): - with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol: - def http_get_handler(url, *_, **__): - if self.is_host_plugin_vm_settings_request(url): - return MockHttpResponse(httpclient.NOT_FOUND) # HostGAPlugin returns 404 if the API is not supported - return None - protocol.set_http_handlers(http_get_handler=http_get_handler) - - def get_vm_settings_call_count(): - return len([url for url in protocol.get_tracked_urls() if "vmSettings" in url]) - - protocol.client.update_goal_state() - self.assertEqual(1, get_vm_settings_call_count(), "There should have been an initial call to vmSettings.") - - protocol.client.update_goal_state() - protocol.client.update_goal_state() - self.assertEqual(1, get_vm_settings_call_count(), "Additional calls to update_goal_state should not have produced extra calls to vmSettings.") - - # reset the vmSettings check period; this should restart the calls to the API - protocol.client._host_plugin_supports_vm_settings_next_check = datetime.now() - protocol.client.update_goal_state() - self.assertEqual(2, get_vm_settings_call_count(), "A second call to vmSettings was expecting after the check period has elapsed.") - class UpdateHostPluginFromGoalStateTestCase(AgentTestCase): """ From 817bdae4474638344d21a5fcd017e490f6b9fc66 Mon Sep 17 00:00:00 2001 From: narrieta Date: Mon, 24 Jan 2022 16:28:12 -0800 Subject: [PATCH 2/5] remove comment --- azurelinuxagent/common/protocol/wire.py | 1 - 1 file changed, 1 deletion(-) diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index f614e8c815..efce4e6d7c 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -874,7 +874,6 @@ def save_if_not_none(goal_state_property, file_name): text = vm_settings.get_redacted_text() if text != '': self._save_cache(text, VM_SETTINGS_FILE_NAME.format(vm_settings.id)) - # END TODO except Exception as e: logger.warn("Failed to save the goal state to disk: {0}", ustr(e)) From 8bee1b72093a92c13ac600e1ca749195809a4086 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 26 Jan 2022 07:33:11 -0800 Subject: [PATCH 3/5] Remove comment --- azurelinuxagent/common/protocol/hostplugin.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 5da5b73134..113ec7f125 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -554,9 +554,7 @@ def report_summary(self): "timeouts": self._timeout_count, "failedRequests": self._request_failure_count } - # always send telemetry, but log errors only - message = json.dumps(summary) - add_event(op=WALAEventOperation.VmSettingsSummary, message=message, is_success=False, log_event=False) + add_event(op=WALAEventOperation.VmSettingsSummary, message=json.dumps(summary), is_success=False, log_event=False) if self._error_count > 0: logger.info("[VmSettingsSummary] {0}", message) From 422303468f7afa3e7e96cd972d2784dc9be6c2a0 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 26 Jan 2022 09:49:33 -0800 Subject: [PATCH 4/5] fix variable --- azurelinuxagent/common/protocol/hostplugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index 113ec7f125..d5087a3d52 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -554,7 +554,8 @@ def report_summary(self): "timeouts": self._timeout_count, "failedRequests": self._request_failure_count } - add_event(op=WALAEventOperation.VmSettingsSummary, message=json.dumps(summary), is_success=False, log_event=False) + message = json.dumps(summary) + add_event(op=WALAEventOperation.VmSettingsSummary, message=message, is_success=False, log_event=False) if self._error_count > 0: logger.info("[VmSettingsSummary] {0}", message) From 9a353b15f2bf3df4496c575beea0a56c611225c4 Mon Sep 17 00:00:00 2001 From: Norberto Arrieta Date: Wed, 26 Jan 2022 10:59:40 -0800 Subject: [PATCH 5/5] fix comment --- azurelinuxagent/common/protocol/hostplugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azurelinuxagent/common/protocol/hostplugin.py b/azurelinuxagent/common/protocol/hostplugin.py index d5087a3d52..9b6ab0762e 100644 --- a/azurelinuxagent/common/protocol/hostplugin.py +++ b/azurelinuxagent/common/protocol/hostplugin.py @@ -396,7 +396,7 @@ def fetch_vm_settings(self, force_update): Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and a boolean indicating if they are an updated (True) or a cached value (False). - Raises TypeError if the HostGAPlugin does not support the vmSettings API, ResourceGoneError if the container ID and roleconfig name + Raises VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API, ResourceGoneError if the container ID and roleconfig name need to be refreshed, or ProtocolError if the request fails for any other reason (e.g. not supported, time out, server error). """ def raise_not_supported(reset_state=False):