Skip to content

Commit

Permalink
Improve logging of goal state processing (#2523)
Browse files Browse the repository at this point in the history
* Reorganize the history directory

* pylint

* python2

* write agent status directly to history

* Improve logging for goal state processing

* fix incarnation

* fix message

* Update test

* fix messages

* add return value

Co-authored-by: narrieta <narrieta>
  • Loading branch information
narrieta authored Mar 2, 2022
1 parent 6445aaa commit 415882d
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 63 deletions.
24 changes: 22 additions & 2 deletions azurelinuxagent/common/protocol/extensions_goal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ class GoalStateChannel(object):
Empty = "Empty"


class GoalStateSource(object):
Fabric = "Fabric"
FastTrack = "FastTrack"
Empty = "Empty"


class ExtensionsGoalState(object):
"""
ExtensionsGoalState represents the extensions information in the goal state; that information can originate from
Expand Down Expand Up @@ -57,7 +63,17 @@ def created_on_timestamp(self):
raise NotImplementedError()

@property
def source_channel(self):
def channel(self):
"""
Whether the goal state was retrieved from the WireServer or the HostGAPlugin
"""
raise NotImplementedError()

@property
def source(self):
"""
Whether the goal state originated from Fabric or Fast Track
"""
raise NotImplementedError()

@property
Expand Down Expand Up @@ -147,9 +163,13 @@ def created_on_timestamp(self):
return datetime.datetime.min

@property
def source_channel(self):
def channel(self):
return GoalStateChannel.Empty

@property
def source(self):
return GoalStateSource.Empty

@property
def status_upload_blob(self):
return None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ def create_from_extensions_config(incarnation, xml_text, wire_client):
return ExtensionsGoalStateFromExtensionsConfig(incarnation, xml_text, wire_client)

@staticmethod
def create_from_vm_settings(etag, json_text):
return ExtensionsGoalStateFromVmSettings(etag, json_text)
def create_from_vm_settings(etag, json_text, correlation_id):
return ExtensionsGoalStateFromVmSettings(etag, json_text, correlation_id)

Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.exception import ExtensionsConfigError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, GoalStateSource
from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, VMAgentManifest, ExtensionState, InVMGoalStateMetaData
from azurelinuxagent.common.utils.textutil import parse_doc, parse_json, findall, find, findtext, getattrib, gettext, format_exception, \
is_str_none_or_whitespace, is_str_empty
Expand Down Expand Up @@ -149,9 +149,13 @@ def created_on_timestamp(self):
return self._created_on_timestamp

@property
def source_channel(self):
def channel(self):
return GoalStateChannel.WireServer

@property
def source(self):
return GoalStateSource.Fabric

@property
def status_upload_blob(self):
return self._status_upload_blob
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@
class ExtensionsGoalStateFromVmSettings(ExtensionsGoalState):
_MINIMUM_TIMESTAMP = datetime.datetime(1900, 1, 1, 0, 0) # min value accepted by datetime.strftime()

def __init__(self, etag, json_text):
def __init__(self, etag, json_text, correlation_id):
super(ExtensionsGoalStateFromVmSettings, self).__init__()
self._id = "etag_{0}".format(etag)
self._etag = etag
self._fetch_correlation_id = correlation_id
self._text = json_text
self._host_ga_plugin_version = FlexibleVersion('0.0.0.0')
self._schema_version = FlexibleVersion('0.0.0.0')
Expand Down Expand Up @@ -74,28 +75,38 @@ def schema_version(self):

@property
def activity_id(self):
"""
The CRP activity id
"""
return self._activity_id

@property
def correlation_id(self):
"""
The correlation id for the CRP operation
"""
return self._correlation_id

@property
def fetch_correlation_id(self):
"""
The correlation id for the fetch operation (i.e. the call to the HostGAPlugin vmSettings API)
"""
return self._fetch_correlation_id

@property
def created_on_timestamp(self):
"""
Timestamp assigned by the CRP (time at which the Fast Track goal state was created)
Timestamp assigned by the CRP (time at which the goal state was created)
"""
return self._created_on_timestamp

@property
def source_channel(self):
def channel(self):
return GoalStateChannel.HostGAPlugin

@property
def source(self):
"""
Whether the goal state originated from Fabric or Fast Track
"""
return self._source

@property
Expand Down
3 changes: 3 additions & 0 deletions azurelinuxagent/common/protocol/goal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def update(self, force_update=False):
timestamp = create_timestamp()
vm_settings, vm_settings_updated = self._fetch_vm_settings(force_update=force_update)
if vm_settings_updated:
logger.info("Fetched new vmSettings [correlation ID: {0} eTag: {1} source: {2}]", vm_settings.fetch_correlation_id, vm_settings.etag, vm_settings.source)
self._history = GoalStateHistory(timestamp, vm_settings.etag)
self._extensions_goal_state = vm_settings
self._history.save_vm_settings(vm_settings.get_redacted_text())
Expand Down Expand Up @@ -244,6 +245,8 @@ def _fetch_extended_goal_state(self, xml_text, xml_doc, force_vm_settings_update
vm_settings, vm_settings_updated = self._fetch_vm_settings(force_update=force_vm_settings_update)

if vm_settings is not None:
new = " new " if vm_settings_updated else " "
logger.info("Fetched{0}vmSettings [correlation ID: {1} eTag: {2} source: {3}]", new, vm_settings.fetch_correlation_id, vm_settings.etag, vm_settings.source)
self._extensions_goal_state = vm_settings
if vm_settings_updated:
self._history.save_vm_settings(vm_settings.get_redacted_text())
Expand Down
3 changes: 1 addition & 2 deletions azurelinuxagent/common/protocol/hostplugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def format_message(msg):

response_content = ustr(response.read(), encoding='utf-8')

vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings(response_etag, response_content)
vm_settings = ExtensionsGoalStateFactory.create_from_vm_settings(response_etag, response_content, correlation_id)

# log the HostGAPlugin version
if vm_settings.host_ga_plugin_version != self._host_plugin_version:
Expand All @@ -485,7 +485,6 @@ def format_message(msg):
if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.117"):
raise_not_supported(reset_state=True)

logger.info("Fetched new vmSettings [correlation ID: {0} New eTag: {1}]", correlation_id, vm_settings.etag)
self._host_plugin_supports_vm_settings = True
self._cached_vm_settings = vm_settings
return vm_settings, True
Expand Down
65 changes: 27 additions & 38 deletions azurelinuxagent/ga/exthandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ def __init__(self, protocol):
# The GoalState Aggregate status needs to report the last status of the GoalState. Since we only process
# extensions on incarnation change, we need to maintain its state.
# Setting the status to None here. This would be overridden as soon as the first GoalState is processed
# (once self._extension_processing_allowed() is True).
self.__gs_aggregate_status = None

self.report_status_error_state = ErrorState()
Expand All @@ -294,53 +293,43 @@ def __last_gs_unsupported(self):
self.__gs_aggregate_status.code == GoalStateAggregateStatusCodes.GoalStateUnsupportedRequiredFeatures

def run(self):
etag, activity_id, correlation_id, gs_creation_time = None, None, None, None

try:
extensions_goal_state = self.protocol.get_goal_state().extensions_goal_state
gs = self.protocol.get_goal_state()
egs = gs.extensions_goal_state

# self.ext_handlers and etag need to be initialized first, since status reporting depends on them; also
# self.ext_handlers needs to be initialized before returning, since status reporting depends on it; also
# we make a deep copy of the extensions, since changes are made to self.ext_handlers while processing the extensions
self.ext_handlers = copy.deepcopy(extensions_goal_state.extensions)
etag = self.protocol.client.get_goal_state().incarnation
self.ext_handlers = copy.deepcopy(egs.extensions)

if not self._extension_processing_allowed():
return

gs_creation_time = extensions_goal_state.created_on_timestamp
activity_id = extensions_goal_state.activity_id
correlation_id = extensions_goal_state.correlation_id
utc_start = datetime.datetime.utcnow()
error = None
message = "ProcessExtensionsGoalState started [{0} channel: {1} source: {2} activity: {3} correlation {4} created: {5}]".format(
egs.id, egs.channel, egs.source, egs.activity_id, egs.correlation_id, egs.created_on_timestamp)
logger.info(message)
add_event(op=WALAEventOperation.ExtensionProcessing, message=message)

try:
self.__process_and_handle_extensions(gs.incarnation) # TODO: review the use of incarnation
self._cleanup_outdated_handlers()
except Exception as e:
error = u"Error processing extensions:{0}".format(textutil.format_exception(e))
finally:
duration = elapsed_milliseconds(utc_start)
if error is None:
message = 'ProcessExtensionsGoalState completed [{0} {1} ms]'.format(egs.id, duration)
logger.info(message)
else:
message = 'ProcessExtensionsGoalState failed [{0} {1} ms]\n{2}'.format(egs.id, duration, error)
logger.error(message)
add_event(op=WALAEventOperation.ExtensionProcessing, is_success=(error is None), message=message, log_event=False, duration=duration)

except Exception as error:
msg = u"ProcessExtensionsInGoalState - Exception processing extension handlers:{0}".format(textutil.format_exception(error))
logger.warn(msg)
logger.error(msg)
add_event(op=WALAEventOperation.ExtensionProcessing, is_success=False, message=msg, log_event=False)
return

def goal_state_debug_info(duration=None):
if duration is None:
return "[Incarnation: {0}; Activity Id: {1}; Correlation Id: {2}; GS Creation Time: {3}]".format(etag, activity_id, correlation_id, gs_creation_time)
else:
return "[Incarnation: {0}; {1} ms; Activity Id: {2}; Correlation Id: {3}; GS Creation Time: {4}]".format(etag, duration, activity_id, correlation_id, gs_creation_time)

utc_start = datetime.datetime.utcnow()
error = None
message = "ProcessExtensionsInGoalState started {0}".format(goal_state_debug_info())
logger.info(message)
add_event(op=WALAEventOperation.ExtensionProcessing, message=message)
try:
self.__process_and_handle_extensions(etag)
self._cleanup_outdated_handlers()
except Exception as e:
error = u"ProcessExtensionsInGoalState - Exception processing extension handlers:{0}".format(textutil.format_exception(e))
finally:
duration = elapsed_milliseconds(utc_start)
if error is None:
message = 'ProcessExtensionsInGoalState completed {0}'.format(goal_state_debug_info(duration=duration))
logger.info(message)
else:
message = 'ProcessExtensionsInGoalState failed {0}\nError:{1}'.format(goal_state_debug_info(duration=duration), error)
logger.warn(message)
add_event(op=WALAEventOperation.ExtensionProcessing, is_success=(error is None), message=message, log_event=False, duration=duration)

def __get_unsupported_features(self):
required_features = self.protocol.get_goal_state().extensions_goal_state.required_features
Expand Down
2 changes: 1 addition & 1 deletion azurelinuxagent/ga/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@ def _report_status(self, exthandlers_handler):
if self._goal_state is None:
supports_fast_track = False
else:
supports_fast_track = self._goal_state.extensions_goal_state.source_channel == GoalStateChannel.HostGAPlugin
supports_fast_track = self._goal_state.extensions_goal_state.channel == GoalStateChannel.HostGAPlugin

vm_status = exthandlers_handler.report_ext_handlers_status(
incarnation_changed=self._processing_new_extensions_goal_state(),
Expand Down
11 changes: 3 additions & 8 deletions dcr/scenario_utils/agent_log_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,9 @@

AGENT_LOG_FILE = '/var/log/waagent.log'

# Examples:
# ProcessGoalState completed [Incarnation: 12; 23 ms]
# ProcessGoalState completed [Incarnation: 12; 23 ms; Activity Id: 555e551c-600e-4fb4-90ba-8ab8ec28eccc]
# ProcessGoalState completed [Incarnation: 12; 23 ms; Correlation Id: 555e551c-600e-4fb4-90ba-8ab8ec28eccc]
# ProcessGoalState completed [Incarnation: 12; 23 ms; GS Creation Time: 2020-11-09T17:48:50.000000Z]
GOAL_STATE_COMPLETED = r"ProcessExtensionsInGoalState completed\s\[Incarnation:\s(?P<incarnation>\d+);\s(?P<duration>\d+)\sms" \
r"(;\sActivity Id:\s(?P<activity_id>\S+))?(;\sCorrelation Id:\s(?P<correlation_id>\S+))?" \
r"(;\sGS Creation Time:\s(?P<gs_creation_time>\S+))?\]"
# Example:
# ProcessExtensionsGoalState completed [etag_2824367392948713696 4073 ms]
GOAL_STATE_COMPLETED = r"ProcessExtensionsGoalState completed\s\[(?P<id>[a-z_\d]+)\s(?P<duration>\d+)\sms\]"

# The format of the log has changed over time and the current log may include records from different sources. Most records are single-line, but some of them
# can span across multiple lines. We will assume records always start with a line similar to the examples below; any other lines will be assumed to be part
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,4 @@ def test_its_source_channel_should_be_wire_server(self):
with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol:
extensions_goal_state = protocol.get_goal_state().extensions_goal_state

self.assertEqual(GoalStateChannel.WireServer, extensions_goal_state.source_channel, "The source_channel is incorrect")
self.assertEqual(GoalStateChannel.WireServer, extensions_goal_state.channel, "The channel is incorrect")
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def test_its_source_channel_should_be_host_ga_plugin(self, _):
with mock_wire_protocol(mockwiredata.DATA_FILE_VM_SETTINGS) as protocol:
extensions_goal_state = protocol.get_goal_state().extensions_goal_state

self.assertEqual(GoalStateChannel.HostGAPlugin, extensions_goal_state.source_channel, "The source_channel is incorrect")
self.assertEqual(GoalStateChannel.HostGAPlugin, extensions_goal_state.channel, "The channel is incorrect")

class CaseFoldedDictionaryTestCase(AgentTestCase):
def test_it_should_retrieve_items_ignoring_case(self):
Expand Down

0 comments on commit 415882d

Please sign in to comment.