Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing extension sequencing in azure linux agent #1298

Merged
merged 14 commits into from
Oct 17, 2018
Merged
11 changes: 7 additions & 4 deletions azurelinuxagent/common/protocol/restapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,18 +151,19 @@ def __init__(self,
sequenceNumber=None,
publicSettings=None,
protectedSettings=None,
certificateThumbprint=None):
certificateThumbprint=None,
dependencyLevel=0):
self.name = name
self.sequenceNumber = sequenceNumber
self.publicSettings = publicSettings
self.protectedSettings = protectedSettings
self.certificateThumbprint = certificateThumbprint
self.dependencyLevel = dependencyLevel


class ExtHandlerProperties(DataContract):
def __init__(self):
self.version = None
self.dependencyLevel = None
self.state = None
self.extensions = DataContractList(Extension)

Expand All @@ -179,9 +180,11 @@ def __init__(self, name=None):
self.versionUris = DataContractList(ExtHandlerVersionUri)

def sort_key(self):
level = self.properties.dependencyLevel
if level is None:
levels = [e.dependencyLevel for e in self.properties.extensions]
if len(levels) == 0:
level = 0
else:
level = min(levels)
# Process uninstall or disabled before enabled, in reverse order
# remap 0 to -1, 1 to -2, 2 to -3, etc
if self.properties.state != u"enabled":
Expand Down
15 changes: 10 additions & 5 deletions azurelinuxagent/common/protocol/wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -1591,11 +1591,6 @@ def parse_plugin(self, plugin):
ext_handler.properties.version = getattrib(plugin, "version")
ext_handler.properties.state = getattrib(plugin, "state")

try:
ext_handler.properties.dependencyLevel = int(getattrib(plugin, "dependencyLevel"))
except ValueError:
ext_handler.properties.dependencyLevel = 0

sirajudeens marked this conversation as resolved.
Show resolved Hide resolved
location = getattrib(plugin, "location")
failover_location = getattrib(plugin, "failoverlocation")
for uri in [location, failover_location]:
Expand Down Expand Up @@ -1627,6 +1622,15 @@ def parse_plugin_settings(self, ext_handler, plugin_settings):
logger.error("Invalid extension settings")
return

depends_on_level = 0
depends_on_node = find(settings[0], "DependsOn")
if depends_on_node != None:
try:
depends_on_level = int(getattrib(depends_on_node, "dependencyLevel"))
except (ValueError, TypeError):
logger.warn("Could not parse dependencyLevel for handler {0}. Setting it to 0".format(name))
depends_on_level = 0

for plugin_settings_list in runtime_settings["runtimeSettings"]:
handler_settings = plugin_settings_list["handlerSettings"]
ext = Extension()
Expand All @@ -1636,6 +1640,7 @@ def parse_plugin_settings(self, ext_handler, plugin_settings):
ext.sequenceNumber = seqNo
ext.publicSettings = handler_settings.get("publicSettings")
ext.protectedSettings = handler_settings.get("protectedSettings")
ext.dependencyLevel = depends_on_level
thumbprint = handler_settings.get(
"protectedSettingsCertThumbprint")
ext.certificateThumbprint = thumbprint
Expand Down
91 changes: 86 additions & 5 deletions azurelinuxagent/ga/exthandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@
HANDLER_ENVIRONMENT_VERSION = 1.0

EXTENSION_STATUS_ERROR = 'error'
EXTENSION_STATUS_SUCCESS = 'success'
VALID_EXTENSION_STATUS = ['transitioning', 'error', 'success', 'warning']
EXTENSION_TERMINAL_STATUSES = ['error', 'success']

VALID_HANDLER_STATUS = ['Ready', 'NotReady', "Installing", "Unresponsive"]

Expand All @@ -66,6 +68,7 @@
HANDLER_PKG_EXT = ".zip"
HANDLER_PKG_PATTERN = re.compile(HANDLER_PATTERN + r"\.zip$", re.IGNORECASE)

DEFAULT_EXT_TIMEOUT_MINUTES = 90

def validate_has_key(obj, key, fullname):
if key not in obj:
Expand Down Expand Up @@ -120,9 +123,7 @@ def parse_ext_status(ext_status, data):
ext_status.code = status_data.get('code', 0)
formatted_message = status_data.get('formattedMessage')
ext_status.message = parse_formatted_message(formatted_message)
substatus_list = status_data.get('substatus')
if substatus_list is None:
return
substatus_list = status_data.get('substatus', [])
for substatus in substatus_list:
if substatus is not None:
ext_status.substatusList.append(parse_ext_substatus(substatus))
Expand Down Expand Up @@ -310,11 +311,62 @@ def handle_ext_handlers(self, etag=None):
logger.info("Extension handling is on hold")
return

wait_until = datetime.datetime.utcnow() + datetime.timedelta(minutes=DEFAULT_EXT_TIMEOUT_MINUTES)
max_dep_level = max([handler.sort_key() for handler in self.ext_handlers.extHandlers])

self.ext_handlers.extHandlers.sort(key=operator.methodcaller('sort_key'))
for ext_handler in self.ext_handlers.extHandlers:
# TODO: handle install in sequence, enable in parallel
self.handle_ext_handler(ext_handler, etag)


# Wait for the extension installation until it is handled.
# This is done for the install and enable. Not for the uninstallation.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think this comment is entirely accurate.

"install and enable" refer to transition steps that handle_ext_handler performs to bring an extension from the uninstalled state to the disabled/installed state and from the disabled/installed state to the enabled state. handle_ext_handler takes care of these steps. sort_key is based on the goal state for the extension ("enabled" or not "enabled" which is either "disabled" or "uninstall"). If an extension is uninstalled and the goal state is "disabled", then handle_ext_handler will install but not enable the extension.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The initial ask for a comment similar to this was to clarify the intention of the "dep_level >= 0" check... we wait only when the extension was installed or enabled. As it is currently phrased it seems like it is not helping so feel free to remove it or change it accordingly.

# If handled successfully, proceed with the current handler.
# Otherwise, skip the rest of the extension installation.
dep_level = ext_handler.sort_key()
if dep_level >= 0 and dep_level < max_dep_level:
if not self.wait_for_handler_successful_completion(ext_handler, wait_until):
logger.warn("An extension failed or timed out, will skip processing the rest of the extensions")
break

def wait_for_handler_successful_completion(self, ext_handler, wait_until):
'''
Check the status of the extension being handled.
Wait until it has a terminal state or times out.
Return True if it is handled successfully. False if not.
'''
handler_i = ExtHandlerInstance(ext_handler, self.protocol)
for ext in ext_handler.properties.extensions:
ext_completed, status = handler_i.is_ext_handling_complete(ext)

# Keep polling for the extension status until it becomes success or times out
while not ext_completed and datetime.datetime.utcnow() <= wait_until:
time.sleep(5)
ext_completed, status = handler_i.is_ext_handling_complete(ext)

# In case of timeout or terminal error state, we log it and return false
# so that the extensions waiting on this one can be skipped processing
if datetime.datetime.utcnow() > wait_until:
msg = "Extension {0} did not reach a terminal state within the allowed timeout. Last status was {1}".format(ext.name, status)
logger.warn(msg)
add_event(AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.ExtensionProcessing,
is_success=False,
message=msg)
return False

if status != EXTENSION_STATUS_SUCCESS:
msg = "Extension {0} did not succeed. Status was {1}".format(ext.name, status)
logger.warn(msg)
add_event(AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.ExtensionProcessing,
is_success=False,
message=msg)
return False

return True

def handle_ext_handler(self, ext_handler, etag):
ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol)

Expand Down Expand Up @@ -871,6 +923,35 @@ def collect_ext_status(self, ext):

return ext_status

def get_ext_handling_status(self, ext):
seq_no, ext_status_file = self.get_status_file_path(ext)
if seq_no < 0 or ext_status_file is None:
return None

# Missing status file is considered a non-terminal state here
# so that extension sequencing can wait until it becomes existing
uiri marked this conversation as resolved.
Show resolved Hide resolved
if not os.path.exists(ext_status_file):
status = "warning"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should change this status... this gives the impression that the extension is reporting a warning when in actuality there is no status file. I would suggest return something like "status file not found" or similar

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This value is verified against a set of valid values. I think we should adhere to those values. Also it is clearly commented above that condition in which it can occur.

Copy link
Member

@narrieta narrieta Sep 17, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My concern is that if we reach this path, then the status in this message:

"Extension {0} did not reach a terminal state within the allowed timeout. Last status was {1}".

will be "warning". When looking at the log, it'll seem like the extension actually reported a warning, when in actuality it didn't report any status. That may be confusing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to pass up the entire status, not just the status field, to give a more complete log message and add a __str__ to ExtStatus so that it logs something sensible

else:
ext_status = self.collect_ext_status(ext)
status = ext_status.status if ext_status is not None else None

return status

def is_ext_handling_complete(self, ext):
status = self.get_ext_handling_status(ext)

# when seq < 0 (i.e. no new user settings), the handling is complete and return None status
if status is None:
return (True, None)

# If not in terminal state, it is incomplete
if status not in EXTENSION_TERMINAL_STATUSES:
return (False, status)

# Extension completed, return its status
return (True, status)

def report_ext_status(self):
active_exts = []
# TODO Refactor or remove this common code pattern (for each extension subordinate to an ext_handler, do X).
Expand Down
9 changes: 7 additions & 2 deletions tests/data/wire/ext_conf_sequencing.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,19 @@
</GAFamilies>
</GuestAgentExtension>
<Plugins>
<Plugin name="OSTCExtensions.ExampleHandlerLinux" version="1.0.0" location="http://rdfepirv2hknprdstr03.blob.core.windows.net/b01058962be54ceca550a390fa5ff064/Microsoft.OSTCExtensions_ExampleHandlerLinux_asiaeast_manifest.xml" config="" state="enabled" autoUpgrade="false" failoverlocation="http://rdfepirv2hknprdstr04.blob.core.windows.net/b01058962be54ceca550a390fa5ff064/Microsoft.OSTCExtensions_ExampleHandlerLinux_asiaeast_manifest.xml" runAsStartupTask="false" isJson="true" dependencyLevel="2"/>
<Plugin name="OSTCExtensions.OtherExampleHandlerLinux" version="1.0.0" location="http://rdfepirv2hknprdstr03.blob.core.windows.net/b01058962be54ceca550a390fa5ff064/Microsoft.OSTCExtensions_OtherExampleHandlerLinux_asiaeast_manifest.xml" config="" state="enabled" autoUpgrade="false" failoverlocation="http://rdfepirv2hknprdstr04.blob.core.windows.net/b01058962be54ceca550a390fa5ff064/Microsoft.OSTCExtensions_OtherExampleHandlerLinux_asiaeast_manifest.xml" runAsStartupTask="false" isJson="true" dependencyLevel="1" />
<Plugin name="OSTCExtensions.ExampleHandlerLinux" version="1.0.0" location="http://rdfepirv2hknprdstr03.blob.core.windows.net/b01058962be54ceca550a390fa5ff064/Microsoft.OSTCExtensions_ExampleHandlerLinux_asiaeast_manifest.xml" config="" state="enabled" autoUpgrade="false" failoverlocation="http://rdfepirv2hknprdstr04.blob.core.windows.net/b01058962be54ceca550a390fa5ff064/Microsoft.OSTCExtensions_ExampleHandlerLinux_asiaeast_manifest.xml" runAsStartupTask="false" isJson="true" />
<Plugin name="OSTCExtensions.OtherExampleHandlerLinux" version="1.0.0" location="http://rdfepirv2hknprdstr03.blob.core.windows.net/b01058962be54ceca550a390fa5ff064/Microsoft.OSTCExtensions_OtherExampleHandlerLinux_asiaeast_manifest.xml" config="" state="enabled" autoUpgrade="false" failoverlocation="http://rdfepirv2hknprdstr04.blob.core.windows.net/b01058962be54ceca550a390fa5ff064/Microsoft.OSTCExtensions_OtherExampleHandlerLinux_asiaeast_manifest.xml" runAsStartupTask="false" isJson="true" />
</Plugins>
<PluginSettings>
<Plugin name="OSTCExtensions.ExampleHandlerLinux" version="1.0.0">
<DependsOn dependencyLevel="2">
<DependsOnExtension handler="OSTCExtensions.OtherExampleHandlerLinux" />
</DependsOn>
<RuntimeSettings seqNo="0">{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]}</RuntimeSettings>
</Plugin>
<Plugin name="OSTCExtensions.OtherExampleHandlerLinux" version="1.0.0">
<DependsOn dependencyLevel="1">
</DependsOn>
<RuntimeSettings seqNo="0">{"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]}</RuntimeSettings>
</Plugin>
</PluginSettings>
Expand Down
Loading