Skip to content

Commit

Permalink
Merge branch 'develop' into fix-rsm-update
Browse files Browse the repository at this point in the history
  • Loading branch information
nagworld9 authored Jan 16, 2024
2 parents 1dbe5c1 + c24a9b6 commit 3813f64
Show file tree
Hide file tree
Showing 10 changed files with 85 additions and 20 deletions.
32 changes: 30 additions & 2 deletions azurelinuxagent/common/osutil/redhat.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,39 @@ def set_hostname(self, hostname):
logger.warn("[{0}] failed, attempting fallback".format(' '.join(hostnamectl_cmd)))
DefaultOSUtil.set_hostname(self, hostname)

def get_nm_controlled(self):
    """
    Report whether the primary network interface is controlled by NetworkManager.

    Reads the NM_CONTROLLED setting from the interface's ifcfg file under
    /etc/sysconfig/network-scripts.

    Returns False only when NM_CONTROLLED is explicitly set to "n" or "no".
    Returns True in every other case: the setting is absent (NM_CONTROLLED=y
    by default if not specified), it has any other value, or it could not be read.
    """
    ifname = self.get_if_name()
    filepath = "/etc/sysconfig/network-scripts/ifcfg-{0}".format(ifname)
    nm_controlled_cmd = ['grep', 'NM_CONTROLLED=', filepath]
    try:
        result = shellutil.run_command(nm_controlled_cmd, log_error=False, encode_output=False)

        # With encode_output=False the helper may hand back raw bytes; splitting bytes on a
        # str separator raises TypeError on Python 3, so normalize to text first.
        if isinstance(result, bytes):
            result = result.decode('utf-8', 'replace')

        value = None
        for line in result.splitlines():
            line = line.strip()
            # grep matches substrings, so commented-out settings ("#NM_CONTROLLED=no")
            # also show up in the output; they must not be honoured.
            if not line or line.startswith('#'):
                continue
            _, separator, raw_value = line.partition('=')
            if separator:
                # Remove surrounding white space and ' or " characters.
                # If the setting appears more than once, the last assignment wins.
                value = raw_value.replace("'", '').replace('"', '').strip()

        if value == "n" or value == "no":
            return False
    except shellutil.CommandError as e:
        # Command might fail because NM_CONTROLLED value is not in interface config file (exit code 1).
        # Log warning for any other exit code.
        # NM_CONTROLLED=y by default if not specified.
        if e.returncode != 1:
            logger.warn("[{0}] failed: {1}.\nAgent will continue to publish hostname without NetworkManager restart".format(' '.join(nm_controlled_cmd), e))
    except Exception as e:
        logger.warn("Unexpected error while retrieving value of NM_CONTROLLED in {0}: {1}.\nAgent will continue to publish hostname without NetworkManager restart".format(filepath, e))

    return True

def publish_hostname(self, hostname):
    """
    Restart NetworkManager first before publishing hostname, only if the network interface is not controlled by the
    NetworkManager service (as determined by NM_CONTROLLED=n in the interface configuration). If the NetworkManager
    service is restarted before the agent publishes the hostname, and NM_CONTROLLED=y, a race condition may happen
    between the NetworkManager service and the Guest Agent making changes to the network interface configuration
    simultaneously.
    """
    # The restart must be guarded: restarting unconditionally would reintroduce the race described above.
    if not self.get_nm_controlled():
        shellutil.run("service NetworkManager restart")
    super(RedhatOSUtil, self).publish_hostname(hostname)

def register_agent_service(self):
Expand Down
12 changes: 7 additions & 5 deletions azurelinuxagent/common/utils/fileutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,15 @@ def get_line_startingwith(prefix, filepath):
return None


def mkdir(dirpath, mode=None, owner=None, reset_mode_and_owner=True):
    """
    Create the directory 'dirpath' (including missing parents) if it does not exist, then
    optionally apply 'mode' and 'owner' to it.

    When the directory is created by this call, 'mode' and 'owner' are always applied.
    When it already exists, they are applied only if 'reset_mode_and_owner' is True, so
    callers can pass False to preserve permissions that were changed after creation.
    A 'mode' or 'owner' of None is never applied.
    """
    if not os.path.isdir(dirpath):
        os.makedirs(dirpath)
        reset_mode_and_owner = True  # force setting the mode and owner
    if reset_mode_and_owner:
        if mode is not None:
            chmod(dirpath, mode)
        if owner is not None:
            chowner(dirpath, owner)


def chowner(path, owner):
Expand Down
2 changes: 1 addition & 1 deletion azurelinuxagent/ga/exthandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1051,7 +1051,7 @@ def get_extension_full_name(self, extension=None):

def __set_command_execution_log(self, extension, execution_log_max_size):
try:
fileutil.mkdir(self.get_log_dir(), mode=0o755)
fileutil.mkdir(self.get_log_dir(), mode=0o755, reset_mode_and_owner=False)
except IOError as e:
self.logger.error(u"Failed to create extension log dir: {0}", e)
else:
Expand Down
6 changes: 5 additions & 1 deletion azurelinuxagent/pa/deprovision/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,11 @@ def del_lib_dir_files(self, warnings, actions): # pylint: disable=W0613
'partition',
'Protocol',
'SharedConfig.xml',
'WireServerEndpoint'
'WireServerEndpoint',
'published_hostname',
'fast_track.json',
'initial_goal_state',
'rsm_update.json'
]
known_files_glob = [
'Extensions.*.xml',
Expand Down
8 changes: 5 additions & 3 deletions azurelinuxagent/pa/provision/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,11 @@ def check_provisioned_file(self):
s = fileutil.read_file(ProvisionHandler.provisioned_file_path()).strip()
if not self.osutil.is_current_instance_id(s):
if len(s) > 0:
logger.warn("VM is provisioned, "
"but the VM unique identifier has changed -- "
"clearing cached state")
msg = "VM is provisioned, but the VM unique identifier has changed. This indicates the VM may be " \
"created from an image that was not properly deprovisioned or generalized, which can result in " \
"unexpected behavior from the guest agent -- clearing cached state"
logger.warn(msg)
self.report_event(msg)
from azurelinuxagent.pa.deprovision \
import get_deprovision_handler
deprovision_handler = get_deprovision_handler()
Expand Down
22 changes: 22 additions & 0 deletions tests/ga/test_exthandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,28 @@ def test_command_extension_log_truncates_correctly(self, mock_log_dir):
with open(log_file_path) as truncated_log_file:
self.assertEqual(truncated_log_file.read(), "{second_line}\n".format(second_line=second_line))

def test_set_logger_should_not_reset_the_mode_of_the_log_directory(self):
# Verifies that calling set_logger() on an existing handler log directory leaves a
# previously changed directory mode untouched.
ext_log_dir = os.path.join(self.tmp_dir, "log_directory")

# Redirect the extension log root to a directory under the test's temporary directory.
with patch("azurelinuxagent.common.conf.get_ext_log_dir", return_value=ext_log_dir):
ext_handler = Extension(name='foo')
ext_handler.version = "1.2.3"
# NOTE(review): the mode check below implies the handler log directory already exists at this
# point, presumably created while constructing ExtHandlerInstance -- confirm.
ext_handler_instance = ExtHandlerInstance(ext_handler=ext_handler, protocol=None)
ext_handler_log_dir = os.path.join(ext_log_dir, ext_handler.name)

# Double-check the initial mode
# Only the permission bits (lowest 9 bits) of st_mode are compared.
get_mode = lambda f: os.stat(f).st_mode & 0o777
mode = get_mode(ext_handler_log_dir)
if mode != 0o755:
raise Exception("The initial mode of the log directory should be 0o755, got 0{0:o}".format(mode))

# Change the mode, then set up the logger again; the new mode must survive the call.
new_mode = 0o700
os.chmod(ext_handler_log_dir, new_mode)
ext_handler_instance.set_logger()

mode = get_mode(ext_handler_log_dir)
self.assertEqual(new_mode, mode, "The mode of the log directory should not have changed")

def test_it_should_report_the_message_in_the_hearbeat(self):
def heartbeat_with_message():
return {'code': 0, 'formattedMessage': {'lang': 'en-US', 'message': 'This is a heartbeat message'},
Expand Down
3 changes: 3 additions & 0 deletions tests_e2e/orchestrator/lib/agent_junit.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ def _received_message(self, message: MessageBase) -> None:
if "Unexpected error in AgentTestSuite" in message.message:
# Ignore these errors, they are already reported as AgentTestResultMessages
return
if "TestFailedException" in message.message:
# Ignore these errors, they are already reported as test failures
return
# Change the suite name to "_Runbook_" for LISA messages in order to separate them
# from actual test results.
message.suite_full_name = "_Runbook_"
Expand Down
10 changes: 5 additions & 5 deletions tests_e2e/pipeline/pipeline-cleanup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ parameters:
- name: older_than
displayName: Delete resources older than (use the syntax of the "date -d" command)
type: string
default: 1 day ago
default: 12 hours ago

- name: service_connections
type: object
Expand Down Expand Up @@ -45,12 +45,12 @@ steps:
rest_endpoint=$(az cloud show --query "endpoints.resourceManager" -o tsv)
pattern="${{ parameters.name_pattern }}"
az rest --method GET \
--url "${rest_endpoint}/subscriptions/${subscription_id}/resourcegroups" \
--url-parameters api-version=2021-04-01 \$expand=createdTime \
--output json \
--query value \
| jq --arg date "$date" '.[] | select (.createdTime < $date).name' \
| grep -i '${{ parameters.name_pattern }}' \
| xargs -l -t -r az group delete --no-wait -y -n \
|| echo "No resource groups found to delete"
| jq --arg date "$date" '.[] | select (.createdTime < $date).name | match("'${pattern}'"; "g").string' \
| xargs -l -t -r az group delete --subscription "${subscription_id}" --no-wait -y -n
6 changes: 5 additions & 1 deletion tests_e2e/pipeline/scripts/setup-agent.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#

#
# Script to setup the agent VM for the Azure Pipelines agent pool; it simply installs the Azure CLI and the Docker Engine.
# Script to setup the agent VM for the Azure Pipelines agent pool; it simply installs the Azure CLI, the Docker Engine and jq.
#

set -euox pipefail
Expand Down Expand Up @@ -48,3 +48,7 @@ sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plug

# Verify that Docker Engine is installed correctly by running the hello-world image.
sudo docker run hello-world

# Install jq; it is used by the cleanup pipeline to parse the JSON output of the Azure CLI
sudo apt-get install -y jq

4 changes: 2 additions & 2 deletions tests_e2e/test_suites/agent_update.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
# Self-update: If vm not enrolled into RSM, it will validate agent uses self-update to update to latest version published
name: "AgentUpdate"
tests:
# - "agent_update/rsm_update.py" will enable this test once we have a new test version published
# - "agent_update/rsm_update.py" TODO: will enable this test once we have a new test version published
- "agent_update/self_update.py"
images:
- "random(endorsed, 10)"
- "random(endorsed-arm64, 2)"
# - "random(endorsed-arm64, 2)" TODO: HGAP (HostGAPlugin) is not deployed on some arm64 hosts, so the agent gets stuck on vmSettings calls (as per contract); will enable once HGAP is deployed there
locations: "AzureCloud:eastus2euap"
owns_vm: true
skip_on_clouds:
Expand Down

0 comments on commit 3813f64

Please sign in to comment.