Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python configuration for tests #2793

Merged
merged 7 commits into from
Apr 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 74 additions & 52 deletions tests_e2e/orchestrator/lib/agent_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,57 +274,81 @@ def _setup_node(self) -> None:
log.info("************************************** [Node Setup] **************************************")
log.info("")
log.info("Test Node: %s", self.context.vm.name)
log.info("IP Address: %s", self.context.vm_ip_address)
log.info("Resource Group: %s", self.context.vm.resource_group)
log.info("")

self.context.ssh_client.run_command("mkdir -p ~/bin/tests_e2e/tests; touch ~/bin/agent-env")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now we prepare a tarball on the orchestrator machine that is already structured the way it needs to be set up on the test VMs, instead of using SSH to create/copy these files. This should make it easier to include additional files that need to be copied to the test VMs.


# Copy the test tools
tools_path = self.context.test_source_directory/"orchestrator"/"scripts"
tools_target_path = Path("~/bin")
log.info("Copying %s to %s:%s", tools_path, self.context.node.name, tools_target_path)
self.context.ssh_client.copy_to_node(tools_path, tools_target_path, recursive=True)

# Copy the test libraries
lib_path = self.context.test_source_directory/"tests"/"lib"
lib_target_path = Path("~/bin/tests_e2e/tests")
log.info("Copying %s to %s:%s", lib_path, self.context.node.name, lib_target_path)
self.context.ssh_client.copy_to_node(lib_path, lib_target_path, recursive=True)

# Copy the test agent
agent_package_path: Path = self._get_agent_package_path()
agent_package_target_path = Path("~/bin")/agent_package_path.name
log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, agent_package_target_path)
self.context.ssh_client.copy_to_node(agent_package_path, agent_package_target_path)

# Copy Pypy
# NOTE: Pypy is pre-downloaded to /tmp on the container image used for Azure Pipelines runs. For dev runs,
# if we don't find Pypy under /tmp, then we download it a few lines below.
#
# Ensure that the correct version (x64 vs ARM64) of Pypy has been downloaded; it is pre-downloaded to /tmp on the container image
# used for Azure Pipelines runs, but for developer runs it may need to be downloaded.
#
if self.context.ssh_client.get_architecture() == "aarch64":
pypy_path = Path("/tmp/pypy3.7-arm64.tar.bz2")
pypy_download = "https://downloads.python.org/pypy/pypy3.7-v7.3.5-aarch64.tar.bz2"
else:
pypy_path = Path("/tmp/pypy3.7-x64.tar.bz2")
pypy_download = "https://downloads.python.org/pypy/pypy3.7-v7.3.5-linux64.tar.bz2"

if not pypy_path.exists():
if pypy_path.exists():
log.info("Found Pypy at %s", pypy_path)
else:
log.info("Downloading %s to %s", pypy_download, pypy_path)
run_command(["wget", pypy_download, "-O", pypy_path])
pypy_target_path = Path("~/bin/pypy3.7.tar.bz2")
log.info("Copying %s to %s:%s", pypy_path, self.context.node.name, pypy_target_path)
self.context.ssh_client.copy_to_node(pypy_path, pypy_target_path)

# Install the tools and libraries
install_command = lambda: self.context.ssh_client.run_command(f"~/bin/scripts/install-tools --agent-package {agent_package_target_path}")
log.info('Installing tools on the test node\n%s', install_command())
log.info('Remote commands will use %s', self.context.ssh_client.run_command("which python3"))
#
# Create a tarball with the files we need to copy to the test node. The tarball includes two directories:
#
# * bin - Executable files (Bash and Python scripts)
# * lib - Library files (Python modules)
#
# After extracting the tarball on the test node, 'bin' will be added to PATH and PYTHONPATH will be set to 'lib'.
#
# Note that executables are placed directly under 'bin', while the path for Python modules is preserved under 'lib'.
#
tarball_path: Path = Path("/tmp/waagent.tar")
log.info("Creating %s with the files need on the test node", tarball_path)
log.info("Adding orchestrator/scripts")
run_command(['tar', 'cvf', str(tarball_path), '--transform=s,.*/,bin/,', '-C', str(self.context.test_source_directory/"orchestrator"/"scripts"), '.'])
# log.info("Adding tests/scripts")
# run_command(['tar', 'rvf', str(tarball_path), '--transform=s,.*/,bin/,', '-C', str(self.context.test_source_directory/"tests"/"scripts"), '.'])
log.info("Adding tests/lib")
run_command(['tar', 'rvf', str(tarball_path), '--transform=s,^,lib/,', '-C', str(self.context.test_source_directory.parent), '--exclude=__pycache__', 'tests_e2e/tests/lib'])
log.info("Contents of %s:\n\n%s", tarball_path, run_command(['tar', 'tvf', str(tarball_path)]))

#
# Cleanup the test node (useful for developer runs)
#
log.info('Preparing the test node for setup')
# Note that removing lib requires sudo, since a Python cache may have been created by tests using sudo
self.context.ssh_client.run_command("rm -rvf ~/{bin,lib,tmp}", use_sudo=True)

#
# Copy the tarball, Pypy and the test Agent to the test node
#
target_path = Path("~")/"tmp"
self.context.ssh_client.run_command(f"mkdir {target_path}")
log.info("Copying %s to %s:%s", tarball_path, self.context.node.name, target_path)
self.context.ssh_client.copy_to_node(tarball_path, target_path)
log.info("Copying %s to %s:%s", pypy_path, self.context.node.name, target_path)
self.context.ssh_client.copy_to_node(pypy_path, target_path)
agent_package_path: Path = self._get_agent_package_path()
log.info("Copying %s to %s:%s", agent_package_path, self.context.node.name, target_path)
self.context.ssh_client.copy_to_node(agent_package_path, target_path)

#
# Extract the tarball and execute the install scripts
#
log.info('Installing tools on the test node')
command = f"tar xf {target_path/tarball_path.name} && ~/bin/install-tools"
log.info("%s\n%s", command, self.context.ssh_client.run_command(command))

# Install the agent
if self.context.is_vhd:
log.info("Using a VHD; will not install the Test Agent.")
else:
install_command = lambda: self.context.ssh_client.run_command(f"install-agent --package {agent_package_target_path} --version {AGENT_VERSION}", use_sudo=True)
log.info("Installing the Test Agent on %s\n%s", self.context.node.name, install_command())
log.info("Installing the Test Agent on the test node")
command = f"install-agent --package ~/tmp/{agent_package_path.name} --version {AGENT_VERSION}"
log.info("%s\n%s", command, self.context.ssh_client.run_command(command, use_sudo=True))

log.info("Completed test node setup")

def _collect_node_logs(self) -> None:
"""
Expand Down Expand Up @@ -393,6 +417,8 @@ def _execute(self, environment: Environment, variables: Dict[str, Any]):
# pylint seems to think self.context.test_suites is not iterable. Suppressing warning, since its type is List[AgentTestSuite]
# E1133: Non-iterable value self.context.test_suites is used in an iterating context (not-an-iterable)
for suite in self.context.test_suites: # pylint: disable=E1133
log.info("Executing test suite %s", suite.name)
self.context.lisa_log.info("Executing Test Suite %s", suite.name)
test_suite_success = self._execute_test_suite(suite) and test_suite_success

test_suite_success = self._check_agent_log() and test_suite_success
Expand All @@ -419,7 +445,7 @@ def _execute(self, environment: Environment, variables: Dict[str, Any]):
finally:
self._clean_up()
if not success:
self._mark_log_as_failed(log_path)
self._mark_log_as_failed()

def _execute_test_suite(self, suite: TestSuiteInfo) -> bool:
"""
Expand All @@ -432,7 +458,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool:
success: bool = True # True if all the tests succeed

with _set_thread_name(suite_full_name): # The thread name is added to the LISA log
log_path:Path = self.context.log_path/f"{suite_full_name}.log"
log_path: Path = self.context.log_path/f"{suite_full_name}.log"
with set_current_thread_log(log_path):
try:
log.info("")
Expand All @@ -447,15 +473,15 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool:
test_start_time: datetime.datetime = datetime.datetime.now()

log.info("******** Executing %s", test_name)
self.context.lisa_log.info("******** Executing %s", test_full_name)
self.context.lisa_log.info("Executing test %s", test_full_name)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor formatting change. We used to output "********" to the LISA log to make those messages stand out. My previous PR created a log for each environment, so now the LISA log is not too crowded and this marker is not needed anymore.


try:

test(self.context).run()

summary.append(f"[Passed] {test_name}")
log.info("******** [Passed] %s", test_name)
self.context.lisa_log.info("******** [Passed] %s", test_full_name)
self.context.lisa_log.info("[Passed] %s", test_full_name)
self._report_test_result(
suite_full_name,
test_name,
Expand Down Expand Up @@ -514,7 +540,7 @@ def _execute_test_suite(self, suite: TestSuiteInfo) -> bool:
add_exception_stack_trace=True)
finally:
if not success:
self._mark_log_as_failed(log_path)
self._mark_log_as_failed()

return success

Expand Down Expand Up @@ -551,21 +577,17 @@ def _check_agent_log(self) -> bool:
log.info("There are no errors in the agent log")
return True

log_path: Path = self.context.log_path/f"CheckAgentLog-{self.context.environment_name}.log"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My previous PR created a log for each environment, so we no longer need to create this extra CheckAgentLog-*.log; we can simply log those messages to the environment log.

message = f"Detected {len(errors)} error(s) in the agent log. See {log_path} for a full report."
self.context.lisa_log.info(message)
log.info(message)

with set_current_thread_log(log_path):
log.info("Detected %s error(s) in the agent log:\n\n%s", len(errors), '\n'.join(['\t' + e.text for e in errors]))
self._mark_log_as_failed(log_path)
message = f"Detected {len(errors)} error(s) in the agent log"
self.context.lisa_log.error(message)
log.error("%s:\n\n%s\n", message, '\n'.join(['\t\t' + e.text.replace('\n', '\n\t\t') for e in errors]))
self._mark_log_as_failed()

self._report_test_result(
self.context.environment_name,
"CheckAgentLog",
TestStatus.FAILED,
start_time,
message=message + '\n' + '\n'.join([e.text for e in errors[0:3]]))
message=message + ' - First few errors:\n' + '\n'.join([e.text for e in errors[0:3]]))
except: # pylint: disable=bare-except
log.exception("Error checking agent log")
self._report_test_result(
Expand All @@ -579,11 +601,11 @@ def _check_agent_log(self) -> bool:
return False

@staticmethod
def _mark_log_as_failed(log_path: Path):
def _mark_log_as_failed():
"""
Renames the given log to prefix it with "_".
Adds a message to indicate the log contains errors.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Marking failed logs by renaming them makes developer runs a little harder, since one may be watching the progress in a test log while the file is renamed. Instead, we add the message below and do not change the name.

"""
log_path.rename(log_path.parent / ("_" + log_path.name))
log.info("MARKER-LOG-WITH-ERRORS")

@staticmethod
def _report_test_result(
Expand Down
2 changes: 1 addition & 1 deletion tests_e2e/orchestrator/scripts/check-agent-log.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/env pypy3

# Microsoft Azure Linux Agent
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,34 +18,20 @@
#

#
# Returns the path to the Python executable.
# Returns the PYTHONPATH on which the azurelinuxagent and associated modules are located.
#
# To do this, the script walks the site packages for the Python used to execute the agent,
# looking for the directory that contains "azurelinuxagent".
#
set -euo pipefail

# python3 is available on most distros
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Somehow git captures this as a rename + changes. I actually removed find-python and created a totally unrelated script, get-agent-modules-path

if which python3 2> /dev/null; then
exit 0
fi

# try python
if which python 2> /dev/null; then
exit 0
fi

# try some well-known locations
declare -a known_locations=(
"/usr/share/oem/python/bin/python"
"/usr/share/oem/python/bin/python3"
)

for python in "${known_locations[@]}"
do
if [[ -e $python ]]; then
echo "$python"
exit 0
fi
done

echo "Can't find the python executable" >&2
$(get-agent-python) -c '
import site
import os

exit 1
for dir in site.getsitepackages():
if os.path.isdir(dir + "/azurelinuxagent"):
print(dir)
exit(0)
exit(1)
'
59 changes: 59 additions & 0 deletions tests_e2e/orchestrator/scripts/get-agent-python
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env bash

# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Returns the path of the Python executable used to start the Agent.
#
set -euo pipefail

#
# Succeeds if $1 is an executable file that is able to import the 'azurelinuxagent' module.
# Error output produced by the probe is discarded.
#
function can_import_agent() {
    [[ -f "$1" && -x "$1" ]] && "$1" -c 'import azurelinuxagent' 2> /dev/null
}

# if the agent is running, get the python command from 'exe' in the /proc file system
if [[ -e /run/waagent.pid ]]; then
    # '|| true' prevents 'set -e' from aborting the script when the PID file cannot be read;
    # in that case we fall through to the searches below
    pid=$(cat /run/waagent.pid 2> /dev/null || true)
    exe="/proc/$pid/exe"
    if [[ -n "$pid" && -e "$exe" ]]; then
        # exe is a symbolic link; return its target. If the link cannot be resolved,
        # continue with the searches below instead of failing.
        if python=$(readlink -f "$exe"); then
            echo "$python"
            exit 0
        fi
    fi
fi

# try all the instances of 'python3' and 'python' in $PATH
# (PATH is split on ':' into an array so that directories containing whitespace or glob
# characters are handled correctly)
IFS=':' read -r -a path_directories <<< "$PATH"
for directory in "${path_directories[@]}"; do
    # skip empty PATH entries
    [[ -n "$directory" ]] || continue
    # probe the candidates in a fixed order: python3 first, then python
    for name in python3 python; do
        if can_import_agent "$directory/$name"; then
            echo "$directory/$name"
            exit 0
        fi
    done
done

# try some well-known locations
declare -a known_locations=(
    "/usr/share/oem/python/bin/python"
)
for python in "${known_locations[@]}"; do
    if can_import_agent "$python"; then
        echo "$python"
        exit 0
    fi
done

# nothing is written to stdout on failure, so callers using $(get-agent-python) get an empty
# string; the diagnostic goes to stderr
echo "Can't find the Python executable used by the Agent" >&2
exit 1
Loading