Skip to content

Commit

Permalink
feat: add job user override for windows (#372)
Browse files Browse the repository at this point in the history
* feat: add job user override for windows

Signed-off-by: Samuel Anderson <119458760+AWS-Samuel@users.noreply.github.com>
  • Loading branch information
AWS-Samuel authored Aug 7, 2024
1 parent 3db4cf6 commit 84c83bd
Show file tree
Hide file tree
Showing 28 changed files with 1,711 additions and 311 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ ignore = [
# We need to use a platform assertion to short-circuit mypy type checking on non-Windows platforms
# https://mypy.readthedocs.io/en/stable/common_issues.html#python-version-and-system-platform-checks
# This causes imports to come after regular Python statements causing flake8 rule E402 to be flagged
"src/deadline_worker_agent/**/*windows*.py" = ["E402"]
"src/deadline_worker_agent/**/*win*.py" = ["E402"]
"test/**/*windows*.py" = ["E402"]

[tool.ruff.lint.isort]
Expand Down
11 changes: 11 additions & 0 deletions src/deadline_worker_agent/installer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def install() -> None:
installer_args.update(password=args.password)
if args.telemetry_opt_out:
installer_args.update(telemetry_opt_out=args.telemetry_opt_out)
if args.windows_job_user:
installer_args.update(windows_job_user=args.windows_job_user)

start_windows_installer(**installer_args)
else:
Expand Down Expand Up @@ -157,6 +159,7 @@ class ParsedCommandLineArguments(Namespace):
vfs_install_path: str
grant_required_access: bool
disallow_instance_profile: bool
windows_job_user: Optional[str] = None


def get_argument_parser() -> ArgumentParser: # pragma: no cover
Expand Down Expand Up @@ -269,5 +272,13 @@ def get_argument_parser() -> ArgumentParser: # pragma: no cover
required=False,
default=False,
)
parser.add_argument(
"--windows-job-user",
help=(
"The username of the Windows user that jobs run as. The password for this user account is reset during worker startup."
),
required=False,
default=None,
)

return parser
65 changes: 44 additions & 21 deletions src/deadline_worker_agent/installer/win_installer.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# This assertion short-circuits mypy from type checking this module on platforms other than Windows
# https://mypy.readthedocs.io/en/stable/common_issues.html#python-version-and-system-platform-checks
import sys

assert sys.platform == "win32"
import dataclasses
import logging
import os
import re
import secrets
import shutil
import string
import sys
import time
from argparse import ArgumentParser
from getpass import getpass
Expand All @@ -33,11 +34,11 @@
FileSystemPermissionEnum,
)
from ..windows.win_service import WorkerAgentWindowsService
from ..windows.win_logon import generate_password, users_equal

# Defaults
DEFAULT_WA_USER = "deadline-worker"
DEFAULT_JOB_GROUP = "deadline-job-users"
DEFAULT_PASSWORD_LENGTH = 32

# Environment variable that overrides the config path used by the Deadline client
DEADLINE_CLIENT_CONFIG_PATH_OVERRIDE_ENV_VAR = "DEADLINE_CONFIG_FILE_PATH"
Expand All @@ -57,20 +58,6 @@ class WorkerAgentDirectories:
deadline_config_subdir: Path


def generate_password(*, length: Optional[int] = None) -> str:
    """
    Generate a random password of the given length.

    Characters are drawn from ASCII letters, digits, and punctuation using the
    ``secrets`` module.

    Args:
        length: Number of characters to generate. When omitted (None), the
            module-level DEFAULT_PASSWORD_LENGTH is used, which matches the
            behaviour of calling this function with no arguments.

    Returns
        str: password

    Raises:
        ValueError: If ``length`` is less than 1 (an empty password is never valid).
    """
    # Resolve the default lazily so existing zero-argument callers are unaffected.
    if length is None:
        length = DEFAULT_PASSWORD_LENGTH
    if length < 1:
        raise ValueError(f"Password length must be at least 1, got {length}")

    alphabet = string.ascii_letters + string.digits + string.punctuation
    # Use secrets.choice to ensure a secure random selection of characters
    # https://docs.python.org/3/library/secrets.html#recipes-and-best-practices
    return "".join(secrets.choice(alphabet) for _ in range(length))


def print_banner():
print(
"===========================================================\n"
Expand Down Expand Up @@ -308,6 +295,7 @@ def update_config_file(
fleet_id: str,
shutdown_on_stop: Optional[bool] = None,
allow_ec2_instance_profile: Optional[bool] = None,
windows_job_user: Optional[str] = None,
) -> None:
"""
Updates the worker configuration file, creating it from the example if it does not exist.
Expand Down Expand Up @@ -407,6 +395,26 @@ def update_config_file(
else:
updated_keys.append("allow_ec2_instance_profile")

if windows_job_user is not None:
escaped_username = windows_job_user.replace("\\", "\\\\\\\\")
content = re.sub(
r'^#*\s*windows_job_user\s*=\s*".{1,512}"$', # defer validation to OS
f'windows_job_user = "{escaped_username}"',
content,
flags=re.MULTILINE,
)
search_username = windows_job_user.replace("\\", "\\\\")
if not re.search(
rf'^windows_job_user = "{re.escape(search_username)}"$',
content,
flags=re.MULTILINE,
):
raise InstallerFailedException(
f"Failed to configure windows_job_user in {worker_config_file}"
)
else:
updated_keys.append("windows_job_user")

# Write the updated content back to the worker configuration file
with open(worker_config_file, "w") as file:
file.write(content)
Expand Down Expand Up @@ -556,7 +564,6 @@ def _install_service(
agent_user_name(str): Worker Agent's account username
password(str): The Worker Agent's user account password
"""

# If the username does not contain the domain, then assume the local domain
# https://learn.microsoft.com/en-us/windows/win32/secauthn/user-name-formats
if "\\" not in agent_user_name and "@" not in agent_user_name:
Expand Down Expand Up @@ -805,6 +812,7 @@ def start_windows_installer(
user_name: str = DEFAULT_WA_USER,
password: Optional[str] = None,
group_name: str = DEFAULT_JOB_GROUP,
windows_job_user: Optional[str] = None,
install_service: bool = False,
start_service: bool = False,
confirm: bool = False,
Expand Down Expand Up @@ -837,6 +845,18 @@ def print_helping_info_and_exit():
logging.error(f"User does not have Administrator privileges: {os.environ['USERNAME']}")
print_helping_info_and_exit()

if windows_job_user is not None and not check_account_existence(windows_job_user):
raise InstallerFailedException(
f"Account {windows_job_user} provided for argument windows-job-user does not exist. "
"Please create the account before proceeding."
)
elif windows_job_user is not None and users_equal(windows_job_user, user_name):
raise InstallerFailedException(
f"Argument for windows-job-user cannot be the same as the worker agent user: {user_name}. "
"If you wish to run jobs as the agent user, set run_jobs_as_agent_user = true in the agent "
"configuration file."
)

# Print configuration
print_banner()

Expand All @@ -849,7 +869,8 @@ def print_helping_info_and_exit():
print("ERROR: Password incorrect")
sys.exit(1)
else:
password = generate_password()
password = generate_password(user_name, length=200)
assert password

print(
f"Farm ID: {farm_id}\n"
Expand All @@ -861,7 +882,8 @@ def print_helping_info_and_exit():
f"Install Windows service: {install_service}\n"
f"Start service: {start_service}\n"
f"Telemetry opt-out: {telemetry_opt_out}\n"
f"Disallow EC2 instance profile: {not allow_ec2_instance_profile}"
f"Disallow EC2 instance profile: {not allow_ec2_instance_profile}\n"
f"Windows Job User: {windows_job_user}"
)
print()

Expand Down Expand Up @@ -960,6 +982,7 @@ def print_helping_info_and_exit():
# any "shutdown" option to be consistent with POSIX installer
shutdown_on_stop=allow_shutdown,
allow_ec2_instance_profile=allow_ec2_instance_profile,
windows_job_user=windows_job_user,
)

if telemetry_opt_out:
Expand Down
15 changes: 14 additions & 1 deletion src/deadline_worker_agent/installer/worker.toml.example
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
#
# run_jobs_as_agent_user = true

# AWS Deadline Cloud may specify an OS user to run a Job's session actions as. Setting
# AWS Deadline Cloud may specify a Posix OS user to run a Job's session actions as. Setting
# "posix_job_user" will override the OS user and the session actions will be run as
# the user given in the value of "posix_job_user" instead. This setting is ignored
# if "run_jobs_as_agent_user" is set to true.
Expand All @@ -201,6 +201,19 @@
#
# posix_job_user = "user:group"

# AWS Deadline Cloud may specify a Windows OS user to run a Job's session actions as. Setting
# "windows_job_user" will override the OS user and the session actions will be run as
# the user given in the value of "windows_job_user" instead. It is important to note that by specifying
# this value, the password for the Windows OS user specified will be reset to a random, unstored value.
# This setting also requires that the worker agent is run with administrator privileges. This setting is
# incompatible with the setting "run_jobs_as_agent_user" set to true.
#
# To have a specific Windows OS user used when running jobs, uncomment the line below and
# replace the username as desired. This value is overridden when the DEADLINE_WORKER_WINDOWS_JOB_USER
# environment variable is set or the --windows-job-user command-line flag is specified.
#
# windows_job_user = "job-user"

# AWS Deadline Cloud may tell the worker to stop. If the "shutdown_on_stop" setting below is true, then the
# Worker will attempt to shutdown the host system after the Worker has been stopped.
#
Expand Down
21 changes: 17 additions & 4 deletions src/deadline_worker_agent/scheduler/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@

if sys.platform == "win32":
from ..windows.win_credentials_resolver import WindowsCredentialsResolver
from ..windows.win_logon import unload_and_close
else:
WindowsCredentialsResolver = Any

Expand Down Expand Up @@ -238,7 +239,9 @@ def __init__(
self._retain_session_dir = retain_session_dir
self._windows_credentials_resolver: Optional[WindowsCredentialsResolver]

if os.name == "nt":
if os.name == "nt" and not (
self._job_run_as_user_override.job_user or self._job_run_as_user_override.run_as_agent
):
self._windows_credentials_resolver = WindowsCredentialsResolver(self._boto_session)
else:
self._windows_credentials_resolver = None
Expand Down Expand Up @@ -307,9 +310,17 @@ def run(self) -> None:
finally:
logger.info("Main event loop exited.")
self._drain_scheduler()
if os.name == "nt":
assert self._windows_credentials_resolver is not None
self._windows_credentials_resolver.clear()
if sys.platform == "win32":
if (
self._job_run_as_user_override is not None
and self._job_run_as_user_override.logon_token is not None
):
unload_and_close(
self._job_run_as_user_override.user_profile,
self._job_run_as_user_override.logon_token,
)
elif self._windows_credentials_resolver is not None:
self._windows_credentials_resolver.clear()

def _drain_scheduler(self) -> None:
# Called only from self.run() during shutdown.
Expand Down Expand Up @@ -901,6 +912,8 @@ def _create_new_sessions(
# For Windows the WA runs as Administrator so fail jobs that were configured to runAs - WORKER_AGENT_USER as that would provide Admin privileges to the job
if (
os.name == "nt"
and self._job_run_as_user_override.job_user is None
and not self._job_run_as_user_override.run_as_agent
and job_details.job_run_as_user
and job_details.job_run_as_user.is_worker_agent_user
):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def validate_entity_data(
run_as_windows = entity_data["jobRunAsUser"].get("windows", None)
if os.name == "nt" and not run_as_windows:
raise ValueError(
'Expected ""jobRunAs" -> "windows" to exist when "jobRunAs" -> "runAs" is "QUEUE_CONFIGURED_USER" but it was not present'
'Expected "jobRunAs" -> "windows" to exist when "jobRunAs" -> "runAs" is "QUEUE_CONFIGURED_USER" but it was not present'
)
if os.name == "posix" and not run_as_posix:
raise ValueError(
Expand Down
8 changes: 8 additions & 0 deletions src/deadline_worker_agent/startup/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class ParsedCommandLineArguments(Namespace):
no_shutdown: bool | None = None
run_jobs_as_agent_user: bool | None = None
posix_job_user: str | None = None
windows_job_user: str | None = None
disallow_instance_profile: bool | None = None
logs_dir: Path | None = None
local_session_logs: bool | None = None
Expand Down Expand Up @@ -77,6 +78,13 @@ def get_argument_parser() -> ArgumentParser:
"If not set, defaults to what the service sets.",
default=None,
)
elif os.name == "nt":
parser.add_argument(
"--windows-job-user",
help="Overrides the windows user that the Worker Agent impersonates. In doing so, resets the specified user's password to a cryptographically random, unstored value during worker startup. "
"If not set, impersonation behavior defers to what the service sets.",
default=None,
)

parser.add_argument(
"--logs-dir",
Expand Down
37 changes: 35 additions & 2 deletions src/deadline_worker_agent/startup/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

from __future__ import annotations

import sys
import os
import getpass
import logging as _logging
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional, Sequence, Tuple, cast
from typing import Any, Optional, Sequence, Tuple, cast, TYPE_CHECKING

from pydantic import ValidationError

Expand All @@ -17,6 +19,12 @@
from .cli_args import ParsedCommandLineArguments, get_argument_parser
from .settings import WorkerSettings

if sys.platform == "win32":
from ..windows.win_logon import reset_user_password, PasswordResetException, users_equal

if TYPE_CHECKING:
from _win32typing import PyHKEY, PyHANDLE

_logger = _logging.getLogger(__name__)


Expand All @@ -28,6 +36,11 @@ class JobsRunAsUserOverride:
job_user: Optional[SessionUser] = None
"""If provided and run_as_agent is False, then all Jobs run by this agent will run as this user."""

if sys.platform == "win32":
# we need to keep this handle referenced to avoid it being garbage collected.
logon_token: Optional[PyHANDLE] = None
user_profile: Optional[PyHKEY] = None


# Default paths for the Worker persistence directory subdirectories.
# The persistence directory is expected to be located on a file-system that is local to the Worker
Expand Down Expand Up @@ -121,6 +134,8 @@ def __init__(
settings_kwargs["run_jobs_as_agent_user"] = parsed_cli_args.run_jobs_as_agent_user
if parsed_cli_args.posix_job_user is not None:
settings_kwargs["posix_job_user"] = parsed_cli_args.posix_job_user
if parsed_cli_args.windows_job_user is not None:
settings_kwargs["windows_job_user"] = parsed_cli_args.windows_job_user
if parsed_cli_args.disallow_instance_profile is not None:
settings_kwargs["allow_instance_profile"] = (
not parsed_cli_args.disallow_instance_profile
Expand Down Expand Up @@ -150,6 +165,24 @@ def __init__(
run_as_agent=settings.run_jobs_as_agent_user,
job_user=PosixSessionUser(user=user, group=group),
)
elif sys.platform == "win32" and settings.windows_job_user is not None:
if users_equal(settings.windows_job_user, getpass.getuser()):
raise ConfigurationError(
f"Windows job user override must not be the user running the worker agent: {getpass.getuser()}."
" If you wish to run jobs as the agent user, set run_jobs_as_agent_user = true in the agent configuration file."
)
try:
cache_entry = reset_user_password(settings.windows_job_user)
except PasswordResetException as e:
raise ConfigurationError(
f"Failed to reset password for user {settings.windows_job_user}: {e}"
) from e
self.job_run_as_user_overrides = JobsRunAsUserOverride(
run_as_agent=settings.run_jobs_as_agent_user,
job_user=cache_entry.windows_session_user,
logon_token=cache_entry.logon_token,
user_profile=cache_entry.user_profile,
)
else:
self.job_run_as_user_overrides = JobsRunAsUserOverride(
run_as_agent=settings.run_jobs_as_agent_user
Expand Down Expand Up @@ -197,7 +230,7 @@ def _validate(self) -> None:
and self.job_run_as_user_overrides.job_user is not None
):
raise ConfigurationError(
"Cannot specify a POSIX job user when the option to running Jobs as the agent user is enabled."
f"Cannot specify a {'windows' if os.name == 'nt' else 'posix'} job user when the option to run jobs as the agent user is enabled."
)

if self.host_metrics_logging_interval_seconds <= 0:
Expand Down
Loading

0 comments on commit 84c83bd

Please sign in to comment.