Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build: pass PATH environment variable to Docker container #10133

Merged
merged 5 commits into from
Mar 16, 2023
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 39 additions & 28 deletions readthedocs/doc_builder/environments.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from docker.errors import APIError as DockerAPIError
from docker.errors import DockerException
from docker.errors import NotFound as DockerNotFoundError
from requests.exceptions import ConnectionError, ReadTimeout
from requests.exceptions import ConnectionError, ReadTimeout # noqa
ericholscher marked this conversation as resolved.
Show resolved Hide resolved
from requests_toolbelt.multipart.encoder import MultipartEncoder

from readthedocs.api.v2.client import api as api_v2
Expand Down Expand Up @@ -73,7 +73,7 @@ def __init__(
bin_path=None,
record_as_success=False,
demux=False,
**kwargs,
**kwargs, # pylint: disable=unused-argument
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like we have linting configured too strongly, if so much of this PR is linting hints... Maybe that's making the code better, but I'm not convinced this check is useful?

):
self.command = command
self.shell = shell
Expand Down Expand Up @@ -252,8 +252,8 @@ def save(self):
{key: str(value) for key, value in data.items()}
)
resource = api_v2.command
resp = resource._store['session'].post(
resource._store['base_url'] + '/',
resp = resource._store["session"].post( # pylint: disable=protected-access
resource._store["base_url"] + "/", # pylint: disable=protected-access
data=encoder,
headers={
'Content-Type': encoder.content_type,
Expand Down Expand Up @@ -301,11 +301,35 @@ def run(self):

self.start_time = datetime.utcnow()
client = self.build_env.get_client()

# Create a copy of the environment to update PATH variable
environment = self._environment.copy()
# Default PATH variable
environment[
"PATH"
] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
humitos marked this conversation as resolved.
Show resolved Hide resolved
# Add asdf extra paths
environment["PATH"] += (
f":/home/{settings.RTD_DOCKER_USER}/.asdf/shims"
f":/home/{settings.RTD_DOCKER_USER}/.asdf/bin"
)

if settings.RTD_DOCKER_COMPOSE:
environment["PATH"] += ":/root/.asdf/shims:/root/.asdf/bin"

# Prepend the BIN_PATH if it's defined
if self.bin_path:
path = environment.get("PATH")
bin_path = self._escape_command(self.bin_path)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we only escaping this here? If this is user input and security critical, it should be very explicitly mentioned.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why that _escape_command is there. I copied from our previous code,

if self.bin_path:
bin_path = self._escape_command(self.bin_path)
prefix += f'PATH={bin_path}:$PATH '

In theory, it's not input from the user. This should be the venv path, if I understand correctly this code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be not required, I think 🤷🏼

After a quick grep, it's always self.python_env.venv_bin() or self.venv_bin(). So, we could probably remove it if we want, but I'm not sure.

environment["PATH"] = bin_path
if path:
environment["PATH"] += f":{path}"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic feels really weird to me. We're overwriting then appending the original path if it exists, but the variable names don't make that clear. I think we want something closer to:

Suggested change
if self.bin_path:
path = environment.get("PATH")
bin_path = self._escape_command(self.bin_path)
environment["PATH"] = bin_path
if path:
environment["PATH"] += f":{path}"
if self.bin_path:
original_path = environment.get("PATH")
# SECURITY CRITICAL: Escape user input
escaped_bin_path = self._escape_command(self.bin_path)
environment["PATH"] = f"{escaped_bin_path}:{original_path}"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need the if because original_path could be None.

I'm taking the suggestion of the variable names, tho.

try:
exec_cmd = client.exec_create(
container=self.build_env.container_id,
cmd=self.get_wrapped_command(),
environment=self._environment,
environment=environment,
user=self.user,
workdir=self.cwd,
stdout=True,
Expand Down Expand Up @@ -357,31 +381,18 @@ def get_wrapped_command(self):
"""
Wrap command in a shell and optionally escape special bash characters.

In order to set the current working path inside a docker container, we
need to wrap the command in a shell call manually.

Some characters will be interpreted as shell characters without
escaping, such as: ``pip install requests<0.8``. When passing
``escape_command=True`` in the init method this escapes a good majority
of those characters.
"""
prefix = ''
if self.bin_path:
bin_path = self._escape_command(self.bin_path)
prefix += f'PATH={bin_path}:$PATH '

command = (
' '.join(
self._escape_command(part) if self.escape_command else part
for part in self.command
)
)
return (
"/bin/sh -c '{prefix}{cmd}'".format(
prefix=prefix,
cmd=command,
)
)
return f"/bin/bash -c '{command}'"

def _escape_command(self, cmd):
r"""Escape the command by prefixing suspicious chars with `\`."""
Expand Down Expand Up @@ -524,14 +535,14 @@ class BuildEnvironment(BaseEnvironment):
"""

def __init__(
self,
project=None,
version=None,
build=None,
config=None,
environment=None,
record=True,
**kwargs,
self,
project=None,
version=None,
build=None,
config=None,
environment=None,
record=True,
**kwargs, # pylint: disable=unused-argument
):
super().__init__(project, environment)
self.version = version
Expand All @@ -557,7 +568,7 @@ def run(self, *cmd, **kwargs):
})
return super().run(*cmd, **kwargs)

def run_command_class(self, *cmd, **kwargs): # pylint: disable=arguments-differ
def run_command_class(self, *cmd, **kwargs): # pylint: disable=signature-differs
kwargs.update({
'build_env': self,
})
Expand Down