Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docker: infer dependencies on COPY of pex binaries. #12920

Merged
merged 10 commits into from
Sep 24, 2021
5 changes: 5 additions & 0 deletions 3rdparty/python/lockfiles/user_reqs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# "generated_with_requirements": [
# "PyYAML<5.5,>=5.4",
# "ansicolors==1.1.8",
# "dockerfile==3.2.0",
# "fasteners==0.16",
# "freezegun==1.1.0",
# "humbug==0.2.6",
Expand Down Expand Up @@ -52,6 +53,10 @@ charset-normalizer==2.0.5; python_full_version >= "3.6.0" and python_version >=
colorama==0.4.4; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "win32" or sys_platform == "win32" and python_version >= "3.6" and python_full_version >= "3.5.0" \
--hash=sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2 \
--hash=sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b
dockerfile==3.2.0; python_full_version >= "3.6.1" \
--hash=sha256:e6e00b82b82042fb4df569ae00bd2648ac6c8823f51c406da31ab01c728926c2 \
--hash=sha256:e6bd64408386b7ba2259d85820e0fe90de1b6b8269f560f18aba100c6aa40b7d \
--hash=sha256:e13fd3768216a788189e0667521e1435a273a4129119a8453085d897fc34aac8
fasteners==0.16 \
--hash=sha256:74b6847e0a6bb3b56c8511af8e24c40e4cf7a774dfff5b251c260ed338096a4b \
--hash=sha256:c995d8c26b017c5d6a6de9ad29a0f9cdd57de61ae1113d28fac26622b06a0933
Expand Down
1 change: 1 addition & 0 deletions 3rdparty/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
ansicolors==1.1.8
dockerfile==3.2.0
fasteners==0.16
freezegun==1.1.0

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ show_traceback = true
module = [
"bs4",
"colors",
"dockerfile",
"fasteners",
"freezegun",
"hdrh",
Expand Down
49 changes: 49 additions & 0 deletions src/python/pants/backend/docker/dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).


from pants.backend.docker.parser import DockerfileInfo, DockerfileParseRequest
from pants.backend.docker.target_types import DockerDependencies, DockerImageSources
from pants.backend.python.goals.package_pex_binary import PexBinaryFieldSet
from pants.engine.addresses import Address, Addresses, UnparsedAddressInputs
from pants.engine.rules import Get, collect_rules, rule
from pants.engine.target import (
InjectDependenciesRequest,
InjectedDependencies,
Targets,
WrappedTarget,
)
from pants.engine.unions import UnionRule


class InjectDockerDependencies(InjectDependenciesRequest):
inject_for = DockerDependencies


@rule
async def inject_docker_dependencies(request: InjectDockerDependencies) -> InjectedDependencies:
"""Inspects COPY instructions in the Dockerfile for references to known targets."""
original_tgt = await Get(WrappedTarget, Address, request.dependencies_field.address)
sources = original_tgt.target[DockerImageSources]
if not sources.value:
return InjectedDependencies()

dockerfile = await Get(DockerfileInfo, DockerfileParseRequest(sources))
targets = await Get(
Targets,
UnparsedAddressInputs(
dockerfile.putative_target_addresses,
owning_address=None,
),
)

return InjectedDependencies(
Addresses([tgt.address for tgt in targets if PexBinaryFieldSet.is_applicable(tgt)])
)


def rules():
return [
*collect_rules(),
UnionRule(InjectDependenciesRequest, InjectDockerDependencies),
]
63 changes: 63 additions & 0 deletions src/python/pants/backend/docker/dependencies_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from textwrap import dedent

import pytest

from pants.backend.docker.dependencies import InjectDockerDependencies, inject_docker_dependencies
from pants.backend.docker.parser import parse_dockerfile
from pants.backend.docker.target_types import DockerDependencies, DockerImage
from pants.backend.python.target_types import PexBinary
from pants.engine.addresses import Address
from pants.engine.target import InjectedDependencies
from pants.testutil.rule_runner import QueryRule, RuleRunner


@pytest.fixture
def rule_runner() -> RuleRunner:
return RuleRunner(
rules=[
inject_docker_dependencies,
parse_dockerfile,
QueryRule(InjectedDependencies, (InjectDockerDependencies,)),
],
target_types=[DockerImage, PexBinary],
)


def test_inject_docker_dependencies(rule_runner: RuleRunner) -> None:
rule_runner.add_to_build_file(
"project/image/test",
dedent(
"""\
docker_image(name="image")
"""
),
)
rule_runner.create_file(
"project/image/test/Dockerfile",
dedent(
"""\
FROM baseimage
ENTRYPOINT ["./entrypoint"]
COPY project.hello.main/main_binary.pex /entrypoint
"""
),
)
rule_runner.add_to_build_file(
"project/hello/main",
dedent(
"""\
pex_binary(name="main_binary")
"""
),
)
tgt = rule_runner.get_target(Address("project/image/test", target_name="image"))
injected = rule_runner.request(
InjectedDependencies,
[InjectDockerDependencies(tgt[DockerDependencies])],
)
assert injected == InjectedDependencies(
[Address("project/hello/main", target_name="main_binary")]
)
40 changes: 35 additions & 5 deletions src/python/pants/backend/docker/docker_build_context_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@
import pytest

from pants.backend.docker.docker_build_context import DockerBuildContext, DockerBuildContextRequest
from pants.backend.docker.rules import rules
from pants.backend.docker.docker_build_context import rules as context_rules
from pants.backend.docker.target_types import DockerImage
from pants.backend.python import target_types_rules
from pants.backend.python.goals import package_pex_binary
from pants.backend.python.goals.package_pex_binary import PexBinaryFieldSet
from pants.backend.python.target_types import PexBinary
from pants.backend.python.util_rules import pex_from_targets
from pants.core.goals.package import BuiltPackage
from pants.core.target_types import Files
from pants.core.util_rules.source_files import rules as source_files_rules
from pants.core.target_types import rules as core_target_types_rules
from pants.engine.addresses import Address
from pants.engine.fs import Snapshot
from pants.testutil.rule_runner import QueryRule, RuleRunner
Expand All @@ -21,11 +27,15 @@
def rule_runner() -> RuleRunner:
return RuleRunner(
rules=[
*rules(),
*source_files_rules(),
*context_rules(),
*core_target_types_rules(),
*package_pex_binary.rules(),
*pex_from_targets.rules(),
*target_types_rules.rules(),
QueryRule(BuiltPackage, [PexBinaryFieldSet]),
QueryRule(DockerBuildContext, (DockerBuildContextRequest,)),
],
target_types=[DockerImage, Files],
target_types=[DockerImage, Files, PexBinary],
)


Expand Down Expand Up @@ -144,3 +154,23 @@ def test_files_out_of_tree(rule_runner: RuleRunner) -> None:
"res/static/sub/s03",
],
)


def test_packaged_pex_path(rule_runner: RuleRunner) -> None:
# This test is here to ensure that we catch if there is any change in the generated path where
# built pex binaries go, as we rely on that for dependency inference in the Dockerfile.
rule_runner.set_options([], env_inherit={"PATH", "PYENV_ROOT", "HOME"})
rule_runner.write_files(
{
"src/docker/BUILD": """docker_image(dependencies=["src/python/proj/cli:bin"])""",
"src/docker/Dockerfile": """FROM python""",
"src/python/proj/cli/BUILD": """pex_binary(name="bin", entry_point="main.py")""",
"src/python/proj/cli/main.py": """print("cli main")""",
}
)

assert_build_context(
rule_runner,
Address("src/docker", target_name="docker"),
expected_files=["src/docker/Dockerfile", "src.python.proj.cli/bin.pex"],
)
108 changes: 108 additions & 0 deletions src/python/pants/backend/docker/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import ClassVar, Generator, Pattern

from dockerfile import Command, parse_string

from pants.backend.docker.target_types import DockerImageSources
from pants.engine.fs import DigestContents, PathGlobs
from pants.engine.rules import Get, collect_rules, rule
from pants.option.global_options import GlobalOptions


@dataclass(frozen=True)
class DockerfileParseRequest:
sources: DockerImageSources


@dataclass(frozen=True)
class DockerfileInfo:
putative_target_addresses: tuple[str, ...] = ()


_pex_target_regexp: str = r"""
(?# optional path, one level with dot-separated words)
(?:(?P<path>(?:\w[.0-9_-]?)+) /)?

(?# binary name, with .pex file extension)
(?P<name>(?:\w[.0-9_-]?)+) \.pex$
"""


@dataclass(frozen=True)
class ParsedDockerfile:
commands: tuple[Command, ...]
pex_target_regexp: ClassVar[Pattern]

def __post_init__(self):
# Compile regexp when creating the first instance, and store it on the class.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not obvious to me that this is thread-safe. Assignment to a simple variable is atomic in Python, but there is no clear answer on whether assignment to a member (which involves the object's member dictionary) is atomic. In practice it probably depends on the implementation.

My apologies for making the regex issue more complicated than it needed to be, but I'm now thinking we should do the simple thing you did all along and compile at module load time, storing the compiled regex directly in the module. In v2 there are so few regexes that this is not going to be a performance issue. Again, sorry for spinning you in circles on this...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, now we've explored the alternatives and can motivate the (small) cost of load-time compiling the regexp. I can leave a short comment about it so it is not lost and forgotten.
Running in circles is common for me 🐶

if not getattr(ParsedDockerfile, "pex_target_regexp", None):
ParsedDockerfile.pex_target_regexp = re.compile(_pex_target_regexp, re.VERBOSE)

@classmethod
def parse(cls, dockerfile: str) -> "ParsedDockerfile":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe dockerfile_contents? So it's obvious that you pass the contents, not a path?

return cls(parse_string(dockerfile))

def get_all(self, command_name: str) -> Generator[Command, None, None]:
for command in self.commands:
if command.cmd.upper() == command_name:
yield command

def translate_to_address(self, value: str) -> str | None:
# Technically this could be a classmethod, but we need at least one instance created first,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After simplifying the regex, this can be a staticmethod again...

# so it is safer to simply have this as an instance method.

# Translate something that resembles a packaged pex binary to its corresponding target
# address. E.g. src.python.tool/bin.pex => src/python/tool:bin
pex = re.match(self.pex_target_regexp, value)
if pex:
path = (pex.group("path") or "").replace(".", "/")
name = pex.group("name")
return ":".join([path, name])

return None

def copy_source_addresses(self) -> Generator[str, None, None]:
for copy in self.get_all("COPY"):
if copy.flags:
# Do not consider COPY --from=... instructions etc.
continue
# The last element of copy.value is the destination.
for source in copy.value[:-1]:
address = self.translate_to_address(source)
if address:
yield address

def putative_target_addresses(self) -> tuple[str, ...]:
addresses: list[str] = []
addresses.extend(self.copy_source_addresses())
return tuple(addresses)


@rule
async def parse_dockerfile(
request: DockerfileParseRequest, global_options: GlobalOptions
) -> DockerfileInfo:
if not request.sources.value:
return DockerfileInfo()

contents = await Get(
DigestContents,
PathGlobs,
request.sources.path_globs(global_options.options.files_not_found_behavior),
)

parsed = ParsedDockerfile.parse(contents[0].content.decode())

return DockerfileInfo(
putative_target_addresses=parsed.putative_target_addresses(),
)


def rules():
return collect_rules()
49 changes: 49 additions & 0 deletions src/python/pants/backend/docker/parser_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).


from textwrap import dedent

import pytest

from pants.backend.docker.parser import ParsedDockerfile


def test_putative_target_addresses() -> None:
parsed = ParsedDockerfile.parse(
dedent(
"""\
FROM base
COPY some.target/binary.pex some.target/tool.pex /bin
COPY --from=scratch this.is/ignored.pex /opt
COPY binary another/cli.pex tool /bin
"""
)
)
assert parsed.putative_target_addresses() == (
"some/target:binary",
"some/target:tool",
"another:cli",
)


@pytest.mark.parametrize(
"copy_source, putative_target_address",
[
("a/b", None),
("a/b.c", None),
("a.b", None),
("a.pex", ":a"),
("a/b.pex", "a:b"),
("a.b/c.pex", "a/b:c"),
("a.b.c/d.pex", "a/b/c:d"),
("a.b/c/d.pex", None),
("a/b/c.pex", None),
("a.0-1/b_2.pex", "a/0-1:b_2"),
("a#b/c.pex", None),
],
)
def test_translate_to_address(copy_source, putative_target_address) -> None:
parsed = ParsedDockerfile(())
actual = parsed.translate_to_address(copy_source)
assert actual == putative_target_address
4 changes: 4 additions & 0 deletions src/python/pants/backend/docker/rules.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pants.backend.docker.dependencies import rules as dependencies_rules
from pants.backend.docker.docker_binary import rules as binary_rules
from pants.backend.docker.docker_build import rules as build_rules
from pants.backend.docker.docker_build_context import rules as context_rules
from pants.backend.docker.parser import rules as parser_rules


def rules():
return [
*binary_rules(),
*build_rules(),
*context_rules(),
*dependencies_rules(),
*parser_rules(),
]