diff --git a/.bazelci/presubmit.yml b/.bazelci/presubmit.yml index 99a3bd7c9..19fe26aef 100644 --- a/.bazelci/presubmit.yml +++ b/.bazelci/presubmit.yml @@ -49,6 +49,8 @@ platforms: - "//tests/docker/util:test_container_commit_rule" - "//tests/docker/util:test_container_extract_rule" - "//tests/docker/util:test_extract_script" + - "//tests/docker/util:test_container_commit_layer_rule" + - "//tests/docker/util:test_container_commit_layer_metadata" # Disabled tests that do not run in BuildKite CI. # The targets listed below is not an exhaustive list of disabled targets @@ -114,6 +116,8 @@ platforms: - "//tests/docker/util:test_container_commit_rule" - "//tests/docker/util:test_container_extract_rule" - "//tests/docker/util:test_extract_script" + - "//tests/docker/util:test_container_commit_layer_rule" + - "//tests/docker/util:test_container_commit_layer_metadata" # Disabled tests that do not run in BuildKite CI. @@ -186,6 +190,7 @@ platforms: - "//tests/docker/util:test_container_commit_rule" - "//tests/docker/util:test_container_extract_rule" - "//tests/docker/util:test_extract_script" + - "//tests/docker/util:test_container_commit_layer_rule" # Disabled e2e tests that pull from localhost in nested workspace - "-//testing/new_pusher_tests/..." test_flags: diff --git a/contrib/BUILD b/contrib/BUILD index ee0ee2c69..ec93da8f7 100644 --- a/contrib/BUILD +++ b/contrib/BUILD @@ -44,6 +44,13 @@ py_library( srcs_version = "PY2AND3", ) +py_binary( + name = "extract_last_layer", + srcs = [":extract_last_layer.py"], + legacy_create_init = False, + python_version = "PY3", +) + py_binary( name = "compare_ids_test", srcs = [":compare_ids_test.py"], diff --git a/contrib/extract_last_layer.py b/contrib/extract_last_layer.py new file mode 100755 index 000000000..9efbd25e3 --- /dev/null +++ b/contrib/extract_last_layer.py @@ -0,0 +1,100 @@ +# Copyright 2020 The Bazel Authors. All rights reserved. 
def extract_last_layer(tar_path, layer_path, diffid_path):
    """Extracts the last layer of a docker image out of an image tarball.

    Reads `manifest.json` from the tarball, takes the final entry of the
    `Layers` list of the first image listed there, streams that member to
    `layer_path`, and writes the hex SHA-256 of the streamed bytes to
    `diffid_path`.

    Args:
      tar_path: str path to the image tarball
      layer_path: str path for the output layer
      diffid_path: str path for the layer diff ID (hex SHA-256 digest)

    Returns:
      str the layer ID, i.e. the first path component of the layer's entry
      in the manifest. The diff ID itself is not returned; it is only
      written to `diffid_path`.

    Raises:
      SystemExit: with exit code 1 (after printing a message to stderr) if
        the manifest cannot be read, the layer cannot be extracted, or the
        diff ID cannot be written.
    """
    block_size = 4096
    decoder = JSONDecoder()

    # Hash the layer as we extract it
    diff_id = hashlib.sha256()

    # The context manager guarantees the tarball handle is released on every
    # path, including the sys.exit() calls below.
    with tarfile.open(tar_path, mode="r") as tar:
        try:
            # Extracts it as a file object (not to the disk)
            manifest = tar.extractfile("manifest.json").read().decode("utf-8")
        except Exception as e:
            print((
                "Unable to extract manifest.json, make sure {} "
                "is a valid docker image.\n").format(tar_path),
                  e,
                  file=sys.stderr)
            sys.exit(1)

        # Get the manifest dictionary from JSON (first image in the tarball)
        manifest = decoder.decode(manifest)[0]

        # Get the last layer tar path
        last_layer_path = manifest["Layers"][-1]
        layer_id = last_layer_path.split("/")[0]

        try:
            # Extract the layer from the image to the output path
            last_layer = tar.extractfile(last_layer_path)
            with open(layer_path, "wb") as f:
                # Extract in blocks, to avoid loading the entire layer in
                # memory; read() returns b"" at end of stream.
                for buf in iter(lambda: last_layer.read(block_size), b""):
                    diff_id.update(buf)
                    f.write(buf)
        except Exception as e:
            print((
                "Unable to extract last layer {} to {}, make sure {} "
                "is a valid docker image and that the layer path is writable\n").format(layer_id, layer_path, tar_path),
                  e,
                  file=sys.stderr)
            sys.exit(1)

    # Output the diff ID hash
    diff_id_digest = diff_id.hexdigest()
    try:
        with open(diffid_path, "w") as f:
            f.write(diff_id_digest)
    except Exception as e:
        print("Unable to write layer Diff ID {} to {}, make sure the path is writeable\n".format(diff_id_digest, diffid_path), e, file=sys.stderr)
        sys.exit(1)

    return layer_id


if __name__ == "__main__":
    print(extract_last_layer(sys.argv[1], sys.argv[2], sys.argv[3]))
+container_run_and_commit_layer(name, commands, docker_run_flags, image, env)
+
+ +This rule runs a set of commands in a given image, waits for the commands +to finish, and then outputs the difference to a tarball, similar to `container_layer`. The output can be used in the `layers` attribute of `container_image`. + +### Attributes + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
name + Name; required +

+ A unique name for this target. +

+
commands + List of strings; required +

+ A list of commands to run (sequentially) inside `sh` in the container. If the base image uses a non-standard entrypoint, you may need to use `docker_run_flags` to change the entrypoint to a shell. +

+
docker_run_flags + List of strings; optional +

+ Extra flags to pass to the docker run command. You may want to use this to override the `entrypoint` for images with a non-standard entrypoint with `["--entrypoint=''"]`. These flags only apply to the build step of this rule, and do not affect the output layer. That is, if you change the entrypoint here, and use the layer in a `container_image` later, the entrypoint of that image will not be changed. +

+
image + Label; required +

+ The image to run the commands in. +

+
env + Dictionary from strings to strings; optional +

Dictionary + from environment variable names to their values when running the + Docker image.

+

+ + env = { + "FOO": "bar", + ... + }, + +

+

The values of this field support make variables (e.g., $(FOO)) and stamp variables; keys support make variables as well.

+
#!/bin/bash
#
# Template for the container_run_and_commit_layer build action.
# The %{...} placeholders are substituted before the script runs.
#
# Steps: load the input image, run the given commands in a container,
# save the result as an image tarball via reset_cmd + docker save, and
# extract the last layer of that tarball (plus its diff ID).

set -ex

# Load utils (reset_cmd is not defined in this script, so it presumably
# comes from the sourced util script)
source %{util_script}

# Resolve the docker tool path
DOCKER="%{docker_tool_path}"
# NOTE: $DOCKER_FLAGS is intentionally expanded unquoted below so that
# multiple flags are split into separate arguments.
DOCKER_FLAGS="%{docker_flags}"

if [[ -z "$DOCKER" ]]; then
    echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?"
    exit 1
fi

# Load the image and remember its name
image_id=$(%{image_id_extractor_path} %{image_tar})
$DOCKER $DOCKER_FLAGS load -i %{image_tar}

id=$($DOCKER $DOCKER_FLAGS run -d %{docker_run_flags} --env-file %{env_file_path} "$image_id" %{commands})
# Actually wait for the container to finish running its commands
retcode=$($DOCKER $DOCKER_FLAGS wait "$id")
# Trigger a failure if the run had a non-zero exit status
if [ "$retcode" != 0 ]; then
    # Printing the logs is best-effort. The previous `logs && false` form
    # did not abort under `set -e` when `docker logs` itself failed
    # (only the last command of an && list triggers errexit), so fail
    # explicitly instead.
    $DOCKER $DOCKER_FLAGS logs "$id" || true
    exit 1
fi
OUTPUT_IMAGE_TAR="%{output_layer_tar}.image.tar"
reset_cmd "$image_id" "$id" %{output_image}
$DOCKER $DOCKER_FLAGS save %{output_image} -o "$OUTPUT_IMAGE_TAR"
# Delete the container and the intermediate image
$DOCKER $DOCKER_FLAGS rm "$id"
$DOCKER $DOCKER_FLAGS rmi %{output_image}

# Extract the last layer from the image - this will be the layer generated by $DOCKER commit
%{image_last_layer_extractor_path} "$OUTPUT_IMAGE_TAR" %{output_layer_tar} %{output_diff_id}

# Delete the intermediate tar
rm "$OUTPUT_IMAGE_TAR"
""" +load("@bazel_skylib//lib:dicts.bzl", "dicts") +load( + "@bazel_tools//tools/build_defs/hash:hash.bzl", + _hash_tools = "tools", +) +load("@io_bazel_rules_docker//container:layer.bzl", "zip_layer") +load("@io_bazel_rules_docker//container:providers.bzl", "LayerInfo") +load( + "//skylib:zip.bzl", + _zip_tools = "tools", +) + def _extract_impl( ctx, name = "", @@ -277,6 +289,186 @@ commit = struct( implementation = _commit_impl, ) +def _commit_layer_impl( + ctx, + name = None, + image = None, + commands = None, + docker_run_flags = None, + env = None, + compression = None, + compression_options = None, + output_layer_tar = None): + """Implementation for the container_run_and_commit_layer rule. + + This rule runs a set of commands in a given image, waits for the commands + to finish, and then extracts the layer of changes into a new container_layer target. + + Args: + ctx: The bazel rule context + name: A unique name for this rule. + image: The input image tarball + commands: The commands to run in the input image container + docker_run_flags: String list, overrides ctx.attr.docker_run_flags + env: str Dict, overrides ctx.attr.env + compression: str, overrides ctx.attr.compression + compression_options: str list, overrides ctx.attr.compression_options + output_layer_tar: The output layer obtained as a result of running + the commands on the input image + """ + + name = name or ctx.attr.name + image = image or ctx.file.image + commands = commands or ctx.attr.commands + docker_run_flags = docker_run_flags or ctx.attr.docker_run_flags + env = env or ctx.attr.env + script = ctx.actions.declare_file(name + ".build") + compression = compression or ctx.attr.compression + compression_options = compression_options or ctx.attr.compression_options + output_layer_tar = output_layer_tar or ctx.outputs.layer + + toolchain_info = ctx.toolchains["@io_bazel_rules_docker//toolchains/docker:toolchain_type"].info + + # Generate a shell script to execute the reset cmd + image_utils = 
ctx.actions.declare_file("image_util.sh") + ctx.actions.expand_template( + template = ctx.file._image_utils_tpl, + output = image_utils, + substitutions = { + "%{docker_flags}": " ".join(toolchain_info.docker_flags), + "%{docker_tool_path}": toolchain_info.tool_path, + }, + is_executable = True, + ) + + docker_env = [ + "{}={}".format( + ctx.expand_make_variables("env", key, {}), + ctx.expand_make_variables("env", value, {}), + ) + for key, value in env.items() + ] + + env_file = ctx.actions.declare_file(name + ".env") + ctx.actions.write(env_file, "\n".join(docker_env)) + + output_diff_id = ctx.actions.declare_file(output_layer_tar.basename + ".sha256") + + # Generate a shell script to execute the run statement and extract the layer + ctx.actions.expand_template( + template = ctx.file._run_tpl, + output = script, + substitutions = { + "%{commands}": _process_commands(commands), + "%{docker_flags}": " ".join(toolchain_info.docker_flags), + "%{docker_run_flags}": " ".join(docker_run_flags), + "%{docker_tool_path}": toolchain_info.tool_path, + "%{env_file_path}": env_file.path, + "%{image_id_extractor_path}": ctx.executable._extract_image_id.path, + "%{image_last_layer_extractor_path}": ctx.executable._last_layer_extractor_tool.path, + "%{image_tar}": image.path, + "%{output_diff_id}": output_diff_id.path, + "%{output_image}": "bazel/%s:%s" % ( + ctx.label.package or "default", + name, + ), + "%{output_layer_tar}": output_layer_tar.path, + "%{util_script}": image_utils.path, + }, + is_executable = True, + ) + + runfiles = [image, image_utils, env_file] + + ctx.actions.run( + outputs = [output_layer_tar, output_diff_id], + inputs = runfiles, + executable = script, + tools = [ctx.executable._extract_image_id, ctx.executable._last_layer_extractor_tool], + use_default_shell_env = True, + ) + + # Generate a zipped layer and calculate the blob sum, this is for LayerInfo + zipped_layer, blob_sum = zip_layer( + ctx, + output_layer_tar, + compression = compression, + 
compression_options = compression_options, + ) + + return [ + LayerInfo( + unzipped_layer = output_layer_tar, + diff_id = output_diff_id, + zipped_layer = zipped_layer, + blob_sum = blob_sum, + env = env, + ), + ] + +_commit_layer_attrs = dicts.add({ + "commands": attr.string_list( + doc = "A list of commands to run (sequentially) in the container.", + mandatory = True, + allow_empty = False, + ), + "compression": attr.string(default = "gzip"), + "compression_options": attr.string_list(), + "docker_run_flags": attr.string_list( + doc = "Extra flags to pass to the docker run command.", + mandatory = False, + ), + "env": attr.string_dict(), + "image": attr.label( + doc = "The image to run the commands in.", + mandatory = True, + allow_single_file = True, + cfg = "target", + ), + "_extract_image_id": attr.label( + default = Label("//contrib:extract_image_id"), + cfg = "host", + executable = True, + allow_files = True, + ), + "_image_utils_tpl": attr.label( + default = "//docker/util:image_util.sh.tpl", + allow_single_file = True, + ), + "_last_layer_extractor_tool": attr.label( + default = Label("//contrib:extract_last_layer"), + cfg = "host", + executable = True, + allow_files = True, + ), + "_run_tpl": attr.label( + default = Label("//docker/util:commit_layer.sh.tpl"), + allow_single_file = True, + ), +}, _hash_tools, _zip_tools) + +_commit_layer_outputs = { + "layer": "%{name}-layer.tar", +} + +container_run_and_commit_layer = rule( + attrs = _commit_layer_attrs, + doc = ("This rule runs a set of commands in a given image, waits" + + "for the commands to finish, and then commits the" + + "container state to a new layer."), + executable = False, + outputs = _commit_layer_outputs, + implementation = _commit_layer_impl, + toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], +) + +# Export container_run_and_commit_layer rule for other bazel rules to depend on. 
+commit_layer = struct( + attrs = _commit_layer_attrs, + outputs = _commit_layer_outputs, + implementation = _commit_layer_impl, +) + def _process_commands(command_list): # Use the $ to allow escape characters in string return 'sh -c $\"{0}\"'.format(" && ".join(command_list)) diff --git a/tests/docker/util/BUILD b/tests/docker/util/BUILD index 61395f23a..40a6b5776 100644 --- a/tests/docker/util/BUILD +++ b/tests/docker/util/BUILD @@ -28,6 +28,7 @@ load("//contrib:test.bzl", "container_test") load( "//docker/util:run.bzl", "container_run_and_commit", + "container_run_and_commit_layer", "container_run_and_extract", ) @@ -95,3 +96,33 @@ container_test( configs = [":container_commit.yaml"], image = ":test_container_commit_image", ) + +container_run_and_commit_layer( + name = "test_container_commit_layer", + commands = ["touch /foo.txt"], + docker_run_flags = [ + "-u", + "root", + ], + image = "@debian_base//image", +) + +rule_test( + name = "test_container_commit_layer_rule", + generates = [ + "test_container_commit_layer-layer.tar", + ], + rule = "test_container_commit_layer", +) + +container_image( + name = "test_container_commit_layer_image", + base = "@debian_base//image", + layers = [":test_container_commit_layer"], +) + +container_test( + name = "test_container_commit_layer_metadata", + configs = [":container_commit_layer.yaml"], + image = ":test_container_commit_layer_image", +) diff --git a/tests/docker/util/container_commit_layer.yaml b/tests/docker/util/container_commit_layer.yaml new file mode 100644 index 000000000..25dfe726d --- /dev/null +++ b/tests/docker/util/container_commit_layer.yaml @@ -0,0 +1,6 @@ +schemaVersion: 2.0.0 + +fileExistenceTests: +- name: 'foo.txt' + path: '/' + shouldExist: true