Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add container_run_and_commit_layer rule #1586

Merged
merged 15 commits into from
Sep 23, 2020
5 changes: 5 additions & 0 deletions .bazelci/presubmit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ platforms:
- "//tests/docker/util:test_container_commit_rule"
- "//tests/docker/util:test_container_extract_rule"
- "//tests/docker/util:test_extract_script"
- "//tests/docker/util:test_container_commit_layer_rule"
- "//tests/docker/util:test_container_commit_layer_metadata"

# Disabled tests that do not run in BuildKite CI.
# The targets listed below are not an exhaustive list of disabled targets
Expand Down Expand Up @@ -114,6 +116,8 @@ platforms:
- "//tests/docker/util:test_container_commit_rule"
- "//tests/docker/util:test_container_extract_rule"
- "//tests/docker/util:test_extract_script"
- "//tests/docker/util:test_container_commit_layer_rule"
- "//tests/docker/util:test_container_commit_layer_metadata"


# Disabled tests that do not run in BuildKite CI.
Expand Down Expand Up @@ -186,6 +190,7 @@ platforms:
- "//tests/docker/util:test_container_commit_rule"
- "//tests/docker/util:test_container_extract_rule"
- "//tests/docker/util:test_extract_script"
- "//tests/docker/util:test_container_commit_layer_rule"
# Disabled e2e tests that pull from localhost in nested workspace
- "-//testing/new_pusher_tests/..."
test_flags:
Expand Down
7 changes: 7 additions & 0 deletions contrib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ py_library(
srcs_version = "PY2AND3",
)

# Command-line tool (extract_last_layer.py) that extracts the last layer of a
# docker image tarball.
py_binary(
name = "extract_last_layer",
srcs = [":extract_last_layer.py"],
legacy_create_init = False,
python_version = "PY3",
)

py_binary(
name = "compare_ids_test",
srcs = [":compare_ids_test.py"],
Expand Down
100 changes: 100 additions & 0 deletions contrib/extract_last_layer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright 2020 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Extracts the last layer of a docker image out of an image tarball

Takes three arguments: the path to the image tarball, the output file for the layer, and the output file for the layer diffID
"""


from __future__ import print_function
from json import JSONDecoder
import hashlib
import sys
import tarfile


def extract_last_layer(tar_path, layer_path, diffid_path):
    """Extracts the last layer of a docker image from an image tarball.

    The last entry of the "Layers" list in the tarball's manifest.json is
    copied to layer_path, and the hex SHA-256 digest of the copied bytes is
    written to diffid_path. If the manifest or the layer cannot be read, or
    either output cannot be written, a message is printed to stderr and the
    process exits with status 1.

    Args:
        tar_path: str path to the tarball
        layer_path: str path for the output layer
        diffid_path: str path for the layer diff ID

    Returns:
        str the ID of the layer, i.e. the first "/"-separated component of
        the layer's path inside the tarball (not the diff ID, which is only
        written to diffid_path)
    """
    decoder = JSONDecoder()

    # Hash the layer as we extract it
    diff_id = hashlib.sha256()

    # The context manager guarantees the tarball is closed on every path,
    # including the sys.exit() calls below.
    with tarfile.open(tar_path, mode="r") as tar:
        try:
            # Read the manifest as a file object (not extracted to disk)
            manifest = tar.extractfile("manifest.json").read().decode("utf-8")
        except Exception as e:
            print((
                "Unable to extract manifest.json, make sure {} "
                "is a valid docker image.\n").format(tar_path),
                e,
                file=sys.stderr)
            sys.exit(1)

        # Get the manifest dictionary from JSON
        manifest = decoder.decode(manifest)[0]

        # The last entry of "Layers" is the path of the top-most layer
        last_layer_path = manifest["Layers"][-1]
        layer_id = last_layer_path.split("/")[0]

        try:
            # Extract the layer from the image to the output path
            last_layer = tar.extractfile(last_layer_path)
            with open(layer_path, "wb") as f:
                # Copy in blocks, to avoid loading the entire layer in memory
                for buf in iter(lambda: last_layer.read(4096), b""):
                    diff_id.update(buf)
                    f.write(buf)
        except Exception as e:
            print((
                "Unable to extract last layer {} to {}, make sure {} "
                "is a valid docker image and that the layer path is writable\n").format(layer_id, layer_path, tar_path),
                e,
                file=sys.stderr)
            sys.exit(1)

    # Output the diff ID hash
    diff_id_digest = diff_id.hexdigest()
    try:
        with open(diffid_path, "w") as f:
            f.write(diff_id_digest)
    except Exception as e:
        print("Unable to write layer Diff ID {} to {}, make sure the path is writeable\n".format(diff_id_digest, diffid_path), e, file=sys.stderr)
        sys.exit(1)

    return layer_id


if __name__ == "__main__":
    # Positional CLI arguments: image tarball, output layer file, output
    # diff ID file. The resulting layer ID is echoed on stdout.
    image_tar_arg = sys.argv[1]
    layer_out_arg = sys.argv[2]
    diffid_out_arg = sys.argv[3]
    print(extract_last_layer(image_tar_arg, layer_out_arg, diffid_out_arg))
1 change: 1 addition & 0 deletions docker/util/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ exports_files([
"commit.sh.tpl",
"extract.sh.tpl",
"image_util.sh.tpl",
"commit_layer.sh.tpl",
])

bzl_library(
Expand Down
77 changes: 77 additions & 0 deletions docker/util/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ properly via

* [container_run_and_commit](#container_run_and_commit)
* [container_run_and_extract](#container_run_and_extract)
* [container_run_and_commit_layer](#container_run_and_commit_layer)

## container_run_and_commit

Expand Down Expand Up @@ -137,3 +138,79 @@ bazel-out directory.
</tbody>
</table>

<a name="container_run_and_commit_layer"></a>

## container_run_and_commit_layer

<pre>
container_run_and_commit_layer(<a href="#container_run_and_commit_layer-name">name</a>, <a href="#container_run_and_commit_layer-commands">commands</a>, <a href="#container_run_and_commit_layer-docker_run_flags">docker_run_flags</a>, <a href="#container_run_and_commit_layer-image">image</a>, <a href="#container_run_and_commit_layer-env">env</a>)
</pre>

This rule runs a set of commands in a given image, waits for the commands
to finish, and then outputs the difference to a tarball, similar to <a href="/README.md#container_layer">`container_layer`</a>. The output can be used in the `layers` attribute of <a href="/README.md#container_image">`container_image`</a>.

### Attributes

<table class="params-table">
<colgroup>
<col class="col-param" />
<col class="col-description" />
</colgroup>
<tbody>
<tr id="container_run_and_commit_layer-name">
<td><code>name</code></td>
<td>
<a href="https://bazel.build/docs/build-ref.html#name">Name</a>; required
<p>
A unique name for this target.
</p>
</td>
</tr>
<tr id="container_run_and_commit_layer-commands">
<td><code>commands</code></td>
<td>
List of strings; required
<p>
A list of commands to run (sequentially) inside <code>sh</code> in the container. If the base image uses a non-standard entrypoint, you may need to use <code>docker_run_flags</code> to change the entrypoint to a shell.
</p>
</td>
</tr>
<tr id="container_run_and_commit_layer-docker_run_flags">
<td><code>docker_run_flags</code></td>
<td>
List of strings; optional
<p>
Extra flags to pass to the docker run command. You may want to use this to override the <code>entrypoint</code> for images with a non-standard entrypoint with <code>["--entrypoint=''"]</code>. These flags only apply to the build step of this rule, and do not affect the output layer. That is, if you change the entrypoint here, and use the layer in a <code>container_image</code> later, the entrypoint of that image will not be changed.
</p>
</td>
</tr>
<tr id="container_run_and_commit_layer-image">
<td><code>image</code></td>
<td>
<a href="https://bazel.build/docs/build-ref.html#labels">Label</a>; required
<p>
The image to run the commands in.
</p>
</td>
</tr>
<tr id="container_run_and_commit_layer-env">
<td><code>env</code></td>
<td>
<code>Dictionary from strings to strings, optional</code>
<p><a href="https://docs.docker.com/engine/reference/builder/#env">Dictionary
from environment variable names to their values when running the
Docker image.</a></p>
<p>
<code>
env = {
"FOO": "bar",
...
},
</code>
</p>
<p>The values of this field support make variables (e.g., <code>$(FOO)</code>) and stamp variables; keys support make variables as well.</p>
</td>
</tr>
</tbody>
</table>

40 changes: 40 additions & 0 deletions docker/util/commit_layer.sh.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash

# Runs the configured commands in a container started from the input image,
# saves the resulting image, and extracts its last layer (plus that layer's
# diff ID) to the output files. Intermediate containers, images and tarballs
# are removed on success.

set -ex

# Load utils
source %{util_script}

# Resolve the docker tool path
DOCKER="%{docker_tool_path}"
# Left unquoted at the call sites on purpose: it may hold several flags that
# must be word-split.
DOCKER_FLAGS="%{docker_flags}"

if [[ -z "$DOCKER" ]]; then
    echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?"
    exit 1
fi

# Load the image and remember its name
image_id=$(%{image_id_extractor_path} %{image_tar})
$DOCKER $DOCKER_FLAGS load -i %{image_tar}

id=$($DOCKER $DOCKER_FLAGS run -d %{docker_run_flags} --env-file %{env_file_path} "$image_id" %{commands})
# Actually wait for the container to finish running its commands
retcode=$($DOCKER $DOCKER_FLAGS wait "$id")
# Trigger a failure if the run had a non-zero exit status
if [ "$retcode" != 0 ]; then
    # Show the container output to help debugging. A failure to fetch the logs
    # must not mask the real error: with `logs && false`, `set -e` would NOT
    # abort when `logs` itself fails, and the script would go on to commit.
    $DOCKER $DOCKER_FLAGS logs "$id" || true
    exit 1
fi
OUTPUT_IMAGE_TAR="%{output_layer_tar}.image.tar"
# reset_cmd is provided by the util script sourced above; presumably it
# commits the container as the output image - confirm against that script.
reset_cmd "$image_id" "$id" %{output_image}
$DOCKER $DOCKER_FLAGS save %{output_image} -o "$OUTPUT_IMAGE_TAR"
# Delete the container and the intermediate image
$DOCKER $DOCKER_FLAGS rm "$id"
$DOCKER $DOCKER_FLAGS rmi %{output_image}

# Extract the last layer from the image - this will be the layer generated by $DOCKER commit
%{image_last_layer_extractor_path} "$OUTPUT_IMAGE_TAR" %{output_layer_tar} %{output_diff_id}

# Delete the intermediate tar
rm "$OUTPUT_IMAGE_TAR"

Loading