bazelbuild · alex1545 · Sep 23, 2020 · Aug 8, 2020 · Aug 8, 2020 · Aug 8, 2020
@@ -49,6 +49,8 @@ platforms:
     - "//tests/docker/util:test_container_commit_rule"
     - "//tests/docker/util:test_container_extract_rule"
     - "//tests/docker/util:test_extract_script"
+    - "//tests/docker/util:test_container_commit_layer_rule"
+    - "//tests/docker/util:test_container_commit_layer_metadata"
 
     # Disabled tests that do not run in BuildKite CI.
     # The targets listed below is not an exhaustive list of disabled targets
@@ -114,6 +116,8 @@ platforms:
     - "//tests/docker/util:test_container_commit_rule"
     - "//tests/docker/util:test_container_extract_rule"
     - "//tests/docker/util:test_extract_script"
+    - "//tests/docker/util:test_container_commit_layer_rule"
+    - "//tests/docker/util:test_container_commit_layer_metadata"
 
 
     # Disabled tests that do not run in BuildKite CI.
@@ -186,6 +190,7 @@ platforms:
     - "//tests/docker/util:test_container_commit_rule"
     - "//tests/docker/util:test_container_extract_rule"
     - "//tests/docker/util:test_extract_script"
+    - "//tests/docker/util:test_container_commit_layer_rule"
     # Disabled e2e tests that pull from localhost in nested workspace
     - "-//testing/new_pusher_tests/..."
     test_flags:

@@ -44,6 +44,13 @@ py_library(
     srcs_version = "PY2AND3",
 )
 
+# Command-line tool built from extract_last_layer.py; per that script's
+# docstring it extracts the last layer of a docker image out of an image
+# tarball.
+py_binary(
+    name = "extract_last_layer",
+    srcs = [":extract_last_layer.py"],
+    legacy_create_init = False,
+    python_version = "PY3",
+)
+
 py_binary(
     name = "compare_ids_test",
     srcs = [":compare_ids_test.py"],

@@ -0,0 +1,100 @@
+# Copyright 2020 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Extracts the last layer of a docker image out of an image tarball.
+
+Takes three arguments: the path to the image tarball, the output file for the
+layer, and the output file for the layer diff ID. Prints the ID of the
+extracted layer to stdout.
+"""
+
+
+from __future__ import print_function
+from json import JSONDecoder
+import hashlib
+import sys
+import tarfile
+
+
+def extract_last_layer(tar_path, layer_path, diffid_path):
+  """Extracts the last layer from a docker image from an image tarball
+
+  Args:
+    tar_path: str path to the tarball
+    layer_path: str path for the output layer
+    diffid_path: str path for the layer diff ID
+
+
+  Returns:
+    str the diff ID of the layer
+
+  """
+  tar = tarfile.open(tar_path, mode="r")
+
+  decoder = JSONDecoder()
+  try:
+    # Extracts it as a file object (not to the disk)
+    manifest = tar.extractfile("manifest.json").read().decode("utf-8")
+  except Exception as e:
+    print((
+        "Unable to extract manifest.json, make sure {} "
+        "is a valid docker image.\n").format(tar_path),
+          e,
+          file=sys.stderr)
+    exit(1)
+
+  # Get the manifest dictionary from JSON
+  manifest = decoder.decode(manifest)[0]
+
+  # Get the last layer tar path
+  layers = manifest["Layers"]
+
+  last_layer_path = layers[-1]
+
+  layer_id = last_layer_path.split("/")[0]
+
+  # Hash the layer as we extract it
+  diff_id = hashlib.sha256()
+
+  try:
+    # Extract the layer from the image to the output path
+    last_layer = tar.extractfile(last_layer_path)
+    with open(layer_path, "wb") as f:
+      # Extract in blocks, to avoid loading the entire layer in memory
+      while True:
+        buf = last_layer.read(4096)
+        if buf:
+          diff_id.update(buf)
+          f.write(buf)
+        else:
+          break
+  except Exception as e:
+    print((
+        "Unable to extract last layer {} to {}, make sure {} "
+        "is a valid docker image and that the layer path is writable\n").format(layer_id, layer_path, tar_path),
+          e,
+          file=sys.stderr)
+    exit(1)
+
+  # Output the diff ID hash
+  diff_id_digest = diff_id.hexdigest()
+  try:
+    with open(diffid_path, "w") as f:
+      f.write(diff_id_digest)
+  except Exception as e:
+    print("Unable to write layer Diff ID {} to {}, make sure the path is writeable\n".format(diff_id_digest, diffid_path), e, file=sys.stderr)
+    exit(1)
+
+  return layer_id
+
+
+# Script entry point: argv[1], argv[2] and argv[3] are the image tarball path,
+# the output layer path and the output diff ID path; the value returned by
+# extract_last_layer (the layer ID) is printed to stdout.
+if __name__ == "__main__":
+  print(extract_last_layer(sys.argv[1], sys.argv[2], sys.argv[3]))
@@ -34,6 +34,7 @@ exports_files([
     "commit.sh.tpl",
     "extract.sh.tpl",
     "image_util.sh.tpl",
+    "commit_layer.sh.tpl",
 ])
 
 bzl_library(

@@ -11,6 +11,7 @@ properly via
 
 * [container_run_and_commit](#container_run_and_commit)
 * [container_run_and_extract](#container_run_and_extract)
+* [container_run_and_commit_layer](#container_run_and_commit_layer)
 
 ## container_run_and_commit
 
@@ -137,3 +138,79 @@ bazel-out directory.
   </tbody>
 </table>
 
+<a name="container_run_and_commit_layer"></a>
+
+## container_run_and_commit_layer
+
+<pre>
+container_run_and_commit_layer(<a href="#container_run_and_commit_layer-name">name</a>, <a href="#container_run_and_commit_layer-commands">commands</a>, <a href="#container_run_and_commit_layer-docker_run_flags">docker_run_flags</a>, <a href="#container_run_and_commit_layer-image">image</a>, <a href="#container_run_and_commit_layer-env">env</a>)
+</pre>
+
+This rule runs a set of commands in a given image, waits for the commands
+to finish, and then outputs the difference to a tarball, similar to <a href="/README.md#container_layer">`container_layer`</a>. The output can be used in the `layers` attribute of <a href="/README.md#container_image">`container_image`</a>.
+
+### Attributes
+
+<table class="params-table">
+  <colgroup>
+    <col class="col-param" />
+    <col class="col-description" />
+  </colgroup>
+  <tbody>
+    <tr id="container_run_and_commit_layer-name">
+      <td><code>name</code></td>
+      <td>
+        <a href="https://bazel.build/docs/build-ref.html#name">Name</a>; required
+        <p>
+          A unique name for this target.
+        </p>
+      </td>
+    </tr>
+    <tr id="container_run_and_commit_layer-commands">
+      <td><code>commands</code></td>
+      <td>
+        List of strings; required
+        <p>
+          A list of commands to run (sequentially) inside <code>sh</code> in the container. If the base image uses a non-standard entrypoint, you may need to use <code>docker_run_flags</code> to change the entrypoint to a shell.
+        </p>
+      </td>
+    </tr>
+    <tr id="container_run_and_commit_layer-docker_run_flags">
+      <td><code>docker_run_flags</code></td>
+      <td>
+        List of strings; optional
+        <p>
+          Extra flags to pass to the docker run command. You may want to use this to override the entrypoint of images that have a non-standard entrypoint, e.g. with <code>["--entrypoint=''"]</code>. These flags only apply to the build step of this rule, and do not affect the output layer. That is, if you change the entrypoint here, and use the layer in a <code>container_image</code> later, the entrypoint of that image will not be changed.
+        </p>
+      </td>
+    </tr>
+    <tr id="container_run_and_commit_layer-image">
+      <td><code>image</code></td>
+      <td>
+        <a href="https://bazel.build/docs/build-ref.html#labels">Label</a>; required
+        <p>
+          The image to run the commands in.
+        </p>
+      </td>
+    </tr>
+    <tr id="container_run_and_commit_layer-env">
+      <td><code>env</code></td>
+      <td>
+        <code>Dictionary from strings to strings, optional</code>
+        <p><a href="https://docs.docker.com/engine/reference/builder/#env">Dictionary
+               from environment variable names to their values when running the
+               Docker image.</a></p>
+        <p>
+          <code>
+          env = {
+            "FOO": "bar",
+            ...
+          },
+          </code>
+        </p>
+        <p>The values of this field support make variables (e.g., <code>$(FOO)</code>) and stamp variables; keys support make variables as well.</p>
+      </td>
+    </tr>
+  </tbody>
+</table>
+
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Template script: loads an image, runs the configured commands in a container
+# started from it, saves the resulting image to an intermediate tarball and
+# extracts that image's last layer together with the layer's diff ID.
+# The percent-brace placeholders are filled in before this script is run.
+
+# -e: abort on the first failing command; -x: trace each command as it runs
+set -ex
+
+# Load utils
+source %{util_script}
+
+# Resolve the docker tool path
+DOCKER="%{docker_tool_path}"
+DOCKER_FLAGS="%{docker_flags}"
+
+if [[ -z "$DOCKER" ]]; then
+    echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?"
+    exit 1
+fi
+
+# Load the image and remember its name
+image_id=$(%{image_id_extractor_path} %{image_tar})
+$DOCKER $DOCKER_FLAGS load -i %{image_tar}
+
+# Start the container detached; `run -d` prints the container id
+id=$($DOCKER $DOCKER_FLAGS run -d %{docker_run_flags} --env-file %{env_file_path} "$image_id" %{commands})
+# Actually wait for the container to finish running its commands
+retcode=$($DOCKER $DOCKER_FLAGS wait "$id")
+# Trigger a failure if the run had a non-zero exit status.
+# Printing the logs is best-effort: with `logs && false`, errexit would not
+# fire if `logs` itself failed, and the script would carry on after a failed
+# run. Exit explicitly instead.
+if [ "$retcode" != 0 ]; then
+  $DOCKER $DOCKER_FLAGS logs "$id" || true
+  exit 1
+fi
+OUTPUT_IMAGE_TAR="%{output_layer_tar}.image.tar"
+# reset_cmd is provided by the sourced util script.
+# NOTE(review): presumably it commits the container as the output image with
+# the original image's cmd restored - confirm against the util script.
+reset_cmd "$image_id" "$id" %{output_image}
+$DOCKER $DOCKER_FLAGS save %{output_image} -o "$OUTPUT_IMAGE_TAR"
+# Delete the container and the intermediate image
+$DOCKER $DOCKER_FLAGS rm "$id"
+$DOCKER $DOCKER_FLAGS rmi %{output_image}
+
+# Extract the last layer from the image - this will be the layer generated by $DOCKER commit
+%{image_last_layer_extractor_path} "$OUTPUT_IMAGE_TAR" %{output_layer_tar} %{output_diff_id}
+
+# Delete the intermediate tar
+rm "$OUTPUT_IMAGE_TAR"
+
+