From 768137526d201789c2391eb331231d945ad73e6c Mon Sep 17 00:00:00 2001 From: Kayla Date: Mon, 12 Nov 2018 18:05:07 -0800 Subject: [PATCH 1/2] Save manifest json and digest and allow specifying platform when dowmloading images. --- README.md | 25 +++++++++++------------ client/v1/docker_session_.py | 2 +- client/v2/docker_http_.py | 13 ++++++++++++ client/v2_2/docker_http_.py | 12 ++++++++++++ client/v2_2/docker_image_.py | 3 ++- client/v2_2/docker_session_.py | 2 +- client/v2_2/save_.py | 36 ++++++++++++++++++++++++---------- puller_test.sh | 29 ++++++++++++++++++++++++++- tools/docker_appender_.py | 13 ++++++------ tools/docker_puller_.py | 25 +++++++++++++---------- tools/docker_pusher_.py | 10 ++++------ tools/fast_importer_.py | 9 ++++----- tools/fast_puller_.py | 26 ++++++++++++++---------- tools/fast_pusher_.py | 7 ++----- 14 files changed, 143 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index 10111a100..6ec1a79a8 100755 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ $ bazel run @containerregistry//:puller.par -- --help ``` ``` -usage: puller.par [-h] [--name NAME] [--directory DIRECTORY] +usage: puller.par [-h] --name NAME --directory DIRECTORY [--platform PLATFORM] [--stderrthreshold STDERRTHRESHOLD] Pull images from a Docker Registry, faaaaast. @@ -44,6 +44,8 @@ optional arguments: Supports fully-qualified tag or digest references. --directory DIRECTORY Where to save the image's files. + --platform PLATFORM Which platform image to pull for multi-platform + manifest lists. Formatted as os/arch. --stderrthreshold STDERRTHRESHOLD Write log events at or above this level to stderr. ``` @@ -55,8 +57,8 @@ $ bazel run @containerregistry//:pusher.par -- --help ``` ``` -usage: pusher.par [-h] [--name NAME] [--tarball TARBALL] [--config CONFIG] - [--digest DIGEST] [--layer LAYER] +usage: pusher.par [-h] --name NAME [--tarball TARBALL] [--config CONFIG] + [--manifest MANIFEST] [--digest DIGEST] [--layer LAYER] [--stamp-info-file STAMP_INFO_FILE] [--oci] [--stderrthreshold STDERRTHRESHOLD] @@ -67,6 +69,7 @@ optional arguments: --name NAME The name of the docker image to push. --tarball TARBALL An optional legacy base image tarball. --config CONFIG The path to the file storing the image config. + --manifest MANIFEST The path to the file storing the image manifest. --digest DIGEST The list of layer digest filenames in order. --layer LAYER The list of layer filenames in order. --stamp-info-file STAMP_INFO_FILE @@ -84,9 +87,8 @@ $ bazel run @containerregistry//:importer.par -- --help ``` ``` -usage: importer.par [-h] [--tarball TARBALL] [--format {tar,tar.gz}] - [--directory DIRECTORY] - [--stderrthreshold STDERRTHRESHOLD] +usage: importer.par [-h] --tarball TARBALL [--format {tar,tar.gz}] --directory + DIRECTORY [--stderrthreshold STDERRTHRESHOLD] Import images from a tarball into our faaaaaast format. @@ -141,9 +143,8 @@ $ bazel run @containerregistry//:appender.par -- --help ``` ``` -usage: appender.par [-h] [--src-image SRC_IMAGE] [--tarball TARBALL] - [--dst-image DST_IMAGE] - [--stderrthreshold STDERRTHRESHOLD] +usage: appender.par [-h] --src-image SRC_IMAGE --tarball TARBALL --dst-image + DST_IMAGE [--stderrthreshold STDERRTHRESHOLD] Append tarballs to an image in a Docker Registry. @@ -156,7 +157,6 @@ optional arguments: The name of the new image. --stderrthreshold STDERRTHRESHOLD Write log events at or above this level to stderr. - ``` ## digester.par @@ -167,7 +167,8 @@ $ bazel run @containerregistry//:digester.par -- --help ``` usage: digester.par [-h] [--tarball TARBALL] --output-digest OUTPUT_DIGEST - [--config CONFIG] [--digest DIGEST] [--layer LAYER] [--oci] + [--config CONFIG] [--manifest MANIFEST] [--digest DIGEST] + [--layer LAYER] [--oci] [--stderrthreshold STDERRTHRESHOLD] Calculate digest for a container image. @@ -178,10 +179,10 @@ optional arguments: --output-digest OUTPUT_DIGEST Filename to store digest in. --config CONFIG The path to the file storing the image config. + --manifest MANIFEST The path to the file storing the image manifest. --digest DIGEST The list of layer digest filenames in order. --layer LAYER The list of layer filenames in order. --oci Image has an OCI Manifest. --stderrthreshold STDERRTHRESHOLD Write log events at or above this level to stderr. - ``` diff --git a/client/v1/docker_session_.py b/client/v1/docker_session_.py index 6d266d600..d9f852612 100755 --- a/client/v1/docker_session_.py +++ b/client/v1/docker_session_.py @@ -72,7 +72,7 @@ def __enter__(self): accepted_codes=[ six.moves.http_client.OK, six.moves.http_client.CREATED ], - body='[]') + body='[]') # pytype: disable=wrong-arg-types # The response should have an X-Docker-Token header, which # we should extract and annotate subsequent requests with: diff --git a/client/v2/docker_http_.py b/client/v2/docker_http_.py index 27ba60aa3..5945531d6 100755 --- a/client/v2/docker_http_.py +++ b/client/v2/docker_http_.py @@ -73,6 +73,13 @@ def detail(self): def _DiagnosticsFromContent(content): + """Extract and return the diagnostics from content.""" + try: + content = content.decode('utf8') + except: # pylint: disable=bare-except + # Assume it's already decoded. Defensive coding for old py2 habits that + # are hard to break. Passing does not make the problem worse. + pass try: o = json.loads(content) return [Diagnostic(d) for d in o.get('errors', [])] @@ -277,6 +284,12 @@ def _Refresh(self): raise TokenRefreshException('Bad status during token exchange: %d\n%s' % (resp.status, content)) + try: + content = content.decode('utf8') + except: # pylint: disable=bare-except + # Assume it's already decoded. Defensive coding for old py2 habits that + # are hard to break. Passing does not make the problem worse. + pass wrapper_object = json.loads(content) token = wrapper_object.get('token') or wrapper_object.get('access_token') _CheckState(token is not None, diff --git a/client/v2_2/docker_http_.py b/client/v2_2/docker_http_.py index da47a7d51..9fb14785f 100755 --- a/client/v2_2/docker_http_.py +++ b/client/v2_2/docker_http_.py @@ -103,8 +103,14 @@ def detail(self): def _DiagnosticsFromContent(content): + """Extract and return the diagnostics from content.""" try: content = content.decode('utf8') + except: # pylint: disable=bare-except + # Assume it's already decoded. Defensive coding for old py2 habits that + # are hard to break. Passing does not make the problem worse. + pass + try: o = json.loads(content) return [Diagnostic(d) for d in o.get('errors', [])] except: # pylint: disable=bare-except @@ -308,6 +314,12 @@ def _Refresh(self): raise TokenRefreshException('Bad status during token exchange: %d\n%s' % (resp.status, content)) + try: + content = content.decode('utf8') + except: # pylint: disable=bare-except + # Assume it's already decoded. Defensive coding for old py2 habits that + # are hard to break. Passing does not make the problem worse. + pass wrapper_object = json.loads(content) token = wrapper_object.get('token') or wrapper_object.get('access_token') _CheckState(token is not None, 'Malformed JSON response: %s' % content) diff --git a/client/v2_2/docker_image_.py b/client/v2_2/docker_image_.py index 53e2488b5..2025a5036 100755 --- a/client/v2_2/docker_image_.py +++ b/client/v2_2/docker_image_.py @@ -770,7 +770,8 @@ def uncompressed_layer(self, diff_id): if diff_id in self._uncompressed_layer_to_filename: with io.open(self._uncompressed_layer_to_filename[diff_id], u'rb') as reader: - return reader.read() + # TODO(b/118349036): Remove the disable once the pytype bug is fixed. + return reader.read() # pytype: disable=bad-return-type if self._legacy_base and diff_id in self._legacy_base.diff_ids(): return self._legacy_base.uncompressed_layer(diff_id) return super(FromDisk, self).uncompressed_layer(diff_id) diff --git a/client/v2_2/docker_session_.py b/client/v2_2/docker_session_.py index 54b7941ee..7aff6ae35 100755 --- a/client/v2_2/docker_session_.py +++ b/client/v2_2/docker_session_.py @@ -244,7 +244,7 @@ def _put_manifest( content_type=image.media_type(), accepted_codes=[ six.moves.http_client.OK, six.moves.http_client.CREATED, - six.moves.http_client.ACCEPTED + six.moves.http_client.ACCEPTED # pytype: disable=wrong-arg-types ]) def _start_upload(self, diff --git a/client/v2_2/save_.py b/client/v2_2/save_.py index b402db6cc..2e4c5368d 100755 --- a/client/v2_2/save_.py +++ b/client/v2_2/save_.py @@ -146,12 +146,14 @@ def fast(image, directory, After calling this, the following filesystem will exist: directory/ - config.json <-- only *.json, the image's config - 001.tar.gz <-- the first layer's .tar.gz filesystem delta - 001.sha256 <-- the sha256 of 1.tar.gz with a "sha256:" prefix. + config.json <-- only *.json, the image's config + digest <-- sha256 digest of the image's manifest + manifest.json <-- the image's manifest + 001.tar.gz <-- the first layer's .tar.gz filesystem delta + 001.sha256 <-- the sha256 of 1.tar.gz with a "sha256:" prefix. ... - N.tar.gz <-- the Nth layer's .tar.gz filesystem delta - N.sha256 <-- the sha256 of N.tar.gz with a "sha256:" prefix. + N.tar.gz <-- the Nth layer's .tar.gz filesystem delta + N.sha256 <-- the sha256 of N.tar.gz with a "sha256:" prefix. We pad layer indices to only 3 digits because of a known ceiling on the number of filesystem layers Docker supports. @@ -180,6 +182,12 @@ def write_file(name, accessor, 'unused') future_to_params[f] = config_file + executor.submit(write_file, os.path.join(directory, 'digest'), + lambda unused: image.digest(), 'unused') + executor.submit(write_file, os.path.join(directory, 'manifest.json'), + lambda unused: image.manifest().encode('utf8'), + 'unused') + idx = 0 layers = [] for blob in reversed(image.fs_layers()): @@ -214,12 +222,14 @@ def uncompressed(image, After calling this, the following filesystem will exist: directory/ - config.json <-- only *.json, the image's config - 001.tar <-- the first layer's .tar filesystem delta - 001.sha256 <-- the sha256 of 001.tar with a "sha256:" prefix. + config.json <-- only *.json, the image's config + digest <-- sha256 digest of the image's manifest + manifest.json <-- the image's manifest + 001.tar <-- the first layer's .tar filesystem delta + 001.sha256 <-- the sha256 of 001.tar with a "sha256:" prefix. ... - NNN.tar <-- the NNNth layer's .tar filesystem delta - NNN.sha256 <-- the sha256 of NNN.tar with a "sha256:" prefix. + NNN.tar <-- the NNNth layer's .tar filesystem delta + NNN.sha256 <-- the sha256 of NNN.tar with a "sha256:" prefix. We pad layer indices to only 3 digits because of a known ceiling on the number of filesystem layers Docker supports. @@ -248,6 +258,12 @@ def write_file(name, accessor, 'unused') future_to_params[f] = config_file + executor.submit(write_file, os.path.join(directory, 'digest'), + lambda unused: image.digest(), 'unused') + executor.submit(write_file, os.path.join(directory, 'manifest.json'), + lambda unused: image.manifest().encode('utf8'), + 'unused') + idx = 0 layers = [] for diff_id in reversed(image.diff_ids()): diff --git a/puller_test.sh b/puller_test.sh index 8f0ad7f0c..58d0347d3 100755 --- a/puller_test.sh +++ b/puller_test.sh @@ -17,7 +17,7 @@ # Unit tests for puller.par # Trick to chase the symlink before the docker build. -cp puller.par puller2.par +cp -f puller.par puller2.par # Test pulling an image by just invoking the puller function test_puller() { @@ -27,6 +27,26 @@ function test_puller() { puller.par --name="${image}" --directory=/tmp/ } +test_puller_multiplatform() { + local image=$1 + local platform=$2 + local expected_digest=$3 + + local tmpdir=$(mktemp -d) + + echo "TESTING: ${image} platform ${platform}" + + puller.par --name="${image}" --directory="${tmpdir}" --platform="${platform}" + + digest=$(cat "${tmpdir}/digest") + rm -rf "${tmpdir}" + + if [[ "${digest}" != "${expected_digest}" ]]; then + echo "Expected digest '${expected_digest}', got '${digest}'" + return 1 + fi +} + # Test pulling an image from inside a docker container with a # certain base / entrypoint function test_base() { @@ -88,4 +108,11 @@ test_image gcr.io/google-containers/pause@sha256:9ce5316f9752b8347484ab0f6778573 # Test pulling manifest list by digest, this should resolve to amd64/linux test_image index.docker.io/library/busybox@sha256:1669a6aa7350e1cdd28f972ddad5aceba2912f589f19a090ac75b7083da748db +# Test pulling manifest list explicitly specifying a platform +test_puller_multiplatform gcr.io/google-containers/pause:3.1 linux/amd64 \ + sha256:59eec8837a4d942cc19a52b8c09ea75121acc38114a2c68b98983ce9356b8610 + +test_puller_multiplatform gcr.io/google-containers/pause:3.1 linux/ppc64le \ + sha256:bcf9771c0b505e68c65440474179592ffdfa98790eb54ffbf129969c5e429990 + # TODO(user): Add an authenticated pull test. diff --git a/tools/docker_appender_.py b/tools/docker_appender_.py index 62a38b77c..910175147 100755 --- a/tools/docker_appender_.py +++ b/tools/docker_appender_.py @@ -37,12 +37,15 @@ parser.add_argument( '--src-image', action='store', - help=('The name of the docker image to append to.')) + help='The name of the docker image to append to.', + required=True) -parser.add_argument('--tarball', action='store', help='The tarball to append.') +parser.add_argument('--tarball', action='store', help='The tarball to append.', + required=True) parser.add_argument( - '--dst-image', action='store', help='The name of the new image.') + '--dst-image', action='store', help='The name of the new image.', + required=True) _THREADS = 8 @@ -52,10 +55,6 @@ def main(): args = parser.parse_args() logging_setup.Init(args=args) - if not args.src_image or not args.tarball or not args.dst_image: - raise Exception('--src-image, --dst-image and --tarball are required ' - 'arguments.') - transport = transport_pool.Http(httplib2.Http, size=_THREADS) # This library can support push-by-digest, but the likelihood of a user diff --git a/tools/docker_puller_.py b/tools/docker_puller_.py index 426dcfe75..f6e7101d0 100755 --- a/tools/docker_puller_.py +++ b/tools/docker_puller_.py @@ -43,16 +43,19 @@ '--name', action='store', help=('The name of the docker image to pull and save. ' - 'Supports fully-qualified tag or digest references.')) + 'Supports fully-qualified tag or digest references.'), + required=True) parser.add_argument( - '--tarball', action='store', help='Where to save the image tarball.') + '--tarball', action='store', help='Where to save the image tarball.', + required=True) -_DEFAULT_TAG = 'i-was-a-digest' - -_PROCESSOR_ARCHITECTURE = 'amd64' +parser.add_argument( + '--platform', action='store', default='linux/amd64', + help=('Which platform image to pull for multi-platform manifest lists. ' + 'Formatted as os/arch.')) -_OPERATING_SYSTEM = 'linux' +_DEFAULT_TAG = 'i-was-a-digest' # Today save.tarball expects a tag, which is emitted into one or more files @@ -79,10 +82,12 @@ def main(): args = parser.parse_args() logging_setup.Init(args=args) - if not args.name or not args.tarball: - logging.fatal('--name and --tarball are required arguments.') + if '/' not in args.platform: + logging.fatal('--platform must be specified in os/arch format.') sys.exit(1) + os, arch = args.platform.split('/', 1) + retry_factory = retry.Factory() retry_factory = retry_factory.WithSourceTransportCallable(httplib2.Http) transport = transport_pool.Http(retry_factory.Build, size=8) @@ -116,8 +121,8 @@ def main(): with image_list.FromRegistry(name, creds, transport) as img_list: if img_list.exists(): platform = image_list.Platform({ - 'architecture': _PROCESSOR_ARCHITECTURE, - 'os': _OPERATING_SYSTEM, + 'architecture': arch, + 'os': os, }) # pytype: disable=wrong-arg-types with img_list.resolve(platform) as default_child: diff --git a/tools/docker_pusher_.py b/tools/docker_pusher_.py index 1ecb29210..ea526b2df 100755 --- a/tools/docker_pusher_.py +++ b/tools/docker_pusher_.py @@ -38,10 +38,12 @@ description='Push images to a Docker Registry.') parser.add_argument( - '--name', action='store', help=('The name of the docker image to push.')) + '--name', action='store', help='The name of the docker image to push.', + required=True) parser.add_argument( - '--tarball', action='store', help='Where to load the image tarball.') + '--tarball', action='store', help='Where to load the image tarball.', + required=True) parser.add_argument( '--stamp-info-file', @@ -79,10 +81,6 @@ def main(): args = parser.parse_args() logging_setup.Init(args=args) - if not args.name or not args.tarball: - logging.fatal('--name and --tarball are required arguments.') - sys.exit(1) - retry_factory = retry.Factory() retry_factory = retry_factory.WithSourceTransportCallable(httplib2.Http) transport = transport_pool.Http(retry_factory.Build, size=_THREADS) diff --git a/tools/fast_importer_.py b/tools/fast_importer_.py index 33ca30001..ca64692a9 100755 --- a/tools/fast_importer_.py +++ b/tools/fast_importer_.py @@ -33,7 +33,8 @@ '--tarball', action='store', help=('The tarball containing the docker image to rewrite ' - 'into our fast on-disk format.')) + 'into our fast on-disk format.'), + required=True) parser.add_argument( '--format', @@ -43,7 +44,8 @@ help='The form in which to save layers.') parser.add_argument( - '--directory', action='store', help='Where to save the image\'s files.') + '--directory', action='store', help='Where to save the image\'s files.', + required=True) _THREADS = 32 @@ -53,9 +55,6 @@ def main(): args = parser.parse_args() logging_setup.Init(args=args) - if not args.tarball or not args.directory: - raise Exception('--tarball and --directory are required arguments.') - method = save.uncompressed if args.format == 'tar.gz': method = save.fast diff --git a/tools/fast_puller_.py b/tools/fast_puller_.py index 4e3910c79..c31e61529 100755 --- a/tools/fast_puller_.py +++ b/tools/fast_puller_.py @@ -45,16 +45,19 @@ '--name', action='store', help=('The name of the docker image to pull and save. ' - 'Supports fully-qualified tag or digest references.')) + 'Supports fully-qualified tag or digest references.'), + required=True) parser.add_argument( - '--directory', action='store', help='Where to save the image\'s files.') + '--directory', action='store', help='Where to save the image\'s files.', + required=True) -_THREADS = 8 - -_PROCESSOR_ARCHITECTURE = 'amd64' +parser.add_argument( + '--platform', action='store', default='linux/amd64', + help=('Which platform image to pull for multi-platform manifest lists. ' + 'Formatted as os/arch.')) -_OPERATING_SYSTEM = 'linux' +_THREADS = 8 def main(): @@ -62,8 +65,11 @@ def main(): args = parser.parse_args() logging_setup.Init(args=args) - if not args.name or not args.directory: - logging.fatal('--name and --directory are required arguments.') + if '/' not in args.platform: + logging.fatal('--platform must be specified in os/arch format.') + sys.exit(1) + + os, arch = args.platform.split('/', 1) retry_factory = retry.Factory() retry_factory = retry_factory.WithSourceTransportCallable(httplib2.Http) @@ -97,8 +103,8 @@ def main(): with image_list.FromRegistry(name, creds, transport) as img_list: if img_list.exists(): platform = image_list.Platform({ - 'architecture': _PROCESSOR_ARCHITECTURE, - 'os': _OPERATING_SYSTEM, + 'architecture': arch, + 'os': os, }) # pytype: disable=wrong-arg-types with img_list.resolve(platform) as default_child: diff --git a/tools/fast_pusher_.py b/tools/fast_pusher_.py index 9856c4125..3123afdab 100755 --- a/tools/fast_pusher_.py +++ b/tools/fast_pusher_.py @@ -44,7 +44,8 @@ description='Push images to a Docker Registry, faaaaaast.') parser.add_argument( - '--name', action='store', help=('The name of the docker image to push.')) + '--name', action='store', help='The name of the docker image to push.', + required=True) # The name of this flag was chosen for compatibility with docker_pusher.py parser.add_argument( @@ -109,10 +110,6 @@ def main(): args = parser.parse_args() logging_setup.Init(args=args) - if not args.name: - logging.fatal('--name is a required arguments.') - sys.exit(1) - # This library can support push-by-digest, but the likelihood of a user # correctly providing us with the digest without using this library # directly is essentially nil. From 85edd13b45836540d948ac32495d8e376cc8c59f Mon Sep 17 00:00:00 2001 From: Kayla Date: Tue, 13 Nov 2018 13:39:15 -0800 Subject: [PATCH 2/2] Save manifest and digest, allow specifying platform when dowmloading, update httplib2 dep. --- def.bzl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/def.bzl b/def.bzl index cb4d340ef..263e63441 100755 --- a/def.bzl +++ b/def.bzl @@ -15,9 +15,9 @@ def repositories(): native.new_http_archive( name = "httplib2", - url = "https://codeload.github.com/httplib2/httplib2/tar.gz/v0.10.3", - sha256 = "d1bee28a68cc665c451c83d315e3afdbeb5391f08971dcc91e060d5ba16986f1", - strip_prefix = "httplib2-0.10.3/python2/httplib2/", + url = "https://codeload.github.com/httplib2/httplib2/tar.gz/v0.11.3", + sha256 = "d9f568c183d1230f271e9c60bd99f3f2b67637c3478c9068fea29f7cca3d911f", + strip_prefix = "httplib2-0.11.3/python2/httplib2/", type = "tar.gz", build_file_content = """ py_library(