Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve fail and retry logic when pushing images #103

Merged
merged 5 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions buildrunner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,16 +618,19 @@ def run(self): # pylint: disable=too-many-statements,too-many-branches,too-many

# see if we should push registered tags to remote registries/repositories
if self.push:
# push the multi-platform images
self.log.write(
'Push requested--pushing generated images/packages to remote registries/repositories\n'
)
# Push multi-platform images
if multi_platform.tagged_images_names:
self.log.write(f"===> multi_platform.tagged_images_names: {multi_platform.tagged_images_names}")
self.log.write(
f"===> Pushing {len(multi_platform.tagged_images_names)} multiplatform image(s)\n"
)
for local_name, dest_name in multi_platform.tagged_images_names.items():
self.log.write(f"\nlocal_name: {local_name} dest_name: {dest_name}\n")
self.log.write(f"Pushing {local_name} to {dest_name}\n")
multi_platform.push(name=local_name, dest_names=dest_name)

self.log.write(
'Push requested--pushing generated images/packages to remote registries/repositories\n'
)
# Push single platform images
_docker_client = docker.new_client(timeout=self.docker_timeout)
for _repo_tag, _insecure_registry in self.repo_tags_to_push:
self.log.write(
Expand Down
39 changes: 29 additions & 10 deletions buildrunner/docker/multiplatform_image_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@
from typing import Dict, List, Optional

import python_on_whales
import timeout_decorator
from python_on_whales import docker
from retry import retry

from buildrunner.docker import get_dockerfile

logger = logging.getLogger(__name__)

PUSH_TIMEOUT = 300


class ImageInfo:
"""Image information repo with associated tags"""
Expand Down Expand Up @@ -529,6 +532,23 @@ def get_path(file):

return self._intermediate_built_images[mp_image_name]

@timeout_decorator.timeout(PUSH_TIMEOUT)
def _push_with_timeout(self, src_names: List[str], tag_names: List[str]) -> None:
    """
    Create the given tags in the remote registry from a set of source images.

    The call is wrapped by ``timeout_decorator.timeout(PUSH_TIMEOUT)``, so it
    is aborted when it runs for too long. Callers may catch the resulting
    exception and retry the push.

    Args:
        src_names (List[str]): The source images to combine into the image manifest
        tag_names (List[str]): The tags to push with the final image manifest

    Raises:
        TimeoutError: If the image fails to push within the timeout
            NOTE(review): presumably this is the timeout_decorator package's
            own TimeoutError rather than the builtin -- confirm before
            catching it by type.
    """
    # Record what is about to be pushed before handing off to buildx.
    message = f'Pushing sources {src_names} to tags {tag_names}'
    logger.info(message)

    # buildx imagetools assembles the manifest from the sources and pushes the tags.
    imagetools = docker.buildx.imagetools
    imagetools.create(sources=src_names, tags=tag_names)

def push(self, name: str, dest_names: List[str] = None) -> None:
"""
Pushes the image to the remote registry embedded in dest_names or name if dest_names is None
Expand Down Expand Up @@ -569,19 +589,18 @@ def push(self, name: str, dest_names: List[str] = None) -> None:
while retries > 0:
retries -= 1
logger.debug(f"Creating manifest list {name} with timeout {timeout_seconds} seconds")
curr_process = Process(target=docker.buildx.imagetools.create,
kwargs={"sources": src_names, "tags": tagged_names})
curr_process.start()
curr_process.join(timeout_seconds)
if curr_process.is_alive():
curr_process.kill()
if retries == 0:
raise TimeoutError(f"Timeout pushing {dest_names} after {retries} retries"
f" and {timeout_seconds} seconds each try")
else:
try:
# Push each tag individually in order to prevent strange errors with multiple matching tags
for tag_name in tagged_names:
self._push_with_timeout(src_names, [tag_name])
# Process finished within timeout
logger.info(f"Successfully created multiplatform images {dest_names}")
break
except Exception as exc: # pylint: disable=broad-exception-caught
logger.warning(f"Caught exception while pushing images, retrying: {exc}")
if retries == 0:
raise TimeoutError(f"Timeout pushing {dest_names} after {retries} retries"
f" and {timeout_seconds} seconds each try")
timeout_seconds += timeout_step_seconds

# Cap timeout at max timeout
Expand Down
2 changes: 1 addition & 1 deletion buildrunner/steprunner/tasks/push.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def run(self, context): # pylint: disable=too-many-branches
image_ids = ','.join([image.trunc_digest() for image in images])
platforms = [f'{image.platform}:{image.trunc_digest()}' for image in images]
self.step_runner.build_runner.add_artifact(
os.path.join(self.step_runner.name, image_ids.replace(',', '/')),
repo.repository,
{
'type': 'docker-image',
'docker:image': image_ids,
Expand Down
11 changes: 9 additions & 2 deletions tests/test-files/test-multi-platform-image-reuse.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,20 @@ steps:
- linux/amd64
- linux/arm64/v8
push:
repository: user1/buildrunner-test-multi-platform
- repository: user1/buildrunner-test-multi-platform
tags: [ 'latest', '0.0.1' ]
- repository: user2/buildrunner-test-multi-platform
tags: [ 'latest', '0.0.1' ]
run:
image: user1/buildrunner-test-multi-platform:0.0.1
cmd: echo "Hello World"

use-built-image:
use-built-image1:
run:
image: user1/buildrunner-test-multi-platform:0.0.1
cmd: echo "Hello World"

use-built-image2:
run:
image: user2/buildrunner-test-multi-platform:0.0.1
cmd: echo "Hello World"
Loading