Skip to content

Commit

Permalink
Optimize container build time
Browse files Browse the repository at this point in the history
- Use cache mounts for pip and apt, heavily reducing network requests
  when cache is warm.
- Remove git as a build-time dependency. Besides switching to archive
  endpoints, as was done in 39b4f08, this also requires either setting
  `KARAPACE_VERSION` as a container build arg or building
  karapace/version.py independently.
- Add hadolint pre-commit check for linting Dockerfile.
- Move to using Python base images for builder and final stage. This
  allows omitting installation of some build tools. It also allows
  moving to a more recent Python version, no longer being bound by
  what's in distro repositories. Missing wheels for some of our Python
  dependencies prevent us from moving to 3.11 for now.
- Change the installation approach to construct a virtualenv in the
  builder step and copy it unaltered to the final stage, with
  dependencies and Karapace itself installed in it. This allows having
  even fewer layers in the final stage, and is simpler.
- Introduce a _much_ stricter .dockerignore, ignoring files by default
  and explicitly including what's required. This makes sure changes in
  unrelated files do not evict the layer cache. For example, a few files
  that previously evicted caches erroneously, because everything was
  included:
  - .git/*
  - .mypy_cache/*
  - container/Dockerfile itself
  - __pycache__/*
  - .idea/*
  • Loading branch information
aiven-anton committed May 4, 2023
1 parent 70957d5 commit 793e204
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 64 deletions.
29 changes: 18 additions & 11 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
# Ignoring files that are specific to a given checkout, these change based on
# the user commands and not on the repository history. They are not important
# to determine the state of the repository and would invalidate the cache
# layer.
#
# - .git/logs/HEAD - command history
# - .git/index - binary file for the current index, very important for a
# working repository, not interesting for our image
#
.git/logs/HEAD
.git/index
# Ignore everything by default. Making as few files as possible part of default context
# ensures only relevant changes will evict layer cache.
*

# Include source directories and files required for building.
!karapace
!requirements.txt
!setup.py
!version.py
!README.rst
!container/start.sh

# Ignore some files in source directories.
**/.DS_Store
**/Thumbs.db
**/*.pyc
**/*.pyo
**/__pycache__
3 changes: 3 additions & 0 deletions .github/workflows/container-smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v3

- name: Build karapace/version.py
run: python version.py

- name: Build and start services
run: docker compose --file=container/compose.yml up --build --wait --detach

Expand Down
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,16 @@ repos:
- id: mypy
pass_filenames: false

- repo: https://github.com/hadolint/hadolint
rev: v2.12.0
hooks:
- id: hadolint-docker
alias: hadolint
args:
# This rule has false positives when using a mounted cache volume.
# https://github.com/hadolint/hadolint/issues/497
- --ignore=DL3042

- repo: https://github.com/PyCQA/pylint
# Note: pre-commit autoupdate changes to an alpha version. Instead, manually find the
# latest stable version here: https://github.com/pylint-dev/pylint/releases
Expand Down
58 changes: 22 additions & 36 deletions container/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,56 +1,42 @@
# Builder image contains header files and additional dependencies necessary to
# generate wheel files.
FROM debian:stable-slim AS builder
# Current versions of avro and zstandard don't yet have wheels for 3.11.
FROM python:3.10.11-bullseye AS builder

ARG KARAPACE_VERSION

# Build dependencies that need to be installed:
# - git: Used to install dependencies directly from their public repos (release
# not on PyPI).
# - python3-devel: Python .h files, used to compile C extensions (e.g. multidict)
#
# Build dependencies that need to be installed because of `--no-install-recommends`:
# - gcc: g++ and gcc to compile C extensions
# - python3-wheel: Library to generate .whl files
# - python3-setuptools: Packaging library
#
RUN apt-get update && \
apt-get -y install --no-install-recommends git python3-dev python3-pip python3-setuptools python3-wheel gcc && \
rm -rf /var/lib/apt/lists/*
# Create, activate, and enforce usage of virtualenv.
RUN python3 -m venv /venv
ENV PATH="/venv/bin:$PATH"
ENV PIP_REQUIRE_VIRTUALENV=true

# Copy the requirements.txt and generate wheels for each dependency. Using a
# separate command to use layer caching.
# Copy the requirements.txt and install dependencies in venv. Using a separate
# command to use layer caching.
#
# Note: the requirements.txt is pinned, if any of the dependencies is updated
# the cache will be invalidated and the image regenerated, which is the
# intended behavior.
#
COPY ./requirements/requirements.txt /build/
RUN pip3 wheel --requirement /build/requirements.txt --wheel-dir /build/dependencies-wheels
RUN --mount=type=cache,target=/root/.cache/pip \
python3 -m pip install -r /build/requirements.txt

COPY . /build/karapace-repo
RUN pip3 wheel --no-deps /build/karapace-repo --wheel-dir /build/karapace-wheel
RUN --mount=type=cache,target=/root/.cache/pip \
python3 -m pip install /build/karapace-repo

# Karapace image.
FROM debian:stable-slim AS karapace

RUN groupadd --system karapace && \
useradd --system --gid karapace karapace && \
mkdir /opt/karapace /opt/karapace/runtime /var/log/karapace && \
chown --recursive karapace:karapace /opt/karapace /var/log/karapace

RUN apt-get update && \
apt-get -y install --no-install-recommends python3-pip protobuf-compiler && \
rm -rf /var/lib/apt/lists/*
FROM python:3.10.11-slim-bullseye AS karapace

COPY --from=builder /build/dependencies-wheels/*.whl /build/dependencies-wheels/
RUN pip3 install --no-deps /build/dependencies-wheels/*.whl && rm -rf /build/dependencies-wheels/
RUN groupadd --system karapace \
&& useradd --system --gid karapace karapace \
&& mkdir /opt/karapace /opt/karapace/runtime /var/log/karapace \
&& chown --recursive karapace:karapace /opt/karapace /var/log/karapace

COPY --from=builder /build/karapace-wheel/*.whl /build/karapace-wheel/
RUN pip3 install --no-deps /build/karapace-wheel/*.whl && rm -rf /build/karapace-wheel/
# Copy virtualenv from builder and activate it.
COPY --from=builder /venv /venv
ENV PATH="/venv/bin:$PATH"

COPY ./container/start.sh /opt/karapace
RUN chmod 500 /opt/karapace/start.sh && chown karapace:karapace /opt/karapace/start.sh
RUN chmod 500 /opt/karapace/start.sh \
&& chown karapace:karapace /opt/karapace/start.sh

COPY ./container/healthcheck.py /opt/karapace

Expand Down
2 changes: 1 addition & 1 deletion container/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ registry)
[[ -n ${KARAPACE_REGISTRY_PORT+isset} ]] && export KARAPACE_PORT="${KARAPACE_REGISTRY_PORT}"
[[ -n ${KARAPACE_REGISTRY_CLIENT_ID+isset} ]] && export KARAPACE_CLIENT_ID="${KARAPACE_REGISTRY_CLIENT_ID}"
[[ -n ${KARAPACE_REGISTRY_GROUP_ID+isset} ]] && export KARAPACE_GROUP_ID="${KARAPACE_REGISTRY_GROUP_ID}"
# Map misspelt environment variable to correct spelling for backwards compatibility.
# Map misspelled environment variables to correct spelling for backwards compatibility.
[[ -n ${KARAPACE_REGISTRY_MASTER_ELIGIBITY+isset} ]] && export KARAPACE_MASTER_ELIGIBILITY="${KARAPACE_REGISTRY_MASTER_ELIGIBITY}"
[[ -n ${KARAPACE_REGISTRY_MASTER_ELIGIBILITY+isset} ]] && export KARAPACE_MASTER_ELIGIBILITY="${KARAPACE_REGISTRY_MASTER_ELIGIBILITY}"
[[ -n ${KARAPACE_REGISTRY_TOPIC_NAME+isset} ]] && export KARAPACE_TOPIC_NAME="${KARAPACE_REGISTRY_TOPIC_NAME}"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
with open(readme_path, encoding="utf8") as fp:
readme_text = fp.read()

version_for_setup_py = version.get_project_version("karapace/version.py")
version_for_setup_py = version.get_project_version()
version_for_setup_py = ".dev".join(version_for_setup_py.split("-", 2)[:2])

setup(
Expand Down
37 changes: 22 additions & 15 deletions version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,35 @@
Copyright (c) 2023 Aiven Ltd
See LICENSE for details
"""
import importlib.util
from __future__ import annotations

from typing import Final

import os
import pathlib
import subprocess

version_file: Final = pathlib.Path(__file__).parent.resolve() / "karapace/version.py"


def save_version(new_ver, old_ver, version_file):
def save_version(new_ver, old_ver):
if not new_ver:
return False
version_file = os.path.join(os.path.dirname(__file__), version_file)
if not old_ver or new_ver != old_ver:
with open(version_file, mode="w", encoding="utf8") as fp:
fp.write(f'"""{__doc__}"""\n__version__ = "{new_ver}"\n')
version_file.write_text(f'"""{__doc__}"""\n__version__ = "{new_ver}"\n')
return True


def get_project_version(version_file: str) -> str:
version_file_full_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), version_file)
module_spec = importlib.util.spec_from_file_location("verfile", version_file_full_path)
module = importlib.util.module_from_spec(module_spec)
file_ver = getattr(module, "__version__", None)
def from_version_file() -> str | None:
try:
import karapace.version
except ImportError:
return None
return karapace.version.__version__


def get_project_version() -> str:
file_ver = from_version_file()

version = os.getenv("KARAPACE_VERSION")
if version is None:
Expand All @@ -40,16 +49,14 @@ def get_project_version(version_file: str) -> str:
git_ver = f"0.0.1-0-unknown-{git_ver}"
version = git_ver

if save_version(version, file_ver, version_file):
if save_version(version, file_ver):
return version

if not file_ver:
raise RuntimeError(f"version not available from git or from file {version_file!r}")
raise RuntimeError(f"version not available from git or from file {str(version_file)!r}")

return file_ver


if __name__ == "__main__":
import sys

get_project_version(sys.argv[1])
get_project_version()

0 comments on commit 793e204

Please sign in to comment.