From 9123515bc38180fabf6c49cfaad7cf859c3e1ecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20K=C3=BCgler?= Date: Mon, 19 Feb 2024 19:24:08 +0100 Subject: [PATCH] Docker/Dockerfile - add FREESURFER_URL build-arg and pass the URL to install_fs_pruned.sh Docker/install_fs_pruned.sh - add option to download FreeSurfer from a different URL - optimize upx option for multiple threads - reorder so upx runs before links are created Docker/build.py - add --attest argument - change building logic, so it works with docker-container - add attestation logic --- Docker/Dockerfile | 4 +- Docker/build.py | 176 ++++++++++++++++++++++++++++-------- Docker/install_fs_pruned.sh | 87 ++++++++++++++---- 3 files changed, 214 insertions(+), 53 deletions(-) diff --git a/Docker/Dockerfile b/Docker/Dockerfile index 0f43d880..29379615 100644 --- a/Docker/Dockerfile +++ b/Docker/Dockerfile @@ -123,8 +123,10 @@ FROM build_base AS build_freesurfer COPY ./Docker/install_fs_pruned.sh /install/ SHELL ["/bin/bash", "--login", "-c"] +ARG FREESURFER_URL=default + # install freesurfer and point to new python location -RUN /install/install_fs_pruned.sh /opt --upx && \ +RUN /install/install_fs_pruned.sh /opt --upx --url $FREESURFER_URL && \ rm /opt/freesurfer/bin/fspython && \ rm -R /install && \ ln -s /venv/bin/python3 /opt/freesurfer/bin/fspython diff --git a/Docker/build.py b/Docker/build.py index 394d0812..d86b4afd 100755 --- a/Docker/build.py +++ b/Docker/build.py @@ -82,12 +82,14 @@ def __import(file: Path, name: str, *tokens: str, **rename_tokens: str): def docker_image(arg) -> str: - """Returns a str with the image. + """ + Returns a str with the image. Raises ====== ArgumentTypeError - if it is not a valid docker image.""" + if it is not a valid docker image. + """ from re import match # regex from https://stackoverflow.com/questions/39671641/regex-to-parse-docker-tag pattern = r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(? 
str: if match(pattern, arg): return arg else: - raise argparse.ArgumentTypeError(f"The image '{arg}' does not look like a " - f"valid image name.") + raise argparse.ArgumentTypeError( + f"The image '{arg}' does not look like a valid image name." + ) def target(arg) -> Target: @@ -112,7 +115,8 @@ def target(arg) -> Target: return cast(Target, arg) else: raise argparse.ArgumentTypeError( - f"target must be one of {', '.join(get_args(Target))}, but was {arg}.") + f"target must be one of {', '.join(get_args(Target))}, but was {arg}." + ) class CacheSpec: @@ -204,7 +208,14 @@ def make_parser() -> argparse.ArgumentParser: type=docker_image, dest="image_tag", metavar="image[:tag]", - help="""tag build stage/target as [:]""") + help="""tag build stage/target as [:]""", + ) + parser.add_argument( + "--attest", + action="store_true", + help="add sbom and provenance attestation (requires docker-container buildkit " + "builder created with 'docker buildx create')", + ) parser.add_argument( "--target", default="runtime", @@ -214,28 +225,47 @@ def make_parser() -> argparse.ArgumentParser: help=f"""target to build (from list of targets below, defaults to runtime):
- build_conda: "finished" conda build image
- build_freesurfer: "finished" freesurfer build image
- - runtime: final fastsurfer runtime image""") + - runtime: final fastsurfer runtime image""", + ) parser.add_argument( "--rm", action="store_true", - help="disables caching, i.e. removes all intermediate images.") + help="disables caching, i.e. removes all intermediate images.", + ) + cache_kwargs = {} + if "FASTSURFER_BUILD_CACHE" in os.environ: + try: + cache_kwargs = { + "default": CacheSpec(os.environ["FASTSURFER_BUILD_CACHE"]) + } + except ValueError as e: + logger.warning( + f"ERROR while parsing the environment variable 'FASTSURFER_BUILD_CACHE' " + f"{os.environ['FASTSURFER_BUILD_CACHE']} (ignoring this environment " + f"variable): {e.args[0]}" + ) parser.add_argument( "--cache", type=CacheSpec, - help="""cache as defined in https://docs.docker.com/build/cache/backends/ - (using --cache-to syntax, parameters are automatically filtered for use - in --cache-to and --cache-from), e.g.: - --cache type=registry,ref=server/fastbuild,mode=max.""") + help=f"""cache as defined in https://docs.docker.com/build/cache/backends/ + (using --cache-to syntax, parameters are automatically filtered for use + in --cache-to and --cache-from), e.g.: + --cache type=registry,ref=server/fastbuild,mode=max. + Will default to the environment variable FASTSURFER_BUILD_CACHE: + {cache_kwargs.get('default', 'N/A')}""", + **cache_kwargs, + ) parser.add_argument( "--dry_run", "--print", action="store_true", help="Instead of starting processes, write the commands to stdout, so they can " - "be dry_run with 'build.py ... --dry_run | bash'.") + "be dry_run with 'build.py ... --dry_run | bash'.", + ) parser.add_argument( "--tag_dev", action="store_true", - help="Also tag the resulting image as 'fastsurfer:dev'." 
+ help="Also tag the resulting image as 'fastsurfer:dev'.", ) expert = parser.add_argument_group('Expert options') @@ -281,12 +311,56 @@ def red(skk): return "\033[91m {}\033[00m" .format(skk) +def get_builder(Popen, require_builder_type: str) -> tuple[bool, str]: + """Get the builder to build the fastsurfer image.""" + from subprocess import PIPE + from re import compile + buildx_binfo = Popen(["docker", "buildx", "ls"], stdout=PIPE, stderr=PIPE).finish() + header, *lines = buildx_binfo.out_str("utf-8").strip().split("\n") + header_pattern = compile("\\S+\\s*") + fields = {} + pos = 0 + while pos < len(header) and (match := header_pattern.search(header, pos)): + start, pos = match.span() + fields[match.group().strip()] = slice(start, pos) + builders = {line[fields["NAME/NODE"]]: line[fields["DRIVER/ENDPOINT"]] + for line in lines if not line.startswith(" ")} + builders = {key.strip(): value.strip() for key, value in builders.items()} + default_builders = [name for name in builders.keys() if name.endswith("*")] + if len(default_builders) != 1: + raise RuntimeError("Could not find default builder of buildx") + default_builder = default_builders[0][:-1].strip() + builders[default_builder] = builders[default_builders[0]] + del builders[default_builders[0]] + cannot_use_default_builder = ( + require_builder_type and builders[default_builder] != require_builder_type + ) + if cannot_use_default_builder: + # if the default builder is a docker builder (which does not support + for builder in builders.keys(): + if (builder.startswith("fastsurfer") and + builders[builder] == require_builder_type): + default_builder = builder + break + if builders[default_builder] != require_builder_type: + # did not find an appropriate builder + raise RuntimeError( + "Could not find an appropriate builder from the current builder " + "(see docker buildx use) or builders named fastsurfer* (searching for " + f"a builder of type {require_builder_type}, docker " + "builders may not be supported 
with the selected export settings. " + "Create builder with 'docker buildx create --name fastsurfer'." + ) + return not cannot_use_default_builder, default_builder + + def docker_build_image( image_name: str, dockerfile: Path, working_directory: Optional[Path] = None, context: Path | str = ".", dry_run: bool = False, + attestation: bool = False, **kwargs) -> None: """ Build a docker image. @@ -307,9 +381,9 @@ def docker_build_image( cache_to : str, optional Forces usage of buildx over build, use docker build caching as in the --cache-to argument to docker buildx build. + attestation : bool, default=False + Whether to create sbom and provenance attestation - Other Parameters - ---------------- Additional kwargs add additional build flags to the build command in the following manner: "_" is replaced by "-" in the keyword name and each sequence entry is passed with its own flag, e.g. `docker_build_image(..., build_arg=["TEST=1", "VAL=2"])` is @@ -321,6 +395,11 @@ def docker_build_image( "daemon, which may take a while...") extra_env = {"DOCKER_BUILDKIT": "1"} + from shutil import which + docker_cmd = which("docker") + if docker_cmd is None: + raise FileNotFoundError("Could not locate the docker executable") + def to_pair(key, values): if isinstance(values, Sequence) and isinstance(values, (str, bytes)): values = [values] @@ -333,32 +412,48 @@ def to_pair(key, values): # always use/require buildx (required for sbom and provenance) Popen = _import_calls(working_directory) # from fastsurfer dir - cmd_exec_args = ["docker", "buildx", "version"] - buildx_test = Popen(cmd_exec_args, stdout=PIPE, stderr=PIPE).finish() - if "'buildx' is not a docker command" in buildx_test.err_str('utf-8').strip(): - raise RuntimeError( - "Using --cache requires docker buildx, install with 'wget -qO ~/" - ".docker/cli-plugins/docker-buildx https://github.com/docker/buildx/" - "releases/download//buildx-.'\n" - "e.g. 
'wget -qO ~/.docker/cli-plugins/docker-buildx " - "https://github.com/docker/buildx/releases/download/v0.12.1/" - "buildx-v0.12.1.linux-amd64'\n" - "You may need to 'chmod +x ~/.docker/cli-plugins/docker-buildx'\n" - "See also https://github.com/docker/buildx#manual-download" + if attestation or \ + any(kwargs.get(f"cache_{c}", "inline") != "inline" for c in ("to", "from")): + buildx_test = Popen( + [docker_cmd, "buildx", "version"], + stdout=PIPE, + stderr=PIPE, + ).finish() + if "'buildx' is not a docker command" in buildx_test.err_str("utf-8").strip(): + wget_cmd = ( + "wget -qO ~/.docker/cli-plugins/docker-buildx https://github.com/docker" + "/buildx/releases/download/{0:s}/buildx-{0:s}.{1:s}" + ) + raise RuntimeError( + f"Using --cache or attestation requires docker buildx, install with " + f"'{wget_cmd % ('', '')}'\n" + f"e.g. '{wget_cmd % ('v0.12.1', 'linux-amd64')}\n" + f"You may need to 'chmod +x ~/.docker/cli-plugins/docker-buildx'\n" + f"See also https://github.com/docker/buildx#manual-download" + ) + + if not attestation: + # tag image_name in local registry (simple standard case) + args.extend(["--output", f"type=image,name={image_name}"]) + else: + # want to create sbom and provenance manifests, so needs to use a + # docker-container builder + args.extend(["--attest", "type=sbom", "--provenance=true"]) + can_use_default_builder, alternative_builder = get_builder( + Popen, + "docker-container", ) + if not can_use_default_builder: + args.extend(["--builder", alternative_builder]) + args.extend(["--output", f"type=docker,name={image_name}", "--load"]) + args.extend(("-t", image_name)) params = [to_pair(*a) for a in kwargs.items()] - args.extend(["--attest", "type=sbom", "--provenance=true"]) - args.extend(["-t", image_name, "-f", str(dockerfile)] + list(chain(*params))) + args.extend(["-f", str(dockerfile)] + list(chain(*params))) args.append(str(context)) if dry_run: extra_environment = [f"{k}={v}" for k, v in extra_env.items()] print(" 
".join(extra_environment + ["docker"] + args)) else: - from shutil import which - docker_cmd = which("docker") - if docker_cmd is None: - raise FileNotFoundError("Could not locate the docker executable") - Popen = _import_calls(working_directory) # from fastsurfer dir env = dict(os.environ) env.update(extra_env) with Popen([docker_cmd] + args + ["--progress=plain"], @@ -434,6 +529,14 @@ def main( if not bool(image_tag): image_tag = f"fastsurfer:{version_tag}{image_suffix}".replace("+", "_") + attestation = bool(keywords.get("attest")) + if not attestation: + # only attestation requires and actively changes to a docker-container driver + if cache is not None and cache.type != "inline": + return ("The docker build interface only support caching inline, i.e. " + "--cache type=inline. Use --save_docker or --save_oci for other " + "caching drivers.") + if tag_dev: kwargs["tag"] = f"fastsurfer:dev{image_suffix}" @@ -449,7 +552,8 @@ def main( working_directory=fastsurfer_home, context=fastsurfer_home, dry_run=dry_run, - **kwargs + attestation=attestation, + **kwargs, ) except RuntimeError as e: return e.args[0] diff --git a/Docker/install_fs_pruned.sh b/Docker/install_fs_pruned.sh index c8bd5ca5..7367834f 100755 --- a/Docker/install_fs_pruned.sh +++ b/Docker/install_fs_pruned.sh @@ -15,21 +15,41 @@ fslink="https://surfer.nmr.mgh.harvard.edu/pub/dist/freesurfer/7.4.1/freesurfer-linux-ubuntu22_amd64-7.4.1.tar.gz" -if [ "$#" -lt 1 ]; then +if [[ "$#" -lt 1 ]]; then echo - echo "Usage: install_fs_prunded install_dir <--upx>" + echo "Usage: install_fs_prunded install_dir [--upx] [--url freesurfer_download_url]" echo echo "--upx is optional, if passed, fs/bin will be packed" - echo + echo "--url is optional, if passed, freesurfer will be downloaded from it instead of $fslink" + echo exit 2 fi - where=/opt -if [ "$#" -ge 1 ]; then +if [[ "$#" -ge 1 ]]; then where=$1 + shift fi +upx="false" +while [[ "$#" -ge 1 ]]; do + lowercase=$(echo "$1" | tr '[:upper:]' '[:lower:]') + case 
$lowercase in + --upx) + upx="true" + shift + ;; + --url) + if [[ "$2" != "default" ]]; then fslink=$2; fi + shift + shift + ;; + *) + echo "Invalid argument $1" + exit 1 + ;; + esac +done fss=$where/fs-tmp fsd=$where/freesurfer echo @@ -41,6 +61,42 @@ echo "$fslink" echo +function run_parallel () +{ + # param 1 num_parallel_processes + # param 2 command (printf string) + # param 3 how many entries to consume from $@ per "run" + # param ... parameters to format, ie. we are executing $(printf $command $@...) + i=0 + pids=() + num_parallel_processes=$1 + command=$2 + num=$3 + shift + shift + shift + args=("$@") + j=0 + while [[ "$j" -lt "${#args}" ]] + do + cmd=$(printf "$command" "${args[@]:$j:$num}") + j=$((j + num)) + $cmd & + pids=("${pids[@]}" "$!") + i=$((i + 1)) + if [[ "$i" -ge "$num_parallel_processes" ]] + then + wait "${pids[0]}" + pids=("${pids[@]:1}") + fi + done + for pid in "${pids[@]}" + do + wait "$pid" + done +} + + # get Freesurfer and upack (some of it) echo "Downloading FS and unpacking portions ..." wget --no-check-certificate -qO- $fslink | tar zxv --no-same-owner -C $where \ @@ -338,6 +394,14 @@ do cp -r $fss/$file $fsd/$file done +# pack if desired with upx (do this before adding all the links +if [[ "$upx" == "true" ]] ; then + echo "finding executables in $fsd/bin/..." + exe=$(find $fsd/bin -exec file {} \; | grep ELF | cut -d: -f1) + echo "packing $fsd/bin/ executables (this can take a while) ..." + run_parallel 8 "upx -9 %s %s %s %s" 4 $exe +fi + # Modify fsbindings Python package to allow calling scripts like asegstats2table directly: echo "from . import legacy" > "$fsd/python/packages/fsbindings/__init__.py" @@ -404,7 +468,7 @@ do done # use our python (not really needed in recon-all anyway) -p3=`which python3` +p3=$(which python3) if [ "$p3" == "" ]; then echo "No python3 found, please install first!" 
echo @@ -413,13 +477,4 @@ fi ln -s $p3 $fsd/bin/fspython #cleanup -rm -rf $fss - -# pack if desired with upx -if [ "$#" -ge 2 ]; then - if [ "${2^^}" == "--UPX" ] ; then - echo "packing $fsd/bin/ executables (this can take a while) ..." - exe=`find $fsd/bin -exec file {} \; | grep ELF | cut -d: -f1` - upx -9 $exe - fi -fi +rm -rf $fss \ No newline at end of file