From fa756ba90c1bd5856c2ca4ed36d7cf3dccc7a3c0 Mon Sep 17 00:00:00 2001 From: Fares Schulz Date: Fri, 23 Aug 2024 19:16:55 +0200 Subject: [PATCH] Fixed the docker ownership error --- .gitignore | 3 ++- Dockerfile | 2 +- TODO.md | 4 ++-- docs/SETUP.md | 46 +++++++++++++++++++++++++++++++++++++++++++--- docs/USAGE.md | 3 +-- exp_workflow.sh | 25 ++++++++++++++++++++++--- slurm_job.sh | 6 ++++-- sync_logs.sh | 4 +++- 8 files changed, 78 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 1d6e1c5..2629c60 100644 --- a/.gitignore +++ b/.gitignore @@ -28,5 +28,6 @@ NOTES.md # Ignore singularity image *.sif - +# Ignore local environment vars +local.env diff --git a/Dockerfile b/Dockerfile index 2d45cbd..390e588 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,7 +29,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ COPY global.env /tmp/global.env # Install Python Version -RUN export $(grep -v '^#' /tmp/global.env | xargs) \ +RUN . /tmp/global.env \ && echo "Using Python version: ${TUSTU_PYTHON_VERSION}" \ && echo "Downloading Python version: ${TUSTU_PYTHON_VERSION}" \ && wget --no-check-certificate https://www.python.org/ftp/python/${TUSTU_PYTHON_VERSION}/Python-${TUSTU_PYTHON_VERSION}.tgz \ diff --git a/TODO.md b/TODO.md index c48068a..03bfda3 100644 --- a/TODO.md +++ b/TODO.md @@ -12,8 +12,8 @@ ## Pipeline specific -- Solve docker ownership issue - Delete the .gitignore in data/raw - delete init.py - why source use if else -- create logs folder on ssh host \ No newline at end of file +- create logs folder on ssh host +- don't need to install torch? 
\ No newline at end of file diff --git a/docs/SETUP.md b/docs/SETUP.md index 5367079..3769bd4 100644 --- a/docs/SETUP.md +++ b/docs/SETUP.md @@ -292,17 +292,55 @@ This shell script runs the experiment pipeline (`dvc exp run`) and performs some ### Run the DVC Experiment Pipeline in a Docker Container -To run the Docker container with repository, SSH and Git-gonfig bindings use the following command with the appropriate image name substituted for the placeholder ``: +In order to run the DVC experiment pipeline in a Docker container, we first need to set up a Docker volume containing our local ssh setup. Since the local ssh setup is not visible to the docker container, we will then mount the volume to the container. A simple bind mount will not always work, because the ownership of the .ssh folder is not changed. + +To create the Docker volume, use the following command: + +```sh +docker volume create --name ssh-config +``` + +In order to copy the local ssh setup to the Docker volume, we are obliged to create a temporary container that binds the volume. + +```sh +docker run -it --rm -v ssh-config:/root/.ssh -v $HOME/.ssh:/local-ssh alpine:latest +# Inside the container +cp -r /local-ssh/* /root/.ssh/ +# Copying the files will change the ownership to root +# Check the copied files +ls -la /root/.ssh/ +``` + +> **Info**: This will not change the ownership of the files on your local machine. + +Next, as dvc needs the git username and email to be set, we will create a `local.env` file in the repository root directory with the following content: + +```env +TUSTU_GIT_USERNAME="Your Name" +TUSTU_GIT_EMAIL="name@domain.com" +``` + +> **Info**: This file is git-ignored and is read by the [exp_workflow.sh](./../exp_workflow.sh) script. It will then configure git with the provided username and email every time the script is run. Your local git configuration will not be changed, as this happens only if the [exp_workflow.sh](./../exp_workflow.sh) script is run from within a Docker container. 
+ +We can now run the experiment within the docker container with repository and SSH volume mounted: ```sh docker run --rm \ --mount type=bind,source="$(pwd)",target=/home/app \ - --mount type=bind,source="$HOME/.ssh",target=/root/.ssh \ - --mount type=bind,source="$HOME/.gitconfig",target=/root/.gitconfig \ + --mount type=volume,source=ssh-config,target=/root/.ssh \ \ /home/app/exp_workflow.sh ``` +In case you want to interact with the container, you can run it in interactive mode. `docker run --help` shows you all available options. + +```sh +docker run -it --rm \ + --mount type=bind,source="$(pwd)",target=/home/app \ + --mount type=volume,source=ssh-config,target=/root/.ssh \ + +``` + ## 6 - SLURM Job Configuration This section covers setting up SLURM jobs for the HPC cluster. SLURM manages resource allocation for your task, which we will specify in a batch job script. Our goal is to run the DVC experiment pipeline inside a Singularity Container on the nodes that have been pulled and converted from your DockerHub image. The batch job script template [slurm_job.sh](../slurm_job.sh) handles these processes and requires minimal configuration. @@ -405,3 +443,5 @@ python multi_submission.py ``` For more information on running and monitoring jobs, refer to the [User Guide](./USAGE.md). + +> **Info**: Singularity is used for containerization on the cluster. In the [slurm_job.sh](./../slurm_job.sh) the image is pulled from DockerHub and converted to a Singularity image. Unlike docker, singularity by default binds the complete home directory of the executing user to the container. Also, when entering a singularity container, the user in a singularity container is the same as the user on the host system. Therefore, we do not get the same permission issues as with docker. 
diff --git a/docs/USAGE.md b/docs/USAGE.md index af2ed37..8df8f14 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -45,8 +45,7 @@ To run the entire pipeline locally, execute the following command with the appro # with Docker docker run --rm \ --mount type=bind,source="$(pwd)",target=/home/app \ - --mount type=bind,source="$HOME/.ssh",target=/root/.ssh \ - --mount type=bind,source="$HOME/.gitconfig",target=/root/.gitconfig \ + --mount type=volume,source=ssh-config,target=/root/.ssh \ \ /home/app/exp_workflow.sh ``` diff --git a/exp_workflow.sh b/exp_workflow.sh index 4e76605..e766efd 100755 --- a/exp_workflow.sh +++ b/exp_workflow.sh @@ -7,13 +7,31 @@ # Description: This script runs an experiment with DVC within a temporary directory copy and pushes the results to the DVC and Git remote. # Set environment variables defined in global.env -export $(grep -v '^#' global.env | xargs) +set -o allexport +source global.env +set +o allexport # Define DEFAULT_DIR in the host environment export DEFAULT_DIR="$PWD" - TUSTU_TMP_DIR=tmp +# Set up a global git configuration when running inside a docker container +# Docker containers create a /.dockerenv file in the root directory +if [ -f /.dockerenv ]; then + if [ -f local.env ]; then + source local.env; + fi + if [ -z "$TUSTU_GIT_USERNAME" ] || [ -z "$TUSTU_GIT_EMAIL" ]; then + echo "[ERROR] Please create a local.env with the vars:" >&2; + echo 'TUSTU_GIT_USERNAME="MY NAME"' >&2; + echo 'TUSTU_GIT_EMAIL="myemail@domain.com"' >&2; + exit 1; + fi + git config --global user.name "$TUSTU_GIT_USERNAME" + git config --global user.email "$TUSTU_GIT_EMAIL" + git config --global safe.directory "$PWD" +fi + # Return function that will be called on exit or error return_to_default_dir() { # Disable the trap to prevent re-entry @@ -45,7 +63,8 @@ if [ -f ".dvc/config.local" ]; then fi; echo ".git"; } | while read file; do - rsync -aR "$file" $TUSTU_EXP_TMP_DIR; + # --chown flag is needed for docker to avoid permission issues + rsync -aR --chown "$(id -u):$(id -g)" 
"$file" "$TUSTU_EXP_TMP_DIR"; done && # Change the working directory to the temporary sub-directory diff --git a/slurm_job.sh b/slurm_job.sh index 4b4aae7..76e91b1 100644 --- a/slurm_job.sh +++ b/slurm_job.sh @@ -18,8 +18,10 @@ # Load necessary modules module load singularity/4.0.2 -# Set environment variables defined in global.env and local.env -export $(grep -v '^#' global.env | xargs) +# Set environment variables defined in global.env +set -o allexport +source global.env +set +o allexport # Define DEFAULT_DIR in the host environment export DEFAULT_DIR="$PWD" diff --git a/sync_logs.sh b/sync_logs.sh index 544f0d7..acad8ea 100755 --- a/sync_logs.sh +++ b/sync_logs.sh @@ -4,7 +4,9 @@ # This file is licensed under the Apache License, Version 2.0. # See the LICENSE file in the root of this project for details. -export $(grep -v '^#' global.env | xargs) +set -o allexport +source global.env +set +o allexport while true; do # Run the rsync command