Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 fixing randomly failing [sys] deploy simcore #2430

Closed
wants to merge 15 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
log = logging.getLogger(__name__)

SERVICES_TO_SKIP = ["sidecar", "postgres", "redis", "rabbit"]
SERVICE_HEALTHCHECK_ENTRYPOINT = {"director-v2": "/"}
SERVICE_HEALTHCHECK_ENTRYPOINT = {"director-v2": "/", "migration": "/"}


@pytest.fixture(scope="module")
Expand Down
2 changes: 1 addition & 1 deletion packages/simcore-sdk/tests/integration/test_nodeports.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from simcore_sdk.node_ports._item import ItemConcreteValue
from simcore_sdk.node_ports.nodeports import Nodeports

pytest_simcore_core_services_selection = ["postgres", "storage"]
pytest_simcore_core_services_selection = ["migration", "postgres", "storage"]

pytest_simcore_ops_services_selection = [
"minio",
Expand Down
2 changes: 1 addition & 1 deletion packages/simcore-sdk/tests/integration/test_nodeports2.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from simcore_sdk.node_ports_v2.links import ItemConcreteValue
from simcore_sdk.node_ports_v2.nodeports_v2 import Nodeports

pytest_simcore_core_services_selection = ["postgres", "storage"]
pytest_simcore_core_services_selection = ["migration", "postgres", "storage"]

pytest_simcore_ops_services_selection = ["minio"]

Expand Down
3 changes: 3 additions & 0 deletions scripts/common-docker-boot/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# wait-for script

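Used by almost all the services to wait until a dependency is reachable before booting. To update the script, take the latest version from the upstream project's official [release page](https://github.com/eficode/wait-for/releases).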
184 changes: 184 additions & 0 deletions scripts/common-docker-boot/wait-for
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
#!/bin/sh

# The MIT License (MIT)
#
# Copyright (c) 2017 Eficode Oy
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Stash whatever values the caller's environment holds for the variables this
# script uses as globals: they are appended to the positional parameters after
# a "--" separator, and wait_for restores them right before exec'ing the user
# command. The appended "--" also guarantees the option-parsing loop below
# always finds a terminator.
set -- "$@" -- "$TIMEOUT" "$QUIET" "$PROTOCOL" "$HOST" "$PORT" "$result"
# Defaults; TIMEOUT and QUIET can be overridden by command-line options.
TIMEOUT=15
QUIET=0
# The protocol to make the request with, either "tcp" or "http"
PROTOCOL="tcp"

# Write all arguments, joined into a single line, to stderr.
# Stays silent when the global QUIET is set to 1.
echoerr() {
  if ! [ "$QUIET" -ne 1 ]; then
    return 0
  fi
  printf "%s\n" "$*" 1>&2
}

# Print the command-line synopsis to stderr and terminate the script.
#   $1 - exit status to terminate with
usage() {
  exitcode="$1"
  {
    printf '%s\n' "Usage:"
    printf '%s\n' "$0 host:port|url [-t timeout] [-- command args]"
    printf '%s\n' "-q | --quiet Do not output any status messages"
    printf '%s\n' "-t TIMEOUT | --timeout=timeout Timeout in seconds, zero for no timeout"
    printf '%s\n' "-- COMMAND ARGS Execute command with args after the test finishes"
  } >&2
  exit "$exitcode"
}

# Poll the target once per second until it answers, then run the user command.
#
# Globals read: PROTOCOL ("tcp" or "http"), HOST, PORT (tcp only), TIMEOUT,
#               and QUIET (indirectly, through echoerr).
# Arguments:    the optional command and its args, followed by "--" and the
#               six stashed values appended at script start
#               (TIMEOUT QUIET PROTOCOL HOST PORT result).
# Exit status:  0 if the target is reachable and no command was given;
#               replaced by the command (exec) if one was given;
#               1 on a missing probing tool, unknown protocol, or timeout.
wait_for() {
  # Fail fast when the tool needed for the selected protocol is not installed.
  # The http branch probes with wget, so that is the binary to look for.
  case "$PROTOCOL" in
    tcp)
      if ! command -v nc >/dev/null; then
        echoerr 'nc command is missing!'
        exit 1
      fi
      ;;
    http)
      if ! command -v wget >/dev/null; then
        echoerr 'wget command is missing!'
        exit 1
      fi
      ;;
  esac

  while :; do
    case "$PROTOCOL" in
      tcp)
        nc -w 1 -z "$HOST" "$PORT" > /dev/null 2>&1
        ;;
      http)
        wget --timeout=1 -q "$HOST" -O /dev/null > /dev/null 2>&1
        ;;
      *)
        echoerr "Unknown protocol '$PROTOCOL'"
        exit 1
        ;;
    esac

    # Status of the probe that just ran inside the case statement.
    result=$?

    if [ $result -eq 0 ] ; then
      # More than 7 words means a command precedes the "--" plus six stashed
      # values. Rotate the command words to the end so the stash sits at
      # $1..$7 and can be restored and dropped before exec.
      if [ $# -gt 7 ] ; then
        for result in $(seq $(($# - 7))); do
          result=$1
          shift
          set -- "$@" "$result"
        done

        # $1 is the "--" separator; $2..$7 are the caller's original values.
        TIMEOUT=$2 QUIET=$3 PROTOCOL=$4 HOST=$5 PORT=$6 result=$7
        shift 7
        exec "$@"
      fi
      exit 0
    fi

    # NOTE(review): with TIMEOUT=0 this gives up after a single probe, while
    # the usage text says zero means "no timeout" - confirm which is intended.
    if [ "$TIMEOUT" -le 0 ]; then
      break
    fi
    TIMEOUT=$((TIMEOUT - 1))

    sleep 1
  done
  echo "Operation timed out" >&2
  exit 1
}

# Parse wait-for's own arguments. The loop ends at the first "--"; since the
# script appended its own "--" to the positional parameters at start-up, one
# is always present. Arm order matters: earlier patterns win.
while :; do
case "$1" in
# A full URL selects the http probe; the URL itself is kept in HOST.
http://*|https://*)
HOST="$1"
PROTOCOL="http"
shift 1
;;
# host:port - field 1 (before the first ":") is the host, field 2 the port.
*:* )
HOST=$(printf "%s\n" "$1"| cut -d : -f 1)
PORT=$(printf "%s\n" "$1"| cut -d : -f 2)
shift 1
;;
-q | --quiet)
QUIET=1
shift 1
;;
# "-q-..." is malformed; QUIET is reset so echoerr actually prints the error.
-q-*)
QUIET=0
echoerr "Unknown option: $1"
usage 1
;;
# -q bundled with more short options (e.g. -qt5): record quiet, then push the
# remainder back as its own "-..." argument for the next iteration.
-q*)
QUIET=1
result=$1
shift 1
set -- -"${result#-q}" "$@"
;;
# Timeout given as a separate word.
-t | --timeout)
TIMEOUT="$2"
shift 2
;;
# Timeout attached to the short option, e.g. -t30.
-t*)
TIMEOUT="${1#-t}"
shift 1
;;
--timeout=*)
TIMEOUT="${1#*=}"
shift 1
;;
# End of wait-for options; everything after it stays in the positional
# parameters and is handed to wait_for.
--)
shift
break
;;
--help)
usage 0
;;
# Any other dash-prefixed word: reset QUIET so the error is printed, then bail.
-*)
QUIET=0
echoerr "Unknown option: $1"
usage 1
;;
*)
QUIET=0
echoerr "Unknown argument: $1"
usage 1
;;
esac
done

# TIMEOUT must be a non-negative integer; the numeric test's own complaint
# about non-numeric input is silenced and reported through echoerr instead.
[ "$TIMEOUT" -ge 0 ] 2>/dev/null || {
  echoerr "Error: invalid timeout '$TIMEOUT'"
  usage 3
}

# Each protocol needs a different minimum set of target parameters.
if [ "$PROTOCOL" = "tcp" ]; then
  if [ -z "$HOST" ] || [ -z "$PORT" ]; then
    echoerr "Error: you need to provide a host and port to test."
    usage 2
  fi
elif [ "$PROTOCOL" = "http" ]; then
  [ -n "$HOST" ] || {
    echoerr "Error: you need to provide a host to test."
    usage 2
  }
fi

# Hand the remaining words (user command plus the stashed values) to the poller.
wait_for "$@"
73 changes: 43 additions & 30 deletions services/api-server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,33 @@ FROM python:${PYTHON_VERSION}-slim-buster as base
LABEL maintainer=pcrespov

RUN set -eux; \
apt-get update; \
apt-get install -y gosu; \
rm -rf /var/lib/apt/lists/*; \
# verify that the binary works
gosu nobody true
apt-get update; \
apt-get install -y gosu; \
rm -rf /var/lib/apt/lists/*; \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If wget is needed in both the dev and prod stages, why don't you install it here?

# verify that the binary works
gosu nobody true

# simcore-user uid=8004(scu) gid=8004(scu) groups=8004(scu)
ENV SC_USER_ID=8004 \
SC_USER_NAME=scu \
SC_BUILD_TARGET=base \
SC_BOOT_MODE=default
SC_USER_NAME=scu \
SC_BUILD_TARGET=base \
SC_BOOT_MODE=default

RUN adduser \
--uid ${SC_USER_ID} \
--disabled-password \
--gecos "" \
--shell /bin/sh \
--home /home/${SC_USER_NAME} \
${SC_USER_NAME}
--uid ${SC_USER_ID} \
--disabled-password \
--gecos "" \
--shell /bin/sh \
--home /home/${SC_USER_NAME} \
${SC_USER_NAME}


# Sets utf-8 encoding for Python et al
ENV LANG=C.UTF-8

# Turns off writing .pyc files; superfluous on an ephemeral container.
ENV PYTHONDONTWRITEBYTECODE=1 \
VIRTUAL_ENV=/home/scu/.venv
VIRTUAL_ENV=/home/scu/.venv

# Ensures that the python and pip executables used in the image will be
# those from our virtualenv.
Expand All @@ -56,20 +56,21 @@ FROM base as build
ENV SC_BUILD_TARGET=build

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
build-essential \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
&& apt-get install -y --no-install-recommends \
build-essential \
wget \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*


# NOTE: python virtualenv is used here such that installed
# packages may be moved to production image easily by copying the venv
RUN python -m venv "${VIRTUAL_ENV}"

RUN pip install --no-cache-dir --upgrade \
pip~=21.0.1 \
wheel \
setuptools
pip~=21.0.1 \
wheel \
setuptools

WORKDIR /build

Expand All @@ -96,7 +97,7 @@ COPY --chown=scu:scu services/storage/client-sdk /build/services/storage/client-
WORKDIR /build/services/api-server

RUN pip --no-cache-dir install -r requirements/prod.txt &&\
pip --no-cache-dir list -v
pip --no-cache-dir list -v


# --------------------------Production stage -------------------
Expand All @@ -109,7 +110,7 @@ RUN pip --no-cache-dir install -r requirements/prod.txt &&\
FROM base as production

ENV SC_BUILD_TARGET=production \
SC_BOOT_MODE=production
SC_BOOT_MODE=production

ENV PYTHONOPTIMIZE=TRUE

Expand All @@ -118,18 +119,27 @@ WORKDIR /home/scu
# Starting from clean base image, copies pre-installed virtualenv from prod-only-deps
COPY --chown=scu:scu --from=prod-only-deps ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# wget required by wait-for
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
wget \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Copies booting scripts
COPY --chown=scu:scu services/api-server/docker services/api-server/docker
RUN chmod +x services/api-server/docker/*.sh
COPY --chown=scu:scu scripts/common-docker-boot/wait-for /usr/local/bin/wait-for
RUN chmod +x /usr/local/bin/wait-for

HEALTHCHECK --interval=30s \
--timeout=20s \
--start-period=30s \
--retries=3 \
CMD ["python3", "services/api-server/docker/healthcheck.py", "http://localhost:8000/"]
--timeout=20s \
--start-period=30s \
--retries=3 \
CMD ["python3", "services/api-server/docker/healthcheck.py", "http://localhost:8000/"]

ENTRYPOINT [ "/bin/sh", "services/api-server/docker/entrypoint.sh" ]
CMD ["/bin/sh", "services/api-server/docker/boot.sh"]
CMD ["/bin/sh", "-c", "wait-for http://migration:8000 -- services/api-server/docker/boot.sh"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are going to have an issue in the deployments here.
All the apps are renamed using $SWARM_STACK_NAME_migration. Please double-check and correct this, as it will fail for sure.



# --------------------------Development stage -------------------
Expand All @@ -148,5 +158,8 @@ WORKDIR /devel

RUN chown -R scu:scu "${VIRTUAL_ENV}"

COPY --chown=scu:scu scripts/common-docker-boot/wait-for /usr/local/bin/wait-for
RUN chmod +x /usr/local/bin/wait-for

ENTRYPOINT ["/bin/sh", "services/api-server/docker/entrypoint.sh"]
CMD ["/bin/sh", "services/api-server/docker/boot.sh"]
CMD ["/bin/sh", "-c", "wait-for http://migration:8000 -- services/api-server/docker/boot.sh"]
Loading