Skip to content

Commit

Permalink
Add rapids-pip-retry in the same vein as rapids-conda-retry (#132)
Browse files Browse the repository at this point in the history
Co-authored-by: Robert Maynard <robertjmaynard@gmail.com>
  • Loading branch information
gforsyth and robertmaynard authored Feb 4, 2025
1 parent 5e2ae24 commit 824276a
Showing 1 changed file with 139 additions and 0 deletions.
139 changes: 139 additions & 0 deletions tools/rapids-pip-retry
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!/bin/bash
#
# rapids-pip-retry
#
# wrapper for pip that retries the command after a hash mismatch error that
# (usually) results from an interrupted network connection
#
# Example usage:
# $ rapids-pip-retry install nvidia-cublas-cu11
#
# Configurable options are set using the following env vars:
#
# RAPIDS_PIP_EXE - override the pip executable
# Default is "python -m pip"
#
# RAPIDS_PIP_RETRY_MAX - set to a positive integer to set the max number of retry
# attempts (attempts after the initial try).
# Default is 3 retries
#
# RAPIDS_PIP_RETRY_SLEEP - set to a positive integer to set the duration, in
# seconds, to wait between retries.
# Default is a 10 second sleep
set -o pipefail
export RAPIDS_SCRIPT_NAME="rapids-pip-retry"

pipretry_help="
rapids-pip-retry options:
--pipretry_max_retries=n Retry the pip command at most n times (default is 3)
--pipretry_sleep_interval=n Sleep n seconds between retries (default is 10)
ALSO rapids-pip-retry options can be set using the following env vars:
RAPIDS_PIP_RETRY_MAX - set to a positive integer to set the max number of retry
attempts (attempts after the initial try).
Default is 3 retries
RAPIDS_PIP_RETRY_SLEEP - set to a positive integer to set the duration, in
seconds, to wait between retries.
Default is a 10 second sleep
==========
"
max_retries=${RAPIDS_PIP_RETRY_MAX:=3}
sleep_interval=${RAPIDS_PIP_RETRY_SLEEP:=10}
exitcode=0
needToRetry=0
retries=0
args=""

# Temporarily set this to something else (eg. a script called "testPip" that
# prints "ERROR: THESE PACKAGES DO NOT MATCH THE HASHES" and exits with 1) for
# testing this script.
# pipCmd=./testPip
pipCmd=${RAPIDS_PIP_EXE:=python -m pip}

# Function to run pip and check output for specific retryable errors
# input variables:
# pipCmd: the command used for running pip, which accepts the args
# passed to this script
# outfile: file to tee output to for checking, likely a temp file
# output variables:
# exitcode: the exit code from running ${pipCmd} ${args}
# needToRetry: 1 if the command should be retried, 0 if it should not be
function runPip {
# shellcheck disable=SC2086
# RAPIDS_OTEL_WRAPPER is optionally passed in as an env var.
${RAPIDS_OTEL_WRAPPER:-} ${pipCmd} ${args} 2>&1 | tee "${outfile}"
exitcode=$?
needToRetry=0
needToClean=0
retryingMsg=""

if (( exitcode != 0 )); then
# Show exit code
rapids-echo-stderr "pip returned exit code: ${exitcode}"


if grep -q 'THESE PACKAGES DO NOT MATCH THE HASHES' "${outfile}"; then
retryingMsg="Retrying, found Hash Mismatch Error in output..."
needToRetry=1
else
rapids-echo-stderr "Exiting, no retryable ${RAPIDS_PIP_EXE} errors detected: \
THESE PACKAGES DO NOT MATCH THE HASHES"
fi

if (( needToRetry == 1 )) && \
(( retries >= max_retries )); then
# Catch instance where we run out of retries
rapids-echo-stderr "Exiting, reached max retries..."
else
# Give reason for retry
rapids-echo-stderr "${retryingMsg}"
if (( needToClean == 1 )); then
rapids-echo-stderr "Cleaning tarball cache before retrying..."
# shellcheck disable=SC2035
${pipCmd} cache clear *
fi
fi
fi
}

# Process and remove args recognized only by this script, save others for pip
# Process help separately
for arg in "$@"; do
opt=${arg%%=*}
val=${arg##*=}
if [[ ${opt} == "--help" ]] || [[ ${opt} == "-h" ]]; then
echo "${pipretry_help}"
${pipCmd} --help
exit $?
elif [[ ${opt} == "--pipretry_max_retries" ]]; then
max_retries=${val}
elif [[ ${opt} == "--pipretry_sleep_interval" ]]; then
sleep_interval=${val}
else
args="${args} ${arg}"
fi
done

# Run command
outfile=$(mktemp)
# shellcheck disable=SC2086
runPip ${args}

# Retry loop, only if needed
while (( needToRetry == 1 )) && \
(( retries < max_retries )); do

retries=$(( retries + 1 ))
rapids-echo-stderr "Waiting, retry ${retries} of ${max_retries} -> sleeping for ${sleep_interval} seconds..."
sleep "${sleep_interval}"
rapids-echo-stderr "Starting, retry ${retries} of ${max_retries} -> sleep done..."

# shellcheck disable=SC2086
runPip ${args}
done

rm -f "${outfile}"
exit ${exitcode}

0 comments on commit 824276a

Please sign in to comment.