-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Robert Maynard <robertjmaynard@gmail.com>
- Loading branch information
1 parent
5e2ae24
commit 824276a
Showing
1 changed file
with
139 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
#!/bin/bash | ||
# | ||
# rapids-pip-retry | ||
# | ||
# wrapper for pip that retries the command after a hash mismatch error that | ||
# (usually) results from an interrupted network connection | ||
# | ||
# Example usage: | ||
# $ rapids-pip-retry install nvidia-cublas-cu11 | ||
# | ||
# Configurable options are set using the following env vars: | ||
# | ||
# RAPIDS_PIP_EXE - override the pip executable | ||
# Default is "python -m pip" | ||
# | ||
# RAPIDS_PIP_RETRY_MAX - set to a positive integer to set the max number of retry | ||
# attempts (attempts after the initial try). | ||
# Default is 3 retries | ||
# | ||
# RAPIDS_PIP_RETRY_SLEEP - set to a positive integer to set the duration, in | ||
# seconds, to wait between retries. | ||
# Default is a 10 second sleep | ||
set -o pipefail | ||
export RAPIDS_SCRIPT_NAME="rapids-pip-retry" | ||
|
||
pipretry_help=" | ||
rapids-pip-retry options: | ||
--pipretry_max_retries=n Retry the pip command at most n times (default is 3) | ||
--pipretry_sleep_interval=n Sleep n seconds between retries (default is 10) | ||
ALSO rapids-pip-retry options can be set using the following env vars: | ||
RAPIDS_PIP_RETRY_MAX - set to a positive integer to set the max number of retry | ||
attempts (attempts after the initial try). | ||
Default is 3 retries | ||
RAPIDS_PIP_RETRY_SLEEP - set to a positive integer to set the duration, in | ||
seconds, to wait between retries. | ||
Default is a 10 second sleep | ||
========== | ||
" | ||
max_retries=${RAPIDS_PIP_RETRY_MAX:=3} | ||
sleep_interval=${RAPIDS_PIP_RETRY_SLEEP:=10} | ||
exitcode=0 | ||
needToRetry=0 | ||
retries=0 | ||
args="" | ||
|
||
# Temporarily set this to something else (eg. a script called "testPip" that | ||
# prints "ERROR: THESE PACKAGES DO NOT MATCH THE HASHES" and exits with 1) for | ||
# testing this script. | ||
# pipCmd=./testPip | ||
pipCmd=${RAPIDS_PIP_EXE:=python -m pip} | ||
|
||
# Function to run pip and check output for specific retryable errors | ||
# input variables: | ||
# pipCmd: the command used for running pip, which accepts the args | ||
# passed to this script | ||
# outfile: file to tee output to for checking, likely a temp file | ||
# output variables: | ||
# exitcode: the exit code from running ${pipCmd} ${args} | ||
# needToRetry: 1 if the command should be retried, 0 if it should not be | ||
function runPip { | ||
# shellcheck disable=SC2086 | ||
# RAPIDS_OTEL_WRAPPER is optionally passed in as an env var. | ||
${RAPIDS_OTEL_WRAPPER:-} ${pipCmd} ${args} 2>&1 | tee "${outfile}" | ||
exitcode=$? | ||
needToRetry=0 | ||
needToClean=0 | ||
retryingMsg="" | ||
|
||
if (( exitcode != 0 )); then | ||
# Show exit code | ||
rapids-echo-stderr "pip returned exit code: ${exitcode}" | ||
|
||
|
||
if grep -q 'THESE PACKAGES DO NOT MATCH THE HASHES' "${outfile}"; then | ||
retryingMsg="Retrying, found Hash Mismatch Error in output..." | ||
needToRetry=1 | ||
else | ||
rapids-echo-stderr "Exiting, no retryable ${RAPIDS_PIP_EXE} errors detected: \ | ||
THESE PACKAGES DO NOT MATCH THE HASHES" | ||
fi | ||
|
||
if (( needToRetry == 1 )) && \ | ||
(( retries >= max_retries )); then | ||
# Catch instance where we run out of retries | ||
rapids-echo-stderr "Exiting, reached max retries..." | ||
else | ||
# Give reason for retry | ||
rapids-echo-stderr "${retryingMsg}" | ||
if (( needToClean == 1 )); then | ||
rapids-echo-stderr "Cleaning tarball cache before retrying..." | ||
# shellcheck disable=SC2035 | ||
${pipCmd} cache clear * | ||
fi | ||
fi | ||
fi | ||
} | ||
|
||
# Process and remove args recognized only by this script, save others for pip | ||
# Process help separately | ||
for arg in "$@"; do | ||
opt=${arg%%=*} | ||
val=${arg##*=} | ||
if [[ ${opt} == "--help" ]] || [[ ${opt} == "-h" ]]; then | ||
echo "${pipretry_help}" | ||
${pipCmd} --help | ||
exit $? | ||
elif [[ ${opt} == "--pipretry_max_retries" ]]; then | ||
max_retries=${val} | ||
elif [[ ${opt} == "--pipretry_sleep_interval" ]]; then | ||
sleep_interval=${val} | ||
else | ||
args="${args} ${arg}" | ||
fi | ||
done | ||
|
||
# Run command | ||
outfile=$(mktemp) | ||
# shellcheck disable=SC2086 | ||
runPip ${args} | ||
|
||
# Retry loop, only if needed | ||
while (( needToRetry == 1 )) && \ | ||
(( retries < max_retries )); do | ||
|
||
retries=$(( retries + 1 )) | ||
rapids-echo-stderr "Waiting, retry ${retries} of ${max_retries} -> sleeping for ${sleep_interval} seconds..." | ||
sleep "${sleep_interval}" | ||
rapids-echo-stderr "Starting, retry ${retries} of ${max_retries} -> sleep done..." | ||
|
||
# shellcheck disable=SC2086 | ||
runPip ${args} | ||
done | ||
|
||
rm -f "${outfile}" | ||
exit ${exitcode} |