From b7a6f145237b73775eaa54b9d46413a802abcf60 Mon Sep 17 00:00:00 2001
From: driazati
Date: Wed, 14 Sep 2022 09:06:38 -0700
Subject: [PATCH] [ci] Add retries to docker push

This should mitigate failures like in
https://ci.tlcpack.ai/blue/organizations/jenkins/tvm/detail/main/4274/pipeline.

This also moves the `retry` function to a script now that we have PR #12604.
---
 Jenkinsfile                      | 1429 ++----------------------------
 ci/jenkins/Deploy.groovy.j2      |    6 +-
 ci/jenkins/DockerBuild.groovy.j2 |    6 +-
 ci/jenkins/Prepare.groovy.j2     |    6 +-
 ci/jenkins/macros.j2             |   26 +-
 ci/scripts/retry.sh              |   39 +
 6 files changed, 146 insertions(+), 1366 deletions(-)
 create mode 100644 ci/scripts/retry.sh

diff --git a/Jenkinsfile b/Jenkinsfile
index ed1cf4b09e6eb..5f3cd5f2215e4 100755
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -45,7 +45,7 @@
 // 'python3 jenkins/generate.py'
 // Note: This timestamp is here to ensure that updates to the Jenkinsfile are
 // always rebased on main before merging:
-// Generated at 2022-09-01T11:52:42.195970
+// Generated at 2022-09-14T11:16:11.619787

 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 // NOTE: these lines are scanned by docker/dev_common.sh. Please update the regex as needed. -->
@@ -145,26 +145,7 @@ def init_git() {
   sh(
     script: """
       set -eux
-      retry() {
-        local max_retries=\$1
-        shift
-        local n=0
-        local backoff_max=30
-        until [ "\$n" -ge \$max_retries ]
-        do
-          "\$@" && break
-          n=\$((n+1))
-          if [ "\$n" -eq \$max_retries ]; then
-            echo "failed to update after attempt \$n / \$max_retries, giving up"
-            exit 1
-          fi
-
-          WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
-          echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
-          sleep \$WAIT
-        done
-      }
-
+      . ci/scripts/retry.sh
       retry 3 timeout 5m git submodule update --init -f --jobs 0
     """,
     label: 'Update git submodules',
   )
@@ -196,27 +177,8 @@ def docker_init(image) {
   sh(
     script: """
       set -eux
-      retry() {
-        local max_retries=\$1
-        shift
-        local n=0
-        local backoff_max=30
-        until [ "\$n" -ge \$max_retries ]
-        do
-          "\$@" && break
-          n=\$((n+1))
-          if [ "\$n" -eq \$max_retries ]; then
-            echo "failed to update after attempt \$n / \$max_retries, giving up"
-            exit 1
-          fi
-
-          WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
-          echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
-          sleep \$WAIT
-        done
-      }
-
-      retry 3 docker pull ${image}
+      . ci/scripts/retry.sh
+      retry 5 docker pull ${image}
     """,
     label: 'Pull docker image',
   )
@@ -453,8 +415,9 @@ def ecr_push(full_name) {
     sh(
       script: """
       set -x
+      . ci/scripts/retry.sh
       docker tag ${full_name} \$AWS_ECR_REPO/${full_name}
-      docker push \$AWS_ECR_REPO/${full_name}
+      retry 5 docker push \$AWS_ECR_REPO/${full_name}
      """,
      label: 'Upload image to ECR'
    )
@@ -495,7 +458,8 @@ def ecr_pull(full_name) {
    sh(
      script: """
      set -eux
-      docker pull ${full_name}
+      . ci/scripts/retry.sh
+      retry 5 docker pull ${full_name}
      """,
      label: 'Pull image from ECR'
    )
@@ -778,26 +742,7 @@ stage('Build') {
         sh(
           script: """
             set -eux
-            retry() {
-              local max_retries=\$1
-              shift
-              local n=0
-              local backoff_max=30
-              until [ "\$n" -ge \$max_retries ]
-              do
-                "\$@" && break
-                n=\$((n+1))
-                if [ "\$n" -eq \$max_retries ]; then
-                  echo "failed to update after attempt \$n / \$max_retries, giving up"
-                  exit 1
-                fi
-
-                WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
-                echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
-                sleep \$WAIT
-              done
-            }
-
+            . 
ci/scripts/retry.sh md5sum build/libtvm.so retry 3 aws s3 cp --no-progress build/libtvm.so s3://${s3_prefix}/gpu/build/libtvm.so md5sum build/libvta_fsim.so @@ -818,26 +763,7 @@ stage('Build') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh md5sum build/libtvm.so retry 3 aws s3 cp --no-progress build/libtvm.so s3://${s3_prefix}/gpu2/build/libtvm.so md5sum build/libvta_fsim.so @@ -868,26 +794,7 @@ stage('Build') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh md5sum build/libvta_tsim.so retry 3 aws s3 cp --no-progress build/libvta_tsim.so s3://${s3_prefix}/cpu/build/libvta_tsim.so md5sum build/libtvm.so @@ -928,26 +835,7 @@ stage('Build') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh md5sum build/libtvm.so retry 3 aws s3 cp --no-progress build/libtvm.so s3://${s3_prefix}/cpu-minimal/build/libtvm.so md5sum build/libtvm_runtime.so @@ -1003,26 +891,7 @@ stage('Build') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh md5sum build/libvta_tsim.so retry 3 aws s3 cp --no-progress build/libvta_tsim.so s3://${s3_prefix}/i386/build/libvta_tsim.so md5sum build/libtvm.so @@ -1057,26 +926,7 @@ stage('Build') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh md5sum build/libtvm.so retry 3 aws s3 cp --no-progress build/libtvm.so s3://${s3_prefix}/arm/build/libtvm.so md5sum build/libvta_fsim.so @@ -1109,26 +959,7 @@ stage('Build') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh md5sum build/libtvm.so retry 3 aws s3 cp --no-progress build/libtvm.so s3://${s3_prefix}/cortexm/build/libtvm.so md5sum build/libtvm_runtime.so @@ -1164,26 +995,7 @@ stage('Build') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh md5sum build/libtvm.so retry 3 aws s3 cp --no-progress build/libtvm.so s3://${s3_prefix}/hexagon/build/libtvm.so md5sum build/libtvm_runtime.so @@ -1215,26 +1027,7 @@ stage('Build') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh md5sum build/libtvm.so retry 3 aws s3 cp --no-progress build/libtvm.so s3://${s3_prefix}/riscv/build/libtvm.so md5sum build/libtvm_runtime.so @@ -1278,26 +1071,7 @@ def shard_run_unittest_GPU_1_of_3() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu2/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu2/build/libvta_fsim.so build/libvta_fsim.so @@ -1315,26 +1089,7 @@ def shard_run_unittest_GPU_1_of_3() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -1394,26 +1149,7 @@ def shard_run_unittest_GPU_2_of_3() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -1476,26 +1212,7 @@ def shard_run_unittest_GPU_3_of_3() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -1555,26 +1272,7 @@ def shard_run_integration_CPU_1_of_4() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so md5sum build/libvta_tsim.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so @@ -1631,26 +1329,7 @@ def shard_run_integration_CPU_2_of_4() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so md5sum build/libvta_tsim.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so @@ -1707,26 +1386,7 @@ def shard_run_integration_CPU_3_of_4() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so md5sum build/libvta_tsim.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so @@ -1783,26 +1443,7 @@ def shard_run_integration_CPU_4_of_4() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so md5sum build/libvta_tsim.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so @@ -1860,26 +1501,7 @@ def shard_run_python_i386_1_of_3() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libvta_fsim.so build/libvta_fsim.so @@ -1936,26 +1558,7 @@ def shard_run_python_i386_2_of_3() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libvta_fsim.so build/libvta_fsim.so @@ -2012,26 +1615,7 @@ def shard_run_python_i386_3_of_3() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/i386/build/libvta_fsim.so build/libvta_fsim.so @@ -2088,26 +1672,7 @@ def shard_run_test_Hexagon_1_of_8() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm_runtime.so build/libtvm_runtime.so @@ -2163,26 +1728,7 @@ def shard_run_test_Hexagon_2_of_8() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm_runtime.so build/libtvm_runtime.so @@ -2237,26 +1783,7 @@ def shard_run_test_Hexagon_3_of_8() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm_runtime.so build/libtvm_runtime.so @@ -2311,26 +1838,7 @@ def shard_run_test_Hexagon_4_of_8() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm_runtime.so build/libtvm_runtime.so @@ -2385,26 +1893,7 @@ def shard_run_test_Hexagon_5_of_8() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm_runtime.so build/libtvm_runtime.so @@ -2459,26 +1948,7 @@ def shard_run_test_Hexagon_6_of_8() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm_runtime.so build/libtvm_runtime.so @@ -2533,26 +2003,7 @@ def shard_run_test_Hexagon_7_of_8() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm_runtime.so build/libtvm_runtime.so @@ -2607,26 +2058,7 @@ def shard_run_test_Hexagon_8_of_8() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/hexagon/build/libtvm_runtime.so build/libtvm_runtime.so @@ -2682,26 +2114,7 @@ def shard_run_integration_aarch64_1_of_4() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libvta_fsim.so build/libvta_fsim.so @@ -2757,26 +2170,7 @@ def shard_run_integration_aarch64_2_of_4() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libvta_fsim.so build/libvta_fsim.so @@ -2832,26 +2226,7 @@ def shard_run_integration_aarch64_3_of_4() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libvta_fsim.so build/libvta_fsim.so @@ -2907,26 +2282,7 @@ def shard_run_integration_aarch64_4_of_4() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libvta_fsim.so build/libvta_fsim.so @@ -2983,26 +2339,7 @@ def shard_run_topi_GPU_1_of_3() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -3057,26 +2394,7 @@ def shard_run_topi_GPU_2_of_3() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -3131,26 +2449,7 @@ def shard_run_topi_GPU_3_of_3() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -3206,26 +2505,7 @@ def shard_run_frontend_GPU_1_of_6() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -3280,26 +2560,7 @@ def shard_run_frontend_GPU_2_of_6() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -3354,26 +2615,7 @@ def shard_run_frontend_GPU_3_of_6() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -3428,26 +2670,7 @@ def shard_run_frontend_GPU_4_of_6() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -3502,26 +2725,7 @@ def shard_run_frontend_GPU_5_of_6() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -3576,26 +2780,7 @@ def shard_run_frontend_GPU_6_of_6() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -3651,26 +2836,7 @@ def shard_run_topi_aarch64_1_of_2() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libvta_fsim.so build/libvta_fsim.so @@ -3730,26 +2896,7 @@ def shard_run_topi_aarch64_2_of_2() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libvta_fsim.so build/libvta_fsim.so @@ -3809,26 +2956,7 @@ def shard_run_frontend_aarch64_1_of_2() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libvta_fsim.so build/libvta_fsim.so @@ -3883,26 +3011,7 @@ def shard_run_frontend_aarch64_2_of_2() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/arm/build/libvta_fsim.so build/libvta_fsim.so @@ -3958,26 +3067,7 @@ def shard_run_test_Cortex_M_1_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4037,26 +3127,7 @@ def shard_run_test_Cortex_M_2_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4111,26 +3182,7 @@ def shard_run_test_Cortex_M_3_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4185,26 +3237,7 @@ def shard_run_test_Cortex_M_4_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4259,26 +3292,7 @@ def shard_run_test_Cortex_M_5_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4333,26 +3347,7 @@ def shard_run_test_Cortex_M_6_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4407,26 +3402,7 @@ def shard_run_test_Cortex_M_7_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4481,26 +3457,7 @@ def shard_run_test_Cortex_M_8_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4555,26 +3512,7 @@ def shard_run_test_Cortex_M_9_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4629,26 +3567,7 @@ def shard_run_test_Cortex_M_10_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4703,26 +3622,7 @@ def shard_run_test_Cortex_M_11_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4777,26 +3677,7 @@ def shard_run_test_Cortex_M_12_of_12() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cortexm/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4852,26 +3733,7 @@ def shard_run_test_RISC_V_1_of_1() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/riscv/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/riscv/build/libtvm_runtime.so build/libtvm_runtime.so @@ -4923,26 +3785,7 @@ def run_unittest_minimal() { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . 
ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu-minimal/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu-minimal/build/libtvm_runtime.so build/libtvm_runtime.so @@ -5142,26 +3985,7 @@ stage('Test') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_tsim.so build/libvta_tsim.so md5sum build/libvta_tsim.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so @@ -5217,26 +4041,7 @@ stage('Test') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/cpu/build/libvta_fsim.so build/libvta_fsim.so @@ -5282,26 +4087,7 @@ stage('Test') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libtvm.so build/libtvm.so md5sum build/libtvm.so retry 3 aws s3 cp --no-progress s3://${s3_prefix}/gpu/build/libvta_fsim.so build/libvta_fsim.so @@ -5326,26 +4112,7 @@ stage('Test') { sh( script: """ set -eux - retry() { - local max_retries=\$1 - shift - local n=0 - local backoff_max=30 - until [ "\$n" -ge \$max_retries ] - do - "\$@" && break - n=\$((n+1)) - if [ "\$n" -eq \$max_retries ]; then - echo "failed to update after attempt \$n / \$max_retries, giving up" - exit 1 - fi - - WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))') - echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again" - sleep \$WAIT - done - } - + . ci/scripts/retry.sh md5sum docs.tgz retry 3 aws s3 cp --no-progress docs.tgz s3://${s3_prefix}/docs/docs.tgz """, @@ -5395,10 +4162,11 @@ def update_docker(ecr_image, hub_image) { sh( script: """ set -eux + . 
ci/scripts/retry.sh
     docker tag \
       ${ecr_image} \
       ${hub_image}
-    docker push ${hub_image}
+    retry 5 docker push ${hub_image}
     """,
     label: "Update ${hub_image} on Docker Hub",
   )
@@ -5457,26 +4225,7 @@ def deploy() {
       sh(
         script: """
           set -eux
-          retry() {
-            local max_retries=\$1
-            shift
-            local n=0
-            local backoff_max=30
-            until [ "\$n" -ge \$max_retries ]
-            do
-              "\$@" && break
-              n=\$((n+1))
-              if [ "\$n" -eq \$max_retries ]; then
-                echo "failed to update after attempt \$n / \$max_retries, giving up"
-                exit 1
-              fi
-
-              WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
-              echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
-              sleep \$WAIT
-            done
-          }
-
+          . ci/scripts/retry.sh
           retry 3 aws s3 cp --no-progress s3://${s3_prefix}/docs/docs.tgz docs.tgz
           md5sum docs.tgz
         """,
@@ -5555,9 +4304,10 @@ def deploy() {
         sh(
           script: """
             set -eux
+            . ci/scripts/retry.sh
             docker pull tlcpackstaging/ci_arm:${tag}
             docker tag tlcpackstaging/ci_arm:${tag} tlcpack/ci-arm:${tag}
-            docker push tlcpack/ci-arm:${tag}
+            retry 5 docker push tlcpack/ci-arm:${tag}
           """,
           label: 'Tag tlcpackstaging/ci_arm image to tlcpack',
         )
@@ -5568,9 +4318,10 @@ def deploy() {
         sh(
           script: """
             set -eux
+            . ci/scripts/retry.sh
             docker pull tlcpackstaging/ci_cortexm:${tag}
             docker tag tlcpackstaging/ci_cortexm:${tag} tlcpack/ci-cortexm:${tag}
-            docker push tlcpack/ci-cortexm:${tag}
+            retry 5 docker push tlcpack/ci-cortexm:${tag}
           """,
           label: 'Tag tlcpackstaging/ci_cortexm image to tlcpack',
         )
@@ -5581,9 +4332,10 @@ def deploy() {
         sh(
           script: """
             set -eux
+            . ci/scripts/retry.sh
             docker pull tlcpackstaging/ci_cpu:${tag}
             docker tag tlcpackstaging/ci_cpu:${tag} tlcpack/ci-cpu:${tag}
-            docker push tlcpack/ci-cpu:${tag}
+            retry 5 docker push tlcpack/ci-cpu:${tag}
           """,
           label: 'Tag tlcpackstaging/ci_cpu image to tlcpack',
         )
@@ -5594,9 +4346,10 @@ def deploy() {
         sh(
           script: """
             set -eux
+            . ci/scripts/retry.sh
             docker pull tlcpackstaging/ci_gpu:${tag}
             docker tag tlcpackstaging/ci_gpu:${tag} tlcpack/ci-gpu:${tag}
-            docker push tlcpack/ci-gpu:${tag}
+            retry 5 docker push tlcpack/ci-gpu:${tag}
           """,
           label: 'Tag tlcpackstaging/ci_gpu image to tlcpack',
         )
@@ -5607,9 +4360,10 @@ def deploy() {
         sh(
           script: """
             set -eux
+            . ci/scripts/retry.sh
             docker pull tlcpackstaging/ci_hexagon:${tag}
             docker tag tlcpackstaging/ci_hexagon:${tag} tlcpack/ci-hexagon:${tag}
-            docker push tlcpack/ci-hexagon:${tag}
+            retry 5 docker push tlcpack/ci-hexagon:${tag}
           """,
           label: 'Tag tlcpackstaging/ci_hexagon image to tlcpack',
         )
@@ -5620,9 +4374,10 @@ def deploy() {
         sh(
           script: """
             set -eux
+            . ci/scripts/retry.sh
             docker pull tlcpackstaging/ci_i386:${tag}
             docker tag tlcpackstaging/ci_i386:${tag} tlcpack/ci-i386:${tag}
-            docker push tlcpack/ci-i386:${tag}
+            retry 5 docker push tlcpack/ci-i386:${tag}
           """,
           label: 'Tag tlcpackstaging/ci_i386 image to tlcpack',
         )
@@ -5633,9 +4388,10 @@ def deploy() {
         sh(
           script: """
             set -eux
+            . ci/scripts/retry.sh
             docker pull tlcpackstaging/ci_lint:${tag}
             docker tag tlcpackstaging/ci_lint:${tag} tlcpack/ci-lint:${tag}
-            docker push tlcpack/ci-lint:${tag}
+            retry 5 docker push tlcpack/ci-lint:${tag}
           """,
           label: 'Tag tlcpackstaging/ci_lint image to tlcpack',
         )
@@ -5646,9 +4402,10 @@ def deploy() {
         sh(
           script: """
             set -eux
+            . ci/scripts/retry.sh
             docker pull tlcpackstaging/ci_minimal:${tag}
             docker tag tlcpackstaging/ci_minimal:${tag} tlcpack/ci-minimal:${tag}
-            docker push tlcpack/ci-minimal:${tag}
+            retry 5 docker push tlcpack/ci-minimal:${tag}
           """,
           label: 'Tag tlcpackstaging/ci_minimal image to tlcpack',
         )
@@ -5659,9 +4416,10 @@ def deploy() {
         sh(
           script: """
             set -eux
+            . ci/scripts/retry.sh
             docker pull tlcpackstaging/ci_riscv:${tag}
             docker tag tlcpackstaging/ci_riscv:${tag} tlcpack/ci-riscv:${tag}
-            docker push tlcpack/ci-riscv:${tag}
+            retry 5 docker push tlcpack/ci-riscv:${tag}
           """,
           label: 'Tag tlcpackstaging/ci_riscv image to tlcpack',
         )
@@ -5672,9 +4430,10 @@ def deploy() {
         sh(
          script: """
            set -eux
+            . ci/scripts/retry.sh
            docker pull tlcpackstaging/ci_wasm:${tag}
            docker tag tlcpackstaging/ci_wasm:${tag} tlcpack/ci-wasm:${tag}
-            docker push tlcpack/ci-wasm:${tag}
+            retry 5 docker push tlcpack/ci-wasm:${tag}
          """,
          label: 'Tag tlcpackstaging/ci_wasm image to tlcpack',
        )
diff --git a/ci/jenkins/Deploy.groovy.j2 b/ci/jenkins/Deploy.groovy.j2
index 08516da41b9f2..d2ee4360da6b7 100644
--- a/ci/jenkins/Deploy.groovy.j2
+++ b/ci/jenkins/Deploy.groovy.j2
@@ -30,10 +30,11 @@ def update_docker(ecr_image, hub_image) {
   sh(
     script: """
     set -eux
+    . ci/scripts/retry.sh
     docker tag \
       ${ecr_image} \
       ${hub_image}
-    docker push ${hub_image}
+    retry 5 docker push ${hub_image}
     """,
     label: "Update ${hub_image} on Docker Hub",
   )
@@ -144,9 +145,10 @@ def deploy() {
         sh(
           script: """
             set -eux
+            . ci/scripts/retry.sh
             docker pull tlcpackstaging/{{ image.name }}:${tag}
             docker tag tlcpackstaging/{{ image.name }}:${tag} tlcpack/{{ image.name.replace("_", "-") }}:${tag}
-            docker push tlcpack/{{ image.name.replace("_", "-") }}:${tag}
+            retry 5 docker push tlcpack/{{ image.name.replace("_", "-") }}:${tag}
           """,
           label: 'Tag tlcpackstaging/{{ image.name }} image to tlcpack',
         )
diff --git a/ci/jenkins/DockerBuild.groovy.j2 b/ci/jenkins/DockerBuild.groovy.j2
index 1f3bded86242e..5ffbeded80fa2 100644
--- a/ci/jenkins/DockerBuild.groovy.j2
+++ b/ci/jenkins/DockerBuild.groovy.j2
@@ -21,8 +21,9 @@ def ecr_push(full_name) {
     sh(
       script: """
       set -x
+      . ci/scripts/retry.sh
       docker tag ${full_name} \$AWS_ECR_REPO/${full_name}
-      docker push \$AWS_ECR_REPO/${full_name}
+      retry 5 docker push \$AWS_ECR_REPO/${full_name}
      """,
      label: 'Upload image to ECR'
    )
@@ -63,7 +64,8 @@ def ecr_pull(full_name) {
    sh(
      script: """
      set -eux
-      docker pull ${full_name}
+      . ci/scripts/retry.sh
+      retry 5 docker pull ${full_name}
      """,
      label: 'Pull image from ECR'
    )
diff --git a/ci/jenkins/Prepare.groovy.j2 b/ci/jenkins/Prepare.groovy.j2
index 6d0c0ec9c4b69..4464108968dec 100644
--- a/ci/jenkins/Prepare.groovy.j2
+++ b/ci/jenkins/Prepare.groovy.j2
@@ -33,7 +33,7 @@ def init_git() {
   sh(
     script: """
       set -eux
-      {{ m.bash_retry() }}
+      . ci/scripts/retry.sh
       retry 3 timeout 5m git submodule update --init -f --jobs 0
     """,
     label: 'Update git submodules',
   )
@@ -65,8 +65,8 @@ def docker_init(image) {
   sh(
     script: """
       set -eux
-      {{ m.bash_retry() }}
-      retry 3 docker pull ${image}
+      . ci/scripts/retry.sh
+      retry 5 docker pull ${image}
     """,
     label: 'Pull docker image',
   )
diff --git a/ci/jenkins/macros.j2 b/ci/jenkins/macros.j2
index 9d02ad68d6daa..9852258b18b41 100644
--- a/ci/jenkins/macros.j2
+++ b/ci/jenkins/macros.j2
@@ -140,28 +140,6 @@ def {{ method_name }}() {
   },
 {% endmacro %}

-{% macro bash_retry() %}
-retry() {
-    local max_retries=\$1
-    shift
-    local n=0
-    local backoff_max=30
-    until [ "\$n" -ge \$max_retries ]
-    do
-        "\$@" && break
-        n=\$((n+1))
-        if [ "\$n" -eq \$max_retries ]; then
-            echo "failed to update after attempt \$n / \$max_retries, giving up"
-            exit 1
-        fi
-
-        WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
-        echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
-        sleep \$WAIT
-    done
-}
-{% endmacro %}
-
 {% macro deploy_step(name, feature_flag, ws) %}
   '{{ name }}': {
     if ({{ feature_flag }}) {
@@ -182,7 +160,7 @@ retry() {
 sh(
   script: """
     set -eux
-    {{ bash_retry() | indent(width=14) }}
+    . ci/scripts/retry.sh
     {% for filename in filenames %}
     md5sum {{ filename }}
     retry 3 aws s3 cp --no-progress {{ filename }} s3://${s3_prefix}/{{ tag }}/{{ filename }}
@@ -199,7 +177,7 @@ sh(
 sh(
   script: """
     set -eux
-    {{ bash_retry() | indent(width=14) }}
+    . ci/scripts/retry.sh
     {% for filename in filenames %}
     retry 3 aws s3 cp --no-progress s3://${s3_prefix}/{{ tag }}/{{ filename }} {{ filename }}
     md5sum {{ filename }}
diff --git a/ci/scripts/retry.sh b/ci/scripts/retry.sh
new file mode 100644
index 0000000000000..08958fedce892
--- /dev/null
+++ b/ci/scripts/retry.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+retry() {
+    local max_retries=$1
+    shift
+    local n=0
+    until [ "$n" -ge "$max_retries" ]
+    do
+        "$@" && break
+        n=$((n+1))
+        if [ "$n" -eq "$max_retries" ]; then
+            echo "failed to update after attempt $n / $max_retries, giving up"
+            exit 1
+        fi
+
+        WAIT=$(python3 -c 'import random; print(random.randint(10, 30))')
+        echo "failed to update $n / $max_retries, waiting $WAIT to try again"
+        sleep "$WAIT"
+    done
+}
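
For reference, a minimal usage sketch of the new helper run directly from a shell rather than from Jenkins. This snippet is illustrative and not part of the patch; the image name is a placeholder, and it assumes the command is run from the repository root so that ci/scripts/retry.sh resolves.

    #!/usr/bin/env bash
    # Source the shared helper added by this patch; it defines retry() and
    # enables `set -eux` for the rest of the script.
    . ci/scripts/retry.sh

    # Hypothetical image name, for illustration only.
    IMAGE=tlcpack/ci-example:latest

    # Try the push up to 5 times. Between failed attempts the helper sleeps a
    # random 10-30 seconds; after the 5th failure it exits non-zero, which is
    # what marks the surrounding CI step as failed.
    retry 5 docker push "$IMAGE"

The dollar signs are escaped as \$ in the Jenkinsfile and the .j2 templates only because the commands there are embedded in Groovy triple-quoted strings; in plain shell, as above, they are written unescaped.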