Skip to content

Commit

Permalink
[ci] Add retries to docker push (#12773)
Browse files Browse the repository at this point in the history
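This should mitigate transient failures like the one in
https://ci.tlcpack.ai/blue/organizations/jenkins/tvm/detail/main/4274/pipeline.
This also moves the `retry` function out of the Jinja macros and into a
standalone script (`ci/scripts/retry.sh`), which is possible now that we have PR #12604.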

Co-authored-by: driazati <driazati@users.noreply.github.com>
  • Loading branch information
driazati and driazati authored Sep 15, 2022
1 parent c900250 commit c00ce57
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 1,443 deletions.
1,555 changes: 157 additions & 1,398 deletions Jenkinsfile

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions ci/jenkins/Build.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ stage('Build') {
if (!skip_ci) {
node('CPU-SMALL') {
ws({{ m.per_exec_ws('tvm/build-gpu') }}) {
docker_init(ci_gpu)
init_git()
docker_init(ci_gpu)
sh "${docker_run} --no-gpu ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build"
make("${ci_gpu} --no-gpu", 'build', '-j2')
{{ m.upload_artifacts(tag='gpu', filenames=tvm_multilib, folders=microtvm_template_projects) }}
Expand All @@ -102,8 +102,8 @@ stage('Build') {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws({{ m.per_exec_ws('tvm/build-cpu') }}) {
docker_init(ci_cpu)
init_git()
docker_init(ci_cpu)
sh (
script: "${docker_run} ${ci_cpu} ./tests/scripts/task_config_build_cpu.sh build",
label: 'Create CPU cmake config',
Expand All @@ -126,8 +126,8 @@ stage('Build') {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws({{ m.per_exec_ws('tvm/build-cpu-minimal') }}) {
docker_init(ci_minimal)
init_git()
docker_init(ci_minimal)
sh (
script: "${docker_run} ${ci_minimal} ./tests/scripts/task_config_build_minimal.sh build",
label: 'Create CPU minimal cmake config',
Expand All @@ -144,8 +144,8 @@ stage('Build') {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws({{ m.per_exec_ws('tvm/build-wasm') }}) {
docker_init(ci_wasm)
init_git()
docker_init(ci_wasm)
sh (
script: "${docker_run} ${ci_wasm} ./tests/scripts/task_config_build_wasm.sh build",
label: 'Create WASM cmake config',
Expand All @@ -169,8 +169,8 @@ stage('Build') {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws({{ m.per_exec_ws('tvm/build-i386') }}) {
docker_init(ci_i386)
init_git()
docker_init(ci_i386)
sh (
script: "${docker_run} ${ci_i386} ./tests/scripts/task_config_build_i386.sh build",
label: 'Create i386 cmake config',
Expand All @@ -187,8 +187,8 @@ stage('Build') {
if (!skip_ci && is_docs_only_build != 1) {
node('ARM-SMALL') {
ws({{ m.per_exec_ws('tvm/build-arm') }}) {
docker_init(ci_arm)
init_git()
docker_init(ci_arm)
sh (
script: "${docker_run} ${ci_arm} ./tests/scripts/task_config_build_arm.sh build",
label: 'Create ARM cmake config',
Expand All @@ -205,8 +205,8 @@ stage('Build') {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws({{ m.per_exec_ws('tvm/build-cortexm') }}) {
docker_init(ci_cortexm)
init_git()
docker_init(ci_cortexm)
sh (
script: "${docker_run} ${ci_cortexm} ./tests/scripts/task_config_build_cortexm.sh build",
label: 'Create Cortex-M cmake config',
Expand All @@ -223,8 +223,8 @@ stage('Build') {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws({{ m.per_exec_ws('tvm/build-hexagon') }}) {
docker_init(ci_hexagon)
init_git()
docker_init(ci_hexagon)
sh (
script: "${docker_run} ${ci_hexagon} ./tests/scripts/task_config_build_hexagon.sh build",
label: 'Create Hexagon cmake config',
Expand All @@ -245,8 +245,8 @@ stage('Build') {
if (!skip_ci && is_docs_only_build != 1) {
node('CPU-SMALL') {
ws({{ m.per_exec_ws('tvm/build-riscv') }}) {
docker_init(ci_riscv)
init_git()
docker_init(ci_riscv)
sh (
script: "${docker_run} ${ci_riscv} ./tests/scripts/task_config_build_riscv.sh build",
label: 'Create RISC-V cmake config',
Expand Down
6 changes: 4 additions & 2 deletions ci/jenkins/Deploy.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ def update_docker(ecr_image, hub_image) {
sh(
script: """
set -eux
. ci/scripts/retry.sh
docker tag \
${ecr_image} \
${hub_image}
docker push ${hub_image}
retry 5 docker push ${hub_image}
""",
label: "Update ${hub_image} on Docker Hub",
)
Expand Down Expand Up @@ -144,9 +145,10 @@ def deploy() {
sh(
script: """
set -eux
. ci/scripts/retry.sh
docker pull tlcpackstaging/{{ image.name }}:${tag}
docker tag tlcpackstaging/{{ image.name }}:${tag} tlcpack/{{ image.name.replace("_", "-") }}:${tag}
docker push tlcpack/{{ image.name.replace("_", "-") }}:${tag}
retry 5 docker push tlcpack/{{ image.name.replace("_", "-") }}:${tag}
""",
label: 'Tag tlcpackstaging/{{ image.name }} image to tlcpack',
)
Expand Down
6 changes: 4 additions & 2 deletions ci/jenkins/DockerBuild.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ def ecr_push(full_name) {
sh(
script: """
set -x
. ci/scripts/retry.sh
docker tag ${full_name} \$AWS_ECR_REPO/${full_name}
docker push \$AWS_ECR_REPO/${full_name}
retry 5 docker push \$AWS_ECR_REPO/${full_name}
""",
label: 'Upload image to ECR'
)
Expand Down Expand Up @@ -63,7 +64,8 @@ def ecr_pull(full_name) {
sh(
script: """
set -eux
docker pull ${full_name}
. ci/scripts/retry.sh
retry 5 docker pull ${full_name}
""",
label: 'Pull image from ECR'
)
Expand Down
6 changes: 3 additions & 3 deletions ci/jenkins/Prepare.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def init_git() {
sh(
script: """
set -eux
{{ m.bash_retry() }}
. ci/scripts/retry.sh
retry 3 timeout 5m git submodule update --init -f --jobs 0
""",
label: 'Update git submodules',
Expand Down Expand Up @@ -65,8 +65,8 @@ def docker_init(image) {
sh(
script: """
set -eux
{{ m.bash_retry() }}
retry 3 docker pull ${image}
. ci/scripts/retry.sh
retry 5 docker pull ${image}
""",
label: 'Pull docker image',
)
Expand Down
2 changes: 1 addition & 1 deletion ci/jenkins/Test.groovy.j2
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,8 @@ stage('Test') {
if (!skip_ci) {
node('GPU') {
ws({{ m.per_exec_ws('tvm/docs-python-gpu') }}) {
docker_init(ci_gpu)
init_git()
docker_init(ci_gpu)
{{ m.download_artifacts(tag='gpu', filenames=tvm_multilib, folders=microtvm_template_projects) }}
add_microtvm_permissions()
timeout(time: 180, unit: 'MINUTES') {
Expand Down
34 changes: 6 additions & 28 deletions ci/jenkins/macros.j2
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def {{ method_name }}() {
node('{{ node }}') {
ws({{ per_exec_ws(ws) }}) {
try {
docker_init({{ docker_image }})
init_git()
docker_init({{ docker_image }})
timeout(time: max_time, unit: 'MINUTES') {
withEnv([
'PLATFORM={{ platform }}',
Expand Down Expand Up @@ -71,8 +71,8 @@ def {{ method_name }}() {
'{{ name }} {{ shard_index }} of {{ num_shards }}': {
node('{{ node }}') {
ws({{ per_exec_ws(ws) }}) {
docker_init({{ docker_image }})
init_git()
docker_init({{ docker_image }})
timeout(time: max_time, unit: 'MINUTES') {
withEnv([
'TVM_NUM_SHARDS={{ num_shards }}',
Expand All @@ -95,8 +95,8 @@ def {{ method_name }}() {
ws({{ per_exec_ws(ws) }}) {
timeout(time: max_time, unit: 'MINUTES') {
try {
docker_init({{ docker_image }})
init_git()
docker_init({{ docker_image }})
withEnv(['PLATFORM={{ platform }}'], {
{{ caller() | indent(width=8) | trim }}
})
Expand All @@ -120,8 +120,8 @@ def {{ method_name }}() {
ws({{ per_exec_ws(ws) }}) {
timeout(time: max_time, unit: 'MINUTES') {
try {
docker_init({{ docker_image }})
init_git()
docker_init({{ docker_image }})
withEnv(['PLATFORM={{ platform }}',
'TEST_STEP_NAME={{ name }}',
"SKIP_SLOW_TESTS=${skip_slow_tests}"], {
Expand All @@ -140,28 +140,6 @@ def {{ method_name }}() {
},
{% endmacro %}

{#
  bash_retry: emits the definition of a shell function
      retry MAX_RETRIES COMMAND [ARG...]
  The emitted function runs COMMAND until it succeeds, at most MAX_RETRIES
  times. After each failed attempt except the last it sleeps for a random
  10-30 seconds (the value is obtained from python3); after the last failed
  attempt it prints a message and calls `exit 1`.
  Every `$` is written as `\$` -- presumably so the text can be embedded in
  an interpolating Groovy string without being expanded there; confirm
  against the call sites.
  NOTE(review): `backoff_max` is assigned but never read.
#}
{% macro bash_retry() %}
retry() {
local max_retries=\$1
shift
local n=0
local backoff_max=30
until [ "\$n" -ge \$max_retries ]
do
"\$@" && break
n=\$((n+1))
if [ "\$n" -eq \$max_retries ]; then
echo "failed to update after attempt \$n / \$max_retries, giving up"
exit 1
fi

WAIT=\$(python3 -c 'import random; print(random.randint(10, 30))')
echo "failed to update \$n / \$max_retries, waiting \$WAIT to try again"
sleep \$WAIT
done
}
{% endmacro %}

{% macro deploy_step(name, feature_flag, ws) %}
'{{ name }}': {
if ({{ feature_flag }}) {
Expand All @@ -182,7 +160,7 @@ retry() {
sh(
script: """
set -eux
{{ bash_retry() | indent(width=14) }}
. ci/scripts/retry.sh
{% for filename in filenames %}
md5sum {{ filename }}
retry 3 aws s3 cp --no-progress {{ filename }} s3://${s3_prefix}/{{ tag }}/{{ filename }}
Expand All @@ -199,7 +177,7 @@ sh(
sh(
script: """
set -eux
{{ bash_retry() | indent(width=14) }}
. ci/scripts/retry.sh
{% for filename in filenames %}
retry 3 aws s3 cp --no-progress s3://${s3_prefix}/{{ tag }}/{{ filename }} {{ filename }}
md5sum {{ filename }}
Expand Down
39 changes: 39 additions & 0 deletions ci/scripts/retry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# NOTE: CI steps load this file with `. ci/scripts/retry.sh`, i.e. it is
# sourced rather than executed, so these options also take effect in the
# calling shell:
#   -e  exit as soon as a command fails
#   -u  treat expansion of an unset variable as an error
#   -x  print each command before running it
set -eux

#######################################
# Run a command, retrying with a randomized delay until it succeeds.
#
# Usage:     retry MAX_RETRIES COMMAND [ARG...]
# Arguments: $1   - maximum number of attempts; must be an integer >= 1
#            $2.. - the command and its arguments, executed as "$@"
# Outputs:   progress and failure messages on stderr; the command's own
#            output is passed through untouched
# Returns:   0 as soon as one attempt succeeds
#            1 if every attempt failed
#            2 if the arguments are invalid (nothing is executed)
#
# This file is sourced, so the function must `return` rather than `exit`:
# under `set -e` a bare `retry ...` still aborts the calling script, while
# `if retry ...` / `retry ... || fallback` keep working as expected.
# Only POSIX constructs plus `local` are used so it also works when the
# sourcing shell is not bash.
#######################################
retry() {
  if [ "$#" -lt 2 ]; then
    echo "usage: retry MAX_RETRIES COMMAND [ARG...]" >&2
    return 2
  fi

  local max_retries=$1
  shift
  local n=0
  local delay

  # Reject counts that would make the loop below misbehave: a non-numeric
  # value breaks the integer test, and 0 would report success without ever
  # running the command.
  case "$max_retries" in
    '' | *[!0-9]*)
      echo "retry: MAX_RETRIES must be a positive integer, got '$max_retries'" >&2
      return 2
      ;;
  esac
  if [ "$max_retries" -lt 1 ]; then
    echo "retry: MAX_RETRIES must be a positive integer, got '$max_retries'" >&2
    return 2
  fi

  until [ "$n" -ge "$max_retries" ]; do
    "$@" && return 0
    n=$((n + 1))
    if [ "$n" -eq "$max_retries" ]; then
      echo "failed to update after attempt $n / $max_retries, giving up" >&2
      return 1
    fi

    # Random 10-30s pause so that parallel jobs do not retry in lockstep.
    # Fall back to a fixed delay instead of aborting if python3 is missing.
    delay=$(python3 -c 'import random; print(random.randint(10, 30))') || delay=20
    echo "failed to update $n / $max_retries, waiting $delay to try again" >&2
    sleep "$delay"
  done
}

0 comments on commit c00ce57

Please sign in to comment.