From 9503caf954d76546aab956bdf63ec066b8e84d24 Mon Sep 17 00:00:00 2001 From: "tin.vo" Date: Fri, 4 Oct 2024 09:59:41 -0700 Subject: [PATCH 1/7] adding logic to print failures and retry if there is a cloud-init error --- .../scripts/synchronize-repos.sh | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/enos/modules/install_packages/scripts/synchronize-repos.sh b/enos/modules/install_packages/scripts/synchronize-repos.sh index 8ea2c50dbca4..aedba103e184 100644 --- a/enos/modules/install_packages/scripts/synchronize-repos.sh +++ b/enos/modules/install_packages/scripts/synchronize-repos.sh @@ -99,12 +99,40 @@ synchronize_repos() { esac } +# Function to check cloud-init status and retry on failure # Before we start to modify repositories and install packages we'll wait for cloud-init to finish # so it doesn't race with any of our package installations. -# We run as sudo becase Amazon Linux 2 throws Python 2.7 errors when running `cloud-init status` as +# We run as sudo because Amazon Linux 2 throws Python 2.7 errors when running `cloud-init status` as # non-root user (known bug). -sudo cloud-init status --wait +check_cloud_init() { + local max_retries=2 + local retry_count=0 + local exit_code + while [[ $retry_count -lt $max_retries ]]; do + if sudo cloud-init status --wait; then + echo "Cloud-init completed successfully" + return 0 + else + exit_code=$? + case $exit_code in + 1) + echo "cloud-init did not complete successfully. Exit code: $exit_code" 1>&2 + ;; + 2) + echo "Cloud-init completed successfully, but with errors. Exit code: $exit_code" 1>&2 + exit_code=0 + ;; + esac + echo "There were errors when executing cloud-init. 
Here are the logs for the failure:" + cat /var/log/cloud-init-* | grep "Failed" + retry_count=$((retry_count + 1)) + fi + done + return $exit_code +} + +check_cloud_init begin_time=$(date +%s) end_time=$((begin_time + TIMEOUT_SECONDS)) while [ "$(date +%s)" -lt "$end_time" ]; do @@ -116,3 +144,4 @@ while [ "$(date +%s)" -lt "$end_time" ]; do done fail "Timed out waiting for distro repos to be set up" + From cd6c60f7ca486856fb68c5061bdf238785143b5d Mon Sep 17 00:00:00 2001 From: "tin.vo" Date: Fri, 4 Oct 2024 10:22:18 -0700 Subject: [PATCH 2/7] adding logic to print failures and retry if there is a cloud-init error --- enos/modules/install_packages/scripts/synchronize-repos.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/enos/modules/install_packages/scripts/synchronize-repos.sh b/enos/modules/install_packages/scripts/synchronize-repos.sh index aedba103e184..ff97ab744b58 100644 --- a/enos/modules/install_packages/scripts/synchronize-repos.sh +++ b/enos/modules/install_packages/scripts/synchronize-repos.sh @@ -132,7 +132,12 @@ check_cloud_init() { return $exit_code } +# Checking cloud-init check_cloud_init +if [ $? 
-eq 1 ]; then + exit 1 +fi + begin_time=$(date +%s) end_time=$((begin_time + TIMEOUT_SECONDS)) while [ "$(date +%s)" -lt "$end_time" ]; do From 6ba32bdfa21119fd58c48163c38b1173ad624163 Mon Sep 17 00:00:00 2001 From: "tin.vo" Date: Fri, 4 Oct 2024 10:44:12 -0700 Subject: [PATCH 3/7] fixing timeout error --- enos/modules/install_packages/scripts/synchronize-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enos/modules/install_packages/scripts/synchronize-repos.sh b/enos/modules/install_packages/scripts/synchronize-repos.sh index ff97ab744b58..32dbc021addb 100644 --- a/enos/modules/install_packages/scripts/synchronize-repos.sh +++ b/enos/modules/install_packages/scripts/synchronize-repos.sh @@ -105,7 +105,7 @@ synchronize_repos() { # We run as sudo because Amazon Linux 2 throws Python 2.7 errors when running `cloud-init status` as # non-root user (known bug). check_cloud_init() { - local max_retries=2 + local max_retries=1 local retry_count=0 local exit_code From 31019d57b4b0fa1465a0e06ce1c227eb81e9b9e8 Mon Sep 17 00:00:00 2001 From: "tin.vo" Date: Fri, 4 Oct 2024 11:09:46 -0700 Subject: [PATCH 4/7] fixing timeout error --- enos/modules/install_packages/scripts/synchronize-repos.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/enos/modules/install_packages/scripts/synchronize-repos.sh b/enos/modules/install_packages/scripts/synchronize-repos.sh index 32dbc021addb..c5a594d458a7 100644 --- a/enos/modules/install_packages/scripts/synchronize-repos.sh +++ b/enos/modules/install_packages/scripts/synchronize-repos.sh @@ -133,6 +133,7 @@ check_cloud_init() { } # Checking cloud-init +echo $? check_cloud_init if [ $? -eq 1 ]; then exit 1 @@ -140,7 +141,9 @@ fi begin_time=$(date +%s) end_time=$((begin_time + TIMEOUT_SECONDS)) +echo "--begin---${begin_time}-----end--${end_time}-----$?" 
while [ "$(date +%s)" -lt "$end_time" ]; do + echo "in while loop------" if synchronize_repos; then exit 0 fi From 1b944d91a18d93086bcf7747ddbb80d0e4684698 Mon Sep 17 00:00:00 2001 From: "tin.vo" Date: Fri, 4 Oct 2024 12:06:16 -0700 Subject: [PATCH 5/7] fixing timeout error --- enos/modules/install_packages/scripts/synchronize-repos.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/enos/modules/install_packages/scripts/synchronize-repos.sh b/enos/modules/install_packages/scripts/synchronize-repos.sh index c5a594d458a7..ec420b292bde 100644 --- a/enos/modules/install_packages/scripts/synchronize-repos.sh +++ b/enos/modules/install_packages/scripts/synchronize-repos.sh @@ -105,7 +105,7 @@ synchronize_repos() { # We run as sudo because Amazon Linux 2 throws Python 2.7 errors when running `cloud-init status` as # non-root user (known bug). check_cloud_init() { - local max_retries=1 + local max_retries=0 local retry_count=0 local exit_code @@ -133,7 +133,6 @@ check_cloud_init() { } # Checking cloud-init -echo $? check_cloud_init if [ $? -eq 1 ]; then exit 1 @@ -141,9 +140,7 @@ fi begin_time=$(date +%s) end_time=$((begin_time + TIMEOUT_SECONDS)) -echo "--begin---${begin_time}-----end--${end_time}-----$?" 
while [ "$(date +%s)" -lt "$end_time" ]; do - echo "in while loop------" if synchronize_repos; then exit 0 fi From d88f79f9125b69d00fee9d587118763a898ffaeb Mon Sep 17 00:00:00 2001 From: "tin.vo" Date: Fri, 4 Oct 2024 12:29:03 -0700 Subject: [PATCH 6/7] fixing timeout error --- enos/modules/install_packages/scripts/synchronize-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enos/modules/install_packages/scripts/synchronize-repos.sh b/enos/modules/install_packages/scripts/synchronize-repos.sh index ec420b292bde..32dbc021addb 100644 --- a/enos/modules/install_packages/scripts/synchronize-repos.sh +++ b/enos/modules/install_packages/scripts/synchronize-repos.sh @@ -105,7 +105,7 @@ synchronize_repos() { # We run as sudo because Amazon Linux 2 throws Python 2.7 errors when running `cloud-init status` as # non-root user (known bug). check_cloud_init() { - local max_retries=0 + local max_retries=1 local retry_count=0 local exit_code From 4c32f82999bb0685073e35d398f8a5b14a1f9651 Mon Sep 17 00:00:00 2001 From: "tin.vo" Date: Fri, 4 Oct 2024 14:40:11 -0700 Subject: [PATCH 7/7] fixing timeout error --- enos/modules/install_packages/scripts/synchronize-repos.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/enos/modules/install_packages/scripts/synchronize-repos.sh b/enos/modules/install_packages/scripts/synchronize-repos.sh index 32dbc021addb..8e2b4ca10be5 100644 --- a/enos/modules/install_packages/scripts/synchronize-repos.sh +++ b/enos/modules/install_packages/scripts/synchronize-repos.sh @@ -149,4 +149,3 @@ while [ "$(date +%s)" -lt "$end_time" ]; do done fail "Timed out waiting for distro repos to be set up" -