From ca8d9f4a8dcb1bdb7be066866e96321cc5c1bf86 Mon Sep 17 00:00:00 2001
From: TerrenceMcGuinness-NOAA <terrence.mcguinness@cox.net>
Date: Wed, 16 Oct 2024 15:51:11 -0400
Subject: [PATCH] Github pipelines and utils for running CI on parallel works
 (#3007)

# Description

This PR has the GitHub Pipeline script in the `.github/workflows`
directory for running CI tests
to be performed on an AWS virtual cluster. It is set up to be launched from
the dispatch action from the Actions tab.

For now it will only run C48_ATM

Resolves #3006

Once the yaml pipeline is in `.github/workflows` directory of the
default branch we can test it against [PR
2977](https://github.com/NOAA-EMC/global-workflow/pull/2977) which may
be needed to build on Parallel Works Centos AWS.

Code managers can check to see if the self-hosted runner
[globalworkflow_parallelworks](https://github.com/NOAA-EMC/global-workflow/settings/actions/runners/22)
is up and ready by checking the
[Running](https://github.com/NOAA-EMC/global-workflow/settings/actions/runners)
Settings.

In pending work we should also be able to spin up the cluster on demand
from GitHub as well.

<!-- For more on writing good commit messages, see
https://cbea.ms/git-commit/ -->

# Type of change
- [ ] Bug fix (fixes something broken)
- [ ] New feature (adds functionality)
- [x] Maintenance (code refactor, clean-up, new CI test, etc.)

# Change characteristics
<!-- Choose YES or NO from each of the following and delete the other
-->
- Is this a breaking change (a change in existing functionality)? YES/NO
- Does this change require a documentation update? YES/NO
- Does this change require an update to any of the following submodules?
YES/NO (If YES, please add a link to any PRs that are pending.)
  - [ ] EMC verif-global <!-- NOAA-EMC/EMC_verif-global#1234 -->
  - [ ] GDAS <!-- NOAA-EMC/GDASApp#1234 -->
  - [ ] GFS-utils <!-- NOAA-EMC/gfs-utils#1234 -->
  - [ ] GSI <!-- NOAA-EMC/GSI#1234 -->
  - [ ] GSI-monitor <!-- NOAA-EMC/GSI-Monitor#1234 -->
  - [ ] GSI-utils <!-- NOAA-EMC/GSI-Utils#1234 -->
  - [ ] UFS-utils <!-- ufs-community/UFS_UTILS#1234 -->
  - [ ] UFS-weather-model <!-- ufs-community/ufs-weather-model#1234 -->
  - [ ] wxflow <!-- NOAA-EMC/wxflow#1234 -->

# How has this been tested?
<!-- Please list any test you conducted, including the machine.

CI tests run end-to-end on an AWS CentOS-based virtual cluster on
Parallel Works.

-->

# Checklist
- [ ] Any dependent changes have been merged and published
- [x] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my own code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have documented my code, including function, input, and output
descriptions
- [ ] My changes generate no new warnings
- [ ] New and existing tests pass with my changes
- [x] This change is covered by an existing CI test or a new one has
been added
- [ ] I have made corresponding changes to the system documentation if
necessary

---------

Co-authored-by: tmcguinness <terry.mcguinness@noaa.gov>
---
 ...balworkflow-ci.yaml => pw_aws_centos.yaml} | 46 ++++++++++---------
 .../parallel_works/UserBootstrap_centos7.txt  |  5 ++
 .../utils/parallel_works/provision_runner.sh  | 39 ++++++++++++++++
 3 files changed, 68 insertions(+), 22 deletions(-)
 rename .github/workflows/{globalworkflow-ci.yaml => pw_aws_centos.yaml} (63%)
 create mode 100644 ci/scripts/utils/parallel_works/UserBootstrap_centos7.txt
 create mode 100755 ci/scripts/utils/parallel_works/provision_runner.sh

diff --git a/.github/workflows/globalworkflow-ci.yaml b/.github/workflows/pw_aws_centos.yaml
similarity index 63%
rename from .github/workflows/globalworkflow-ci.yaml
rename to .github/workflows/pw_aws_centos.yaml
index 1474c79a48..549a3ea0fa 100644
--- a/.github/workflows/globalworkflow-ci.yaml
+++ b/.github/workflows/pw_aws_centos.yaml
@@ -1,4 +1,4 @@
-name: gw-ci-orion
+name: gw-ci-aws-centos
 
 on: [workflow_dispatch]
 
@@ -15,28 +15,31 @@ on: [workflow_dispatch]
 #         └── ${pslot}
 env:
   TEST_DIR: ${{ github.workspace }}/${{ github.run_id }}
-  MACHINE_ID: orion
+  MACHINE_ID: noaacloud
 
 jobs:
-  checkout-build-link:
-    runs-on: [self-hosted, orion-ready]
+  checkout:
+    runs-on: [self-hosted, aws, parallelworks, centos]
     timeout-minutes: 600
 
     steps:
+
     - name: Checkout global-workflow
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
-        path: ${{ github.run_id }}/HOMEgfs  # This path needs to be relative
+        path: ${{ github.run_id }}/HOMEgfs
+        submodules: 'recursive'
 
-    - name: Checkout components
-      run: |
-        cd ${{ env.TEST_DIR }}/HOMEgfs/sorc
-        ./checkout.sh -c -g  # Options e.g. -u can be added late
+  build-link:  # compiles the components, then links the workflow artifacts
+    runs-on: [self-hosted, aws, parallelworks, centos]
+    needs: checkout  # the build runs inside the HOMEgfs tree created by checkout
+
+    steps:
 
     - name: Build components
       run: |
         cd ${{ env.TEST_DIR }}/HOMEgfs/sorc
-        ./build_all.sh
+        ./build_all.sh -j 8
 
     - name: Link artifacts
       run: |
@@ -44,43 +47,42 @@ jobs:
         ./link_workflow.sh
 
   create-experiments:
-    needs: checkout-build-link
-    runs-on: [self-hosted, orion-ready]
+    needs: build-link  # wait for build and link, not just the checkout job
+    runs-on: [self-hosted, aws, parallelworks, centos]
     strategy:
       matrix:
-        case: ["C48_S2S", "C96_atm3DVar"]
+        case: ["C48_ATM"]
 
     steps:
       - name: Create Experiments ${{ matrix.case }}
         env:
-          HOMEgfs_PR: ${{ env.TEST_DIR }}/HOMEgfs
           RUNTESTS: ${{ env.TEST_DIR }}/RUNTESTS
           pslot: ${{ matrix.case }}.${{ github.run_id }}
         run: |
+          mkdir -p ${{ env.RUNTESTS }}
           cd ${{ env.TEST_DIR }}/HOMEgfs
           source workflow/gw_setup.sh
-          source ci/platforms/orion.sh
-          ./ci/scripts/create_experiment.py --yaml ci/cases/${{ matrix.case }}.yaml --dir ${{ env.HOMEgfs_PR }}
+          source ci/platforms/config.noaacloud
+          ./workflow/create_experiment.py --yaml ci/cases/pr/${{ matrix.case }}.yaml --overwrite
 
   run-experiments:
     needs: create-experiments
-    runs-on: [self-hosted, orion-ready]
+    runs-on: [self-hosted, aws, parallelworks, centos]
     strategy:
       max-parallel: 2
       matrix:
-        case: ["C48_S2S", "C96_atm3DVar"]
+        case: ["C48_ATM"]
     steps:
       - name: Run Experiment ${{ matrix.case }}
         run: |
           cd ${{ env.TEST_DIR }}/HOMEgfs
-          ./ci/scripts/run-check_ci.sh ${{ env.TEST_DIR }} ${{ matrix.case }}.${{ github.run_id }}
+          ./ci/scripts/run-check_ci.sh ${{ env.TEST_DIR }} ${{ matrix.case }}.${{ github.run_id }} HOMEgfs
 
   clean-up:
     needs: run-experiments
-    runs-on: [self-hosted, orion-ready]
+    runs-on: [self-hosted, aws, parallelworks, centos]
     steps:
       - name: Clean-up
         run: |
           cd ${{ github.workspace }}
           rm -rf ${{ github.run_id }}
-
diff --git a/ci/scripts/utils/parallel_works/UserBootstrap_centos7.txt b/ci/scripts/utils/parallel_works/UserBootstrap_centos7.txt
new file mode 100644
index 0000000000..ddc6b05706
--- /dev/null
+++ b/ci/scripts/utils/parallel_works/UserBootstrap_centos7.txt
@@ -0,0 +1,5 @@
+sudo yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm
+sudo yum -y install git
+/contrib/Terry.McGuinness/SETUP/provision_runner.sh
+ALLNODES
+/contrib/Terry.McGuinness/SETUP/mount-epic-contrib.sh
\ No newline at end of file
diff --git a/ci/scripts/utils/parallel_works/provision_runner.sh b/ci/scripts/utils/parallel_works/provision_runner.sh
new file mode 100755
index 0000000000..cac18c9315
--- /dev/null
+++ b/ci/scripts/utils/parallel_works/provision_runner.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# This script provisions a GitHub Actions runner on a Rocky or CentOS system.
+# It performs the following steps:
+# 1. Reads the operating system ID from /etc/os-release.
+# 2. Verifies that the ID is either rocky or centos (exits with status 1 otherwise).
+# 3. Checks if a run-helper process is already running for the current user and stops if so.
+# 4. Copies the actions-runner tar file from /contrib/${CI_USER}/SETUP/ to the home directory.
+# 5. Extracts the tar file and starts the actions-runner in the background.
+#
+# The actions-runner tar file contains the necessary binaries and scripts to run
+# a GitHub Actions runner. It is specific to the operating system and is expected
+# to be located in the /contrib/${CI_USER}/SETUP/ directory.
+
+CI_USER="Terry.McGuinness"  # names the /contrib/<user>/SETUP directory holding the tarball
+
+# Get the Operating System name from /etc/os-release
+OS_NAME=$(grep -E '^ID=' /etc/os-release | sed -E 's/ID="?([^"]*)"?/\1/') || true  # strips ID= and optional quotes
+
+# Check if the OS is Rocky or CentOS
+if [[ "${OS_NAME}" == "rocky" || "${OS_NAME}" == "centos" ]]; then
+  echo "Operating System is ${OS_NAME}"
+else
+  echo "Unsupported Operating System: ${OS_NAME}"
+  exit 1
+fi
+
+running=$(pgrep -u "${USER}" run-helper -c) || true  # -c prints a match count; pgrep exits non-zero on no match
+if [[ "${running}" -gt 0 ]]; then
+   echo "actions-runner is already running"
+   exit  # bare exit: returns the status of the echo above
+fi
+
+cp "/contrib/${CI_USER}/SETUP/actions-runner_${OS_NAME}.tar.gz" "${HOME}"  # tarball is selected by OS ID
+cd "${HOME}" || exit
+tar -xf "actions-runner_${OS_NAME}.tar.gz"
+cd actions-runner || exit
+d=$(date +%Y-%m-%d-%H:%M)
+nohup ./run.sh >& "run_nohup${d}.log" &  # >& sends stdout and stderr to the timestamped log