Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jenkins node.env #309

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 30 additions & 68 deletions ci/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ def STATUS = 'Passed'

pipeline {

environment {
BASH_ENV = "${CUSTOM_WORKSPACE}/workflow/gw_setup.sh"
}

agent { label 'built-in' }

options {
Expand All @@ -22,7 +26,6 @@ pipeline {
stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR
// which is used to designate the Nodes in the Jenkins Controller by the agent label
// Each Jenkins Node is connected to said machine via an JAVA agent via an ssh tunnel
// no op 2

stage('1. Get Machine') {
agent { label 'built-in' }
Expand Down Expand Up @@ -83,8 +86,8 @@ pipeline {
GH = sh(script: "which gh || echo '~/bin/gh'", returnStdout: true).trim()
CUSTOM_WORKSPACE = "${WORKSPACE}"
HOMEgfs = "${CUSTOM_WORKSPACE}/global-workflow"
sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/global-workflow; mkdir -p ${CUSTOM_WORKSPACE}/global-workflow")
sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS; mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS")
//sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/global-workflow; mkdir -p ${CUSTOM_WORKSPACE}/global-workflow")
//sh(script: "rm -Rf ${CUSTOM_WORKSPACE}/RUNTESTS; mkdir -p ${CUSTOM_WORKSPACE}/RUNTESTS")
sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --add-label "CI-${Machine}-Building" --remove-label "CI-${Machine}-Ready" """)
}
echo "Building and running on ${Machine} in directory ${CUSTOM_WORKSPACE}"
Expand Down Expand Up @@ -120,9 +123,10 @@ pipeline {
def error_logs_message = ""
dir("${HOMEgfs}/sorc") {
try {
sh(script: './build_all.sh -kgu') // build the global-workflow executables for GFS variant (UFS-wx-model, WW3 pre/post executables)
sh(script: './build_ww3prepost.sh -w > ./logs/build_ww3prepost_gefs.log 2>&1') // build the WW3 pre/post processing executables for GEFS variant
sh(script: './build_ufs.sh -w -e gefs_model.x > ./logs/build_ufs_gefs.log 2>&1') // build the UFS-wx-model executable for GEFS variant
//sh(script: './build_all.sh -kgu') // build the global-workflow executables for GFS variant (UFS-wx-model, WW3 pre/post executables)
//sh(script: './build_ww3prepost.sh -w > ./logs/build_ww3prepost_gefs.log 2>&1') // build the WW3 pre/post processing executables for GEFS variant
//sh(script: './build_ufs.sh -w -e gefs_model.x > ./logs/build_ufs_gefs.log 2>&1') // build the UFS-wx-model executable for GEFS variant
echo "Building global-workflow on ${Machine} SKIPPED"
} catch (Exception error_build) {
echo "Failed to build global-workflow: ${error_build.getMessage()}"
if ( fileExists("logs/error.logs") ) {
Expand Down Expand Up @@ -159,9 +163,6 @@ pipeline {
echo "Failed to update label from Building to Running: ${e.getMessage()}"
}
}
// Get a list of CI cases to run
CI_CASES = sh(script: "${HOMEgfs}/ci/scripts/utils/get_host_case_list.py ${machine}", returnStdout: true).trim().split()
echo "Cases to run: ${CI_CASES}"
}
}
}
Expand All @@ -175,75 +176,36 @@ pipeline {
agent { label NodeName[machine].toLowerCase() }
steps {
script {
env.BASH_ENV = sh(script: "source ${HOMEgfs}/workflow/gw_setup.sh", returnStdout: true).trim()
env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS"
// Get a list of CI cases to run
CI_CASES = sh(script: "source ${HOMEgfs}/workflow/gw_setup.sh && ${HOMEgfs}/ci/scripts/utils/get_host_case_list.py ${machine}", returnStdout: true).trim().split()
echo "Cases to run: ${CI_CASES}"
def parallelStages = CI_CASES.collectEntries { caseName ->
["${caseName}": {
stage("Create ${caseName}") {
script {
env.RUNTESTS = "${CUSTOM_WORKSPACE}/RUNTESTS"
try {
error_output = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml", returnStdout: true).trim()
} catch (Exception error_create) {
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "${Case} **FAILED** to create experiment on ${Machine} in BUILD# ${env.BUILD_NUMBER}\n with the error:\n\\`\\`\\`\n${error_output}\\`\\`\\`" """)
error("Case ${caseName} failed to create experiment directory")
}
}
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${caseName}.yaml")
}
}

stage("Running ${caseName}") {
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
script {
def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${CUSTOM_WORKSPACE}/RUNTESTS ${caseName}", returnStdout: true).trim()
def error_file = "${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs"
sh(script: " rm -f ${error_file}")
try {
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow'")
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cleanup_experiment ${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}")
} catch (Exception error_experment) {
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cancel_batch_jobs ${pslot}")
ws(CUSTOM_WORKSPACE) {
def error_logs = ""
def error_logs_message = ""
if (fileExists(error_file)) {
def fileContent = readFile error_file
def lines = fileContent.readLines()
for (line in lines) {
echo "archiving: ${line}"
if (fileExists("${CUSTOM_WORKSPACE}/${line}") && readFile("${CUSTOM_WORKSPACE}/${line}").length() > 0) {
try {
archiveArtifacts artifacts: "${line}", fingerprint: true
error_logs = error_logs + "${CUSTOM_WORKSPACE}/${line} "
error_logs_message = error_logs_message + "${CUSTOM_WORKSPACE}/${line}\n"
} catch (Exception error_arch) {
echo "Failed to archive error log ${line}: ${error_arch.getMessage()}"
}
}
}
try {
gist_url = sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --gist PR_${env.CHANGE_ID}", returnStdout: true).trim()
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} with error logs:\n\\`\\`\\`\n${error_logs_message}\\`\\`\\`\n\nFollow link here to view the contents of the above file(s): [(link)](${gist_url})" """)
sh(script: "${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo PR_${env.CHANGE_ID}")
} catch (Exception error_comment) {
echo "Failed to comment on PR: ${error_comment.getMessage()}"
}
} else {
echo "No error logs found for failed cases in ${CUSTOM_WORKSPACE}/RUNTESTS/${pslot}_error.logs"
}
STATUS = 'Failed'
try {
sh(script: """${GH} pr edit ${env.CHANGE_ID} --repo ${repo_url} --remove-label "CI-${Machine}-Running" --add-label "CI-${Machine}-${STATUS}" """, returnStatus: true)
sh(script: """${GH} pr comment ${env.CHANGE_ID} --repo ${repo_url} --body "Experiment ${caseName} **FAILED** on ${Machine} in Build# ${env.BUILD_NUMBER} in\n\\`${CUSTOM_WORKSPACE}/RUNTESTS/EXPDIR/${pslot}\\`" """)
} catch (Exception e) {
echo "Failed to update label from Running to ${STATUS}: ${e.getMessage()}"
}
error("Failed to run experiments ${caseName} on ${Machine}")
}
}
}
catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
script {
def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${RUNTESTS} ${caseName}", returnStdout: true).trim()
echo "pslot: ${pslot}"
echo "CUSTOM_WORKSPACE: ${CUSTOM_WORKSPACE}"
echo "RUNTESTS: ${RUNTESTS}"
echo "HOMEgfs: ${HOMEgfs}"
def error_file = "${RUNTESTS}/${pslot}_error.logs"
sh(script: "rm -f ${error_file}")
sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${CUSTOM_WORKSPACE} ${pslot} 'global-workflow'")
sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh cleanup_experiment ${RUNTESTS}/EXPDIR/${pslot}")
}
}
}
}]
}
parallel parallelStages + [failFast: true]
parallel parallelStages + [failFast: true]
}
}
}
Expand Down
11 changes: 6 additions & 5 deletions ci/scripts/utils/launch_java_agent.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@ controller_url="https://jenkins.epic.oarcloud.noaa.gov"
controller_user=${controller_user:-"terry.mcguinness"}
controller_user_auth_token="jenkins_token"

HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )"
declare -r HOMEGFS_
HOMEGFS_="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )"
host=$(hostname)

#########################################################################
# Set up runtime environment varibles for accounts on supproted machines
#########################################################################

source "${HOMEgfs}/ush/detect_machine.sh"
source "${HOMEGFS_}/ush/detect_machine.sh"
case ${MACHINE_ID} in
hera | orion | hercules | wcoss2 | gaea)
echo "Launch Jenkins Java Controler on ${MACHINE_ID}";;
Expand All @@ -84,10 +85,10 @@ esac
LOG=lanuched_agent-$(date +%Y%m%d%M).log
rm -f "${LOG}"

source "${HOMEgfs}/ush/module-setup.sh"
module use "${HOMEgfs}/modulefiles"
source "${HOMEGFS_}/ush/module-setup.sh"
module use "${HOMEGFS_}/modulefiles"
module load "module_gwsetup.${MACHINE_ID}"
source "${HOMEgfs}/ci/platforms/config.${MACHINE_ID}"
source "${HOMEGFS_}/ci/platforms/config.${MACHINE_ID}"

JAVA_HOME="${JENKINS_AGENT_LANUCH_DIR}/JAVA/jdk-17.0.10"
if [[ ! -d "${JAVA_HOME}" ]]; then
Expand Down
Loading