AutoRT Logging fixes and cleanup if SUCCESSFUL #452

Closed
19 commits
9059ddb
Only commit logs that have the word 'SUCCESSFUL' in them, otherwise p…
BrianCurtis-NOAA Mar 4, 2021
0ea3535
PASSED: hera.gnu. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 5, 2021
0ae4fb6
Updated logging methods
BrianCurtis-NOAA Mar 5, 2021
41de594
Merge branch 'feature/autort-logfixes' of https://github.com/BrianCur…
BrianCurtis-NOAA Mar 5, 2021
009c946
PASSED: jet.intel. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 5, 2021
2c0e9ee
PASSED: gaea.intel. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 5, 2021
63711a3
PASSED: hera.gnu. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 5, 2021
6a7b90c
PASSED: hera.gnu. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 5, 2021
7fef157
Fixed incorrect method causing constant use of intel compiler
BrianCurtis-NOAA Mar 5, 2021
25f0294
PASSED: gaea.intel. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 6, 2021
478fdb4
PASSED: jet.intel. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 6, 2021
f646d12
PASSED: hera.gnu. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 6, 2021
b28523b
PASSED: hera.intel. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 6, 2021
29c1453
PASSED: hera.gnu. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 6, 2021
a3e9c95
PASSED: hera.gnu. Log file uploaded. skip-ci
BrianCurtis-NOAA Mar 6, 2021
78db904
Remove pr dir and run dir if RT successful
BrianCurtis-NOAA Mar 7, 2021
f31b2cc
Merge branch 'feature/autort-logfixes' of https://github.com/BrianCur…
BrianCurtis-NOAA Mar 7, 2021
53234f4
merge with upstream/develop
BrianCurtis-NOAA Mar 7, 2021
7835c01
Revert to using ufs-community
BrianCurtis-NOAA Mar 7, 2021
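
Taken together, the commits above change the auto-RT behavior in two ways: the regression-test log is only committed back to the PR branch when it contains the word 'SUCCESSFUL', and the run and PR working directories are removed after a successful run. Below is a minimal standalone sketch of that gating logic; the helper names and directory arguments are hypothetical, and the authoritative implementation is in tests/auto/rt_auto.py in the diff that follows.

import os
import shutil

def log_indicates_success(logfile):
    # Return True only when the RT log exists and reports SUCCESSFUL.
    if not os.path.exists(logfile):
        raise FileNotFoundError(f'Could not find RT log: {logfile}')
    with open(logfile) as f:
        return any('SUCCESSFUL' in line for line in f)

def finalize_run(logfile, run_dir, pr_dir):
    # Commit-or-stop decision: only a successful run gets its log pushed and
    # its run/PR directories removed (a stand-in for the git commands and
    # remove_pr_data() in rt_auto.py).
    if not log_indicates_success(logfile):
        return False
    shutil.rmtree(run_dir, ignore_errors=True)
    shutil.rmtree(pr_dir, ignore_errors=True)
    return True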
198 changes: 99 additions & 99 deletions tests/auto/rt_auto.py
@@ -6,7 +6,6 @@
This script should be started through rt_auto.sh so that env vars are set up
prior to start.
"""
from __future__ import print_function
from github import Github as gh
import argparse
import datetime
@@ -65,27 +64,40 @@ def input_data(args):
}]
action_list_dict = [{
'name': 'RT',
'command': 'cd tests && /bin/bash --login ./rt.sh -e',
'callback_fnc': 'move_rt_logs'
'callback_fnc': 'rt_callback'
},
{
'name': 'BL',
'callback_fnc': 'bl_callback'
}]

return machine_dict, repo_list_dict, action_list_dict

def match_label_with_action(machine, actions, label):
def set_action_from_label(machine, actions, label):
''' Match the label that initiates a job with an action in the dict'''
# <machine>-<compiler>-<test> i.e. hera-gnu-RT
# RT = full regression test suite
logger = logging.getLogger('MATCH_LABEL_WITH_ACTIONS')
split_label = label.name.split('-')
if len(split_label) != 3: return False, False #Make sure it has three parts
if not re.match(split_label[0], machine['name']): return False, False #First check machine name matches
compiler = split_label[1]
if not str(compiler) in ["intel", "gnu"]: return False, False
action_match = next((action for action in actions if re.match(action['name'], split_label[2])), False)
action_match["command"] = f'export RT_COMPILER="{compiler}" && {action_match["command"]}'
if split_label[2] == "RT" and compiler == "gnu":
action_match["command"] = f'{action_match["command"]} -l rt_gnu.conf'
return compiler, action_match
label_machine = split_label[0]
label_compiler = split_label[1]
label_action = split_label[2]
if not re.match(label_machine, machine['name']): return False, False #check machine name matches
if not str(label_compiler) in ["intel", "gnu"]: return False, False #Compiler must be intel or gnu
action_match = next((action for action in actions if re.match(action['name'], label_action)), False)
if label_action == 'RT': # SET ACTIONS BASED ON RT COMMAND
if label_compiler == "intel":
action_match["command"] = f'export RT_COMPILER="intel" && cd tests && /bin/bash --login ./rt.sh -e'
elif label_compiler == "gnu":
action_match["command"] = f'export RT_COMPILER="gnu" && cd tests && /bin/bash --login ./rt.sh -e -l rt_gnu.conf'
elif label_action == 'BL': # SET ACTIONS BASED ON BL COMMAND
if label_compiler == "intel":
action_match["command"] = f'export RT_COMPILER="intel" && cd tests && /bin/bash --login ./rt.sh -e -c'
elif label_compiler == "gnu":
action_match["command"] = f'export RT_COMPILER="gnu" && cd tests && /bin/bash --login ./rt.sh -e -c -l rt_gnu.conf'

return label_compiler, action_match


def get_preqs_with_actions(repos, machine, ghinterface_obj, actions):
@@ -95,17 +107,15 @@ def get_preqs_with_actions(repos, machine, ghinterface_obj, actions):
each_pr = [preq for gh_preq in gh_preqs for preq in gh_preq]
preq_labels = [{'preq': pr, 'label': label} for pr in each_pr for label in pr.get_labels()]

for i, pr_label in enumerate(preq_labels):
compiler, match = match_label_with_action(machine, actions, pr_label['label'])
return_preq = []
for pr_label in preq_labels:
compiler, match = set_action_from_label(machine, actions, pr_label['label'])
if match:
preq_labels[i]['action'] = match
preq_labels[i]['compiler'] = compiler
else:
preq_labels[i] = False

preq_dict = [x for x in preq_labels if x]
pr_label['action'] = match.copy()
pr_label['compiler'] = compiler
return_preq.append(pr_label.copy())

return preq_dict
return return_preq

class Job:
'''
@@ -144,31 +154,6 @@ def check_label_before_job_start(self):

return label_match


def send_log_name_as_comment(self, log_filename):
logger = logging.getLogger('JOB/SEND_LOG_NAME_AS_COMMENT')

#Remove LAST MONTHS LOGS
logger.info('Removing last months logs (if any)')
last_month = datetime.date.today().replace(day=1) - datetime.timedelta(days=1)
rm_command = [[f'rm rt_auto_*_{last_month.strftime("%Y%m")}*.log', os.getcwd()]]
logger.info(f'Running "{rm_command}"')
try:
self.run_commands(rm_command)
except Exception as e:
logger.warning(f'"{rm_command}" failed with error:{e}')

# Add log information to PR.
comment_text = f'Log Name:{log_filename}\n'\
f'Log Location:{os.getcwd()}\n'\
'Logs are kept for one month'
try:
self.preq_dict['preq'].create_issue_comment(comment_text)
except Exception as e:
logger.warning('Creating comment with log location failed with:{e}')
else:
logger.info(f'{comment_text}')

def run_commands(self, commands_with_cwd):
logger = logging.getLogger('JOB/RUN_COMMANDS')
for command, in_cwd in commands_with_cwd:
@@ -179,19 +164,18 @@ def run_commands(self, commands_with_cwd):
out = [] if not out else out.decode('utf8').split('\n')
err = [] if not err else err.decode('utf8').split('\n')
except Exception as e:
logger.critical(e)
[logger.critical(f'stdout: {item}') for item in out if not None]
[logger.critical(f'stderr: {eitem}') for eitem in err if not None]
assert(e)
self.job_failed(logger, f'Command {command}', exception=e, out=out, err=err)
else:
logger.info(f'Finished running: {command}')
[logger.debug(f'stdout: {item}') for item in out if not None]
[logger.debug(f'stderr: {eitem}') for eitem in err if not None]

def remove_pr_dir(self):
logger = logging.getLogger('JOB/REMOVE_PR_DIR')
def remove_pr_data(self):
logger = logging.getLogger('JOB/REMOVE_PR_DATA')
pr_dir_str = f'{self.machine["workdir"]}/{str(self.preq_dict["preq"].id)}'
rm_command = [[f'rm -rf {pr_dir_str}', self.pr_repo_loc]]
rm_command = [
[f'rm -rf {self.rt_dir}', self.pr_repo_loc],
[f'rm -rf {pr_dir_str}', self.pr_repo_loc]
]
logger.info(f'Running "{rm_command}"')
self.run_commands(rm_command)

@@ -205,7 +189,7 @@ def clone_pr_repo(self):
logger.info(f'GIT URL: {git_url}')
logger.info('Starting repo clone')
repo_dir_str = f'{self.machine["workdir"]}/{str(self.preq_dict["preq"].id)}/{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}'

self.pr_repo_loc = repo_dir_str+"/"+repo_name
create_repo_commands = [
[f'mkdir -p "{repo_dir_str}"', self.machine['workdir']],
[f'git clone -b {self.branch} {git_url}', repo_dir_str],
@@ -215,69 +199,101 @@ def clone_pr_repo(self):
self.run_commands(create_repo_commands)

logger.info('Finished repo clone')
self.pr_repo_loc = repo_dir_str+"/"+repo_name
return self.pr_repo_loc

def run_function(self):
def execute_command(self):
''' Run the command associated with the label used to initiate this job '''
logger = logging.getLogger('JOB/RUN_FUNCTION')
logger = logging.getLogger('JOB/EXECUTE_COMMAND')
compiler = self.preq_dict['compiler']
logger.info(f'Compiler being used for command is {compiler}')
command = self.preq_dict["action"]["command"]

try:
logger.info(f'Running: "{command}" in "{self.pr_repo_loc}"')
output = subprocess.Popen(command, cwd=self.pr_repo_loc, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
out,err = output.communicate()
except Exception as e:
out = [] if not out else out.decode('utf8').split('\n')
err = [] if not err else err.decode('utf8').split('\n')
except Exception as e:
logger.critical(e)
[logger.critical(f'stdout: {item}') for item in out if not None]
[logger.critical(f'stderr: {eitem}') for eitem in err if not None]
assert(e)
self.job_failed(logger, f'Command {command}', exception=e, out=out, err=err)
else:
if output.returncode != 0:
comment_text = f'rt.sh failed \n'\
f'machine: {self.machine["name"]} \n'\
f'compiler: {self.preq_dict["compiler"]}\n'\
f'STDOUT: {out} \n'\
f'STDERR: {err}'
self.preq_dict['preq'].create_issue_comment(comment_text)
logger.critical(f'{command} Failed')
[logger.critical(f'stdout: {item}') for item in out if not None]
[logger.critical(f'stderr: {eitem}') for eitem in err if not None]
self.job_failed(logger, "Script rt.sh", exception=SystemExit, STDOUT=False)
else:
try:
logger.info(f'Attempting to run callback: {self.preq_dict["action"]["callback_fnc"]}')
getattr(self, self.preq_dict['action']['callback_fnc'])()
except Exception as e:
logger.critical(f'Callback function {self.preq_dict["action"]["callback_fnc"]} failed with "{e}"')
goodexit = False
assert(e)
self.job_failed(logger, f'Callback function {self.preq_dict["action"]["callback_fnc"]}', exception=e, STDOUT=False)
else:
logger.info(f'Finished callback {self.preq_dict["action"]["callback_fnc"]}')
[logger.debug(f'stdout: {item}') for item in out if not None]
[logger.debug(f'stderr: {eitem}') for eitem in err if not None]

def run(self):
logger = logging.getLogger('JOB/RUN')
logger.info(f'Starting Job: {self.preq_dict["label"]}')
if self.check_label_before_job_start():
try:
logger.info('Calling remove_pr_label')
self.remove_pr_label()
logger.info('Calling clone_pr_repo')
self.clone_pr_repo()
logger.info('Calling execute_command')
self.execute_command()
except Exception as e:
self.job_failed(logger, f'run()', exception=e, STDOUT=False)
else:
logger.info(f'Cannot find label {self.preq_dict["label"]}')

def job_failed(self, logger, job_name, exception=Exception, STDOUT=True, out=None, err=None):
comment_text = f'{job_name} FAILED \n'\
f'Repo location: {self.pr_repo_loc} \n'\
f'Machine: {self.machine["name"]} \n'\
f'Compiler: {self.preq_dict["compiler"]} \n'
if STDOUT:
comment_text=comment_text+'\n'\
f'STDOUT: {[item for item in out if not None]} \n'\
f'STDERR: {[eitem for eitem in err if not None]} \n'
comment_text = comment_text+'Please make changes and add the following label back: '\
f'{self.machine["name"]}-{self.preq_dict["compiler"]}-{self.preq_dict["action"]["name"]}'
logger.critical(comment_text)
self.preq_dict['preq'].create_issue_comment(comment_text)
raise exception

def process_logfile(self, logfile):
self.rt_dir = []
if os.path.exists(logfile):
with open(logfile) as f:
for line in f:
if 'working dir' in line and not self.rt_dir:
self.rt_dir = os.path.split(line.split()[-1])[0]
elif 'SUCCESSFUL' in line:
return True
self.job_failed(logger, "Regression Tests", STDOUT=False)
else:
logger.critical(f'Could not find {self.machine["name"]}.{self.preq_dict["compiler"]} RT log')
raise FileNotFoundError(f'Could not find {self.machine["name"]}.{self.preq_dict["compiler"]} RT log')

# Add Callback Functions After Here
def move_rt_logs(self):
def rt_callback(self):
''' This is the callback function associated with the "RT" command '''
logger = logging.getLogger('JOB/MOVE_RT_LOGS')
rt_log = f'tests/RegressionTests_{self.machine["name"]}.{self.preq_dict["compiler"]}.log'
filepath = f'{self.pr_repo_loc}/{rt_log}'
if os.path.exists(filepath):
logfile_pass = self.process_logfile(filepath)
if logfile_pass:
move_rt_commands = [
[f'git pull --ff-only origin {self.branch}', self.pr_repo_loc],
[f'git add {rt_log}', self.pr_repo_loc],
[f'git commit -m "Auto: Add RT Log file: {rt_log} skip-ci"', self.pr_repo_loc],
[f'git commit -m "PASSED: {self.machine["name"]}.{self.preq_dict["compiler"]}. Log file uploaded. skip-ci"', self.pr_repo_loc],
['sleep 10', self.pr_repo_loc],
[f'git push origin {self.branch}', self.pr_repo_loc]
]
self.run_commands(move_rt_commands)
self.remove_pr_data()

else:
logger.critical('Could not find Intel RT log')
raise FileNotFoundError('Could not find Intel RT log')
def bl_callback(self):
pass

def main():

@@ -308,23 +324,7 @@ def main():
# add Job objects and run them
logger.info('Adding all jobs to an object list and running them.')
jobs = [Job(pullreq, ghinterface_obj, machine) for pullreq in preq_dict]
for job in jobs:
logger.info(f'Starting Job: {job}')
if job.check_label_before_job_start():
try:
logger.info('Calling remove_pr_label')
job.remove_pr_label()
logger.info('Calling clone_pr_repo')
job.clone_pr_repo()
logger.info('Calling run_function')
job.run_function()
logger.info('Calling remove_pr_dir')
# job.remove_pr_dir()
# logger.info('Calling send_log_name_as_comment')
job.send_log_name_as_comment(log_filename)
except Exception as e:
logger.critical(e)
assert(e)
[job.run() for job in jobs]

logger.info('Script Finished')

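For reference, the <machine>-<compiler>-<test> label convention parsed by set_action_from_label() maps onto rt.sh invocations as sketched below. This is a simplified standalone illustration: the function name and the exact-match comparison are assumptions, while the real code uses re.match and stores the command in the matched action dictionary.

def command_for_label(label, machine_name):
    # Map a '<machine>-<compiler>-<test>' label (e.g. 'hera-gnu-RT') to an
    # rt.sh command line, or return None if the label does not apply here.
    parts = label.split('-')
    if len(parts) != 3:
        return None
    label_machine, compiler, action = parts
    if label_machine != machine_name or compiler not in ('intel', 'gnu'):
        return None
    if action not in ('RT', 'BL'):
        return None

    cmd = f'export RT_COMPILER="{compiler}" && cd tests && /bin/bash --login ./rt.sh -e'
    if action == 'BL':
        cmd += ' -c'                 # BL runs add -c
    if compiler == 'gnu':
        cmd += ' -l rt_gnu.conf'     # gnu runs use the gnu test configuration
    return cmd

# Example: command_for_label('hera-gnu-RT', 'hera') returns
# export RT_COMPILER="gnu" && cd tests && /bin/bash --login ./rt.sh -e -l rt_gnu.conf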