-
Notifications
You must be signed in to change notification settings - Fork 29
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Executable Monitor Change #62
Changes from all commits
59a0095
fd1973c
4c3a090
233ee8f
4dc2b64
bb786e2
66f25cf
a04dd54
0af25d8
a92c35f
624268a
2019c40
40f065b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,10 @@ | |
type=int, | ||
required=False, | ||
help='Exit status that indicates that the executable completed successfully. Required if --success-line is not used.') | ||
parser.add_argument('--retry-attempts', | ||
type=int, | ||
required=False, | ||
help='Number of times to attempt re-running the executable if the correct exit condition is not found.') | ||
|
||
args = parser.parse_args() | ||
|
||
|
@@ -56,6 +60,11 @@ | |
if not os.path.exists(args.log_dir): | ||
os.makedirs(args.log_dir, exist_ok = True) | ||
|
||
if not args.retry_attempts: | ||
retryAttempts = 0 | ||
else: | ||
retryAttempts = args.retry_attempts | ||
|
||
# Convert any relative path (like './') in passed argument to absolute path. | ||
exe_abs_path = os.path.abspath(args.exe_path) | ||
log_dir = os.path.abspath(args.log_dir) | ||
|
@@ -72,76 +81,114 @@ | |
logging.info(f"Running executable: {exe_abs_path} ") | ||
logging.info(f"Storing logs in: {log_dir}") | ||
logging.info(f"Timeout (seconds): {args.timeout_seconds}") | ||
logging.info(f"Searching for success line: {args.success_line}") | ||
logging.info(f"Will re-try the run {retryAttempts} times") | ||
if args.success_exit_status is not None: | ||
logging.info("Looking for exit status {0}".format(args.success_exit_status )) | ||
for attempts in range(0,retryAttempts + 1): | ||
|
||
# Initialize values | ||
success_line = "" | ||
timeout_occurred = False | ||
exe_exit_status = None | ||
exe_exitted = False | ||
success_line_found = False | ||
exit_condition_met = False | ||
wait_for_exit = args.success_exit_status is not None | ||
|
||
# Create two file descriptors. The subprocess writes to one, the parent task reads from the other | ||
# This is a workaround to avoid the fact that calling readline() on the stdout of the subprocess is | ||
# a blocking call. Where if the subprocess is running but hasn't printed anything, readline will never time out. | ||
# The approach uses the underlying file system to not block on data that hasn't been written. | ||
WriteOutputFile = open("output.log", "w") | ||
ReadOutputFile = open("output.log", "r") | ||
|
||
# Launch the executable | ||
exe = subprocess.Popen([exe_abs_path], stdout=WriteOutputFile, stderr=WriteOutputFile, universal_newlines=True, env=os.environ) | ||
|
||
# Initialize values | ||
timeout_occurred = False | ||
exe_exit_status = None | ||
exe_exitted = False | ||
success_line_found = False | ||
wait_for_exit = args.success_exit_status is not None | ||
|
||
# Launch the executable | ||
exe = subprocess.Popen([exe_abs_path], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) | ||
|
||
cur_time_seconds = time.time() | ||
timeout_time_seconds = cur_time_seconds + args.timeout_seconds | ||
|
||
logging.info("START OF DEVICE OUTPUT\n") | ||
|
||
while not (timeout_occurred or exe_exitted or (not wait_for_exit and success_line_found)): | ||
|
||
# Check if executable exitted | ||
exe_exit_status = exe.poll() | ||
if exe_exit_status is not None: | ||
exe_exitted = True | ||
|
||
# Read executable's stdout and write to stdout and logfile | ||
exe_stdout_line = exe.stdout.readline() | ||
logging.info(exe_stdout_line) | ||
|
||
# Check if the executable printed out it's success line | ||
if args.success_line is not None and args.success_line in exe_stdout_line: | ||
success_line_found = True | ||
|
||
# Check for timeout | ||
cur_time_seconds = time.time() | ||
if cur_time_seconds >= timeout_time_seconds: | ||
timeout_occurred = True | ||
|
||
# Sleep for a short duration between loops to not steal all system resources | ||
time.sleep(.1) | ||
|
||
if not exe_exitted: | ||
exe.kill() | ||
|
||
# Capture remaining output and check for the successful line | ||
for exe_stdout_line in exe.stdout.readlines(): | ||
logging.info(exe_stdout_line) | ||
if args.success_line is not None and args.success_line in exe_stdout_line: | ||
success_line_found = True | ||
|
||
logging.info("END OF DEVICE OUTPUT\n") | ||
|
||
logging.info("EXECUTABLE RUN SUMMARY:\n") | ||
timeout_time_seconds = cur_time_seconds + args.timeout_seconds | ||
|
||
logging.info("START OF DEVICE OUTPUT\n") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Spurious newline. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed |
||
|
||
# While a timeout hasn't happened, the executable is running, and an exit condition has not been met | ||
while ( not exit_condition_met ): | ||
# Sleep for a short duration between loops to not steal all system resources | ||
time.sleep(.05) | ||
|
||
# Check if executable exitted | ||
exe_exit_status = exe.poll() | ||
if exe_exit_status is not None: | ||
logging.info(f"EXECUTABLE CLOSED WITH STATUS: {exe_exit_status}") | ||
exe_exitted = True | ||
exit_condition_met = True | ||
|
||
# Read executable's stdout and write to stdout and logfile | ||
# A potential improvement here would be to do readlines() on the file, then truncate() | ||
# This might be cleaner than this approach of reading a single line each loop. | ||
exe_stdout_line = ReadOutputFile.readline() | ||
if(exe_stdout_line is not None) and (len(exe_stdout_line.strip()) > 1): | ||
# Check if the executable printed out its success line | ||
if ( args.success_line is not None ) and ( args.success_line in exe_stdout_line ) : | ||
logging.info(f"SUCCESS_LINE_FOUND: {exe_stdout_line}") | ||
success_line_found = True | ||
success_line = exe_stdout_line | ||
if( not wait_for_exit ): | ||
exit_condition_met = True | ||
else: | ||
logging.info(exe_stdout_line) | ||
|
||
# Check for timeout | ||
cur_time_seconds = time.time() | ||
if cur_time_seconds >= timeout_time_seconds: | ||
logging.info(f"TIMEOUT OF {args.timeout_seconds} SECONDS HIT") | ||
timeout_occurred = True | ||
exit_condition_met = True | ||
|
||
if not exe_exitted: | ||
logging.info(f"EXECUTABLE DID NOT EXIT, MANUALLY KILLING NOW") | ||
exe.kill() | ||
|
||
if not exit_condition_met: | ||
logging.info(f"PARSING REST OF LOG") | ||
# Capture remaining output and check for the successful line | ||
for exe_stdout_line in ReadOutputFile.readlines(): | ||
logging.info(exe_stdout_line) | ||
if args.success_line is not None and args.success_line in exe_stdout_line: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should success_line be a regex rather than a particular line? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a bad idea, but think for now want to stick with this instead of the regex. Can add that in down the line if we think it would be useful. |
||
success_line_found = True | ||
success_line = exe_stdout_line | ||
logging.info(f"SUCCESS_LINE_FOUND: {exe_stdout_line}") | ||
|
||
# Close the files | ||
WriteOutputFile.close() | ||
ReadOutputFile.close() | ||
|
||
logging.info("END OF DEVICE OUTPUT") | ||
|
||
logging.info("EXECUTABLE RUN SUMMARY:\n") | ||
|
||
exit_status = 0 | ||
|
||
if args.success_line is not None: | ||
if not success_line_found: | ||
logging.error("Success Line: Success line not output.\n") | ||
exit_status = 1 | ||
|
||
exit_status = 0 | ||
if args.success_exit_status is not None: | ||
if exe_exitted: | ||
if exe_exit_status != args.success_exit_status: | ||
exit_status = 1 | ||
logging.info(f"Exit Status: {exe_exit_status}") | ||
else: | ||
logging.error("Exit Status: Executable did not exit by itself.\n") | ||
exit_status = 1 | ||
|
||
if args.success_line is not None: | ||
if success_line_found: | ||
logging.info("Success Line: Found.\n") | ||
else: | ||
logging.error("Success Line: Success line not output.\n") | ||
exit_status = 1 | ||
if( exit_status == 0 ): | ||
logging.info(f"Run found a valid success metric\n") | ||
sys.exit(exit_status) | ||
|
||
if args.success_exit_status is not None: | ||
if exe_exitted: | ||
if exe_exit_status != args.success_exit_status: | ||
exit_status = 1 | ||
logging.info(f"Exit Status: {exe_exit_status}") | ||
else: | ||
logging.error("Exit Status: Executable did not exit.\n") | ||
exe_status = 1 | ||
|
||
elif( attempts < retryAttempts ): | ||
logging.info(f"Did not succeed, trying re-attempt {attempts+1} of {retryAttempts}\n") | ||
|
||
# Report if executable executed successfully to workflow | ||
# Report final exit status if no successful run occured | ||
sys.exit(exit_status) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not a fan of having retry attempts built in here. We should require that checks running in CI run reliably every time.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's actually entirely there for if doing an external network test where a failure due to not being able to connect is something that could happen. Where thought that having the ability to retry for something like that might be valuable in the future.