Skip to content

Commit

Permalink
Executable Monitor Change (#62)
Browse files Browse the repository at this point in the history
* Use two files descriptors to capture subprocess output. Use golang for the echo server
* Added retry logic to the executable monitor, swapping back to python echo server for test
* Adding success line check changes. Changing the while loop, printing exit status if we look for it
* Some more info about why this is the way it is, and making it so we only print the rest of the log if the run doesn't find an exit condition originally.
---------
  • Loading branch information
Skptak authored Jul 11, 2023
1 parent 1f59a76 commit 31f9331
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 69 deletions.
9 changes: 7 additions & 2 deletions executable-monitor/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ inputs:
timeout-seconds:
description: 'Maximum amount of time to run the executable. Default is 600.'
required: false
default: 600
default: 300
retry-attempts:
description: 'Number of times to re-launch the binary to check for success.'
required: false
default: 0

runs:
using: "composite"
Expand All @@ -23,5 +27,6 @@ runs:
run: pip install -r $GITHUB_ACTION_PATH/requirements.txt
shell: bash
- name: Run executable with monitoring script
run: python3 $GITHUB_ACTION_PATH/executable-monitor.py --exe-path=${{ inputs.exe-path }} --timeout-seconds=${{ inputs.timeout-seconds }} --success-line="${{ inputs.success-line }}" --log-dir=${{ inputs.log-dir }}
run: |
python3 $GITHUB_ACTION_PATH/executable-monitor.py --exe-path=${{ inputs.exe-path }} --timeout-seconds=${{ inputs.timeout-seconds }} --success-line="${{ inputs.success-line }}" --log-dir=${{ inputs.log-dir }} --retry-attempts=${{ inputs.retry-attempts }}
shell: bash
179 changes: 113 additions & 66 deletions executable-monitor/executable-monitor.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@
type=int,
required=False,
help='Exit status that indicates that the executable completed successfully. Required if --success-line is not used.')
parser.add_argument('--retry-attempts',
type=int,
required=False,
help='Number of times to attempt re-running the executable if the correct exit condition is not found.')

args = parser.parse_args()

Expand All @@ -56,6 +60,11 @@
if not os.path.exists(args.log_dir):
os.makedirs(args.log_dir, exist_ok = True)

if not args.retry_attempts:
retryAttempts = 0
else:
retryAttempts = args.retry_attempts

# Convert any relative path (like './') in passed argument to absolute path.
exe_abs_path = os.path.abspath(args.exe_path)
log_dir = os.path.abspath(args.log_dir)
Expand All @@ -72,76 +81,114 @@
logging.info(f"Running executable: {exe_abs_path} ")
logging.info(f"Storing logs in: {log_dir}")
logging.info(f"Timeout (seconds): {args.timeout_seconds}")
logging.info(f"Searching for success line: {args.success_line}")
logging.info(f"Will re-try the run {retryAttempts} times")
if args.success_exit_status is not None:
logging.info("Looking for exit status {0}".format(args.success_exit_status ))
for attempts in range(0,retryAttempts + 1):

# Initialize values
success_line = ""
timeout_occurred = False
exe_exit_status = None
exe_exitted = False
success_line_found = False
exit_condition_met = False
wait_for_exit = args.success_exit_status is not None

# Create two file descriptors. The subprocess writes to one, the parent task reads from the other
# This is a workaround to avoid the fact that calling readline() on the stdout of the subprocess is
# a blocking call. Where if the subprocess is running but hasn't printed anything, readline will never time out.
# The approach uses the underlying file system to not block on data that hasn't been written.
WriteOutputFile = open("output.log", "w")
ReadOutputFile = open("output.log", "r")

# Launch the executable
exe = subprocess.Popen([exe_abs_path], stdout=WriteOutputFile, stderr=WriteOutputFile, universal_newlines=True, env=os.environ)

# Initialize values
timeout_occurred = False
exe_exit_status = None
exe_exitted = False
success_line_found = False
wait_for_exit = args.success_exit_status is not None

# Launch the executable
exe = subprocess.Popen([exe_abs_path], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)

cur_time_seconds = time.time()
timeout_time_seconds = cur_time_seconds + args.timeout_seconds

logging.info("START OF DEVICE OUTPUT\n")

while not (timeout_occurred or exe_exitted or (not wait_for_exit and success_line_found)):

# Check if executable exitted
exe_exit_status = exe.poll()
if exe_exit_status is not None:
exe_exitted = True

# Read executable's stdout and write to stdout and logfile
exe_stdout_line = exe.stdout.readline()
logging.info(exe_stdout_line)

# Check if the executable printed out it's success line
if args.success_line is not None and args.success_line in exe_stdout_line:
success_line_found = True

# Check for timeout
cur_time_seconds = time.time()
if cur_time_seconds >= timeout_time_seconds:
timeout_occurred = True

# Sleep for a short duration between loops to not steal all system resources
time.sleep(.1)

if not exe_exitted:
exe.kill()

# Capture remaining output and check for the successful line
for exe_stdout_line in exe.stdout.readlines():
logging.info(exe_stdout_line)
if args.success_line is not None and args.success_line in exe_stdout_line:
success_line_found = True

logging.info("END OF DEVICE OUTPUT\n")

logging.info("EXECUTABLE RUN SUMMARY:\n")
timeout_time_seconds = cur_time_seconds + args.timeout_seconds

logging.info("START OF DEVICE OUTPUT\n")

# While a timeout hasn't happened, the executable is running, and an exit condition has not been met
while ( not exit_condition_met ):
# Sleep for a short duration between loops to not steal all system resources
time.sleep(.05)

# Check if executable exitted
exe_exit_status = exe.poll()
if exe_exit_status is not None:
logging.info(f"EXECUTABLE CLOSED WITH STATUS: {exe_exit_status}")
exe_exitted = True
exit_condition_met = True

# Read executable's stdout and write to stdout and logfile
# A potential improvement here would be to do readlines() on the file, then truncate()
# This might be cleaner than this approach of reading a single line each loop.
exe_stdout_line = ReadOutputFile.readline()
if(exe_stdout_line is not None) and (len(exe_stdout_line.strip()) > 1):
# Check if the executable printed out its success line
if ( args.success_line is not None ) and ( args.success_line in exe_stdout_line ) :
logging.info(f"SUCCESS_LINE_FOUND: {exe_stdout_line}")
success_line_found = True
success_line = exe_stdout_line
if( not wait_for_exit ):
exit_condition_met = True
else:
logging.info(exe_stdout_line)

# Check for timeout
cur_time_seconds = time.time()
if cur_time_seconds >= timeout_time_seconds:
logging.info(f"TIMEOUT OF {args.timeout_seconds} SECONDS HIT")
timeout_occurred = True
exit_condition_met = True

if not exe_exitted:
logging.info(f"EXECUTABLE DID NOT EXIT, MANUALLY KILLING NOW")
exe.kill()

if not exit_condition_met:
logging.info(f"PARSING REST OF LOG")
# Capture remaining output and check for the successful line
for exe_stdout_line in ReadOutputFile.readlines():
logging.info(exe_stdout_line)
if args.success_line is not None and args.success_line in exe_stdout_line:
success_line_found = True
success_line = exe_stdout_line
logging.info(f"SUCCESS_LINE_FOUND: {exe_stdout_line}")

# Close the files
WriteOutputFile.close()
ReadOutputFile.close()

logging.info("END OF DEVICE OUTPUT")

logging.info("EXECUTABLE RUN SUMMARY:\n")

exit_status = 0

if args.success_line is not None:
if not success_line_found:
logging.error("Success Line: Success line not output.\n")
exit_status = 1

exit_status = 0
if args.success_exit_status is not None:
if exe_exitted:
if exe_exit_status != args.success_exit_status:
exit_status = 1
logging.info(f"Exit Status: {exe_exit_status}")
else:
logging.error("Exit Status: Executable did not exit by itself.\n")
exit_status = 1

if args.success_line is not None:
if success_line_found:
logging.info("Success Line: Found.\n")
else:
logging.error("Success Line: Success line not output.\n")
exit_status = 1
if( exit_status == 0 ):
logging.info(f"Run found a valid success metric\n")
sys.exit(exit_status)

if args.success_exit_status is not None:
if exe_exitted:
if exe_exit_status != args.success_exit_status:
exit_status = 1
logging.info(f"Exit Status: {exe_exit_status}")
else:
logging.error("Exit Status: Executable did not exit.\n")
exe_status = 1

elif( attempts < retryAttempts ):
logging.info(f"Did not succeed, trying re-attempt {attempts+1} of {retryAttempts}\n")

# Report if executable executed successfully to workflow
# Report final exit status if no successful run occured
sys.exit(exit_status)
3 changes: 2 additions & 1 deletion localhost-echo-server/local_echo_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ async def echo_handler(reader, writer):
args = parser.parse_args()
logging.basicConfig()
logger.setLevel(logging.DEBUG)
loop = asyncio.get_event_loop()
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
factory = asyncio.start_server(
echo_handler,
os.environ.get('HOST'),
Expand Down

0 comments on commit 31f9331

Please sign in to comment.