Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Executable Monitor Change #62

Merged
merged 13 commits into from
Jul 11, 2023
9 changes: 7 additions & 2 deletions executable-monitor/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ inputs:
timeout-seconds:
description: 'Maximum amount of time to run the executable. Default is 600.'
required: false
default: 600
default: 300
retry-attempts:
description: 'Number of times to re-launch the binary to check for success.'
required: false
default: 0

runs:
using: "composite"
Expand All @@ -23,5 +27,6 @@ runs:
run: pip install -r $GITHUB_ACTION_PATH/requirements.txt
shell: bash
- name: Run executable with monitoring script
run: python3 $GITHUB_ACTION_PATH/executable-monitor.py --exe-path=${{ inputs.exe-path }} --timeout-seconds=${{ inputs.timeout-seconds }} --success-line="${{ inputs.success-line }}" --log-dir=${{ inputs.log-dir }}
run: |
python3 $GITHUB_ACTION_PATH/executable-monitor.py --exe-path=${{ inputs.exe-path }} --timeout-seconds=${{ inputs.timeout-seconds }} --success-line="${{ inputs.success-line }}" --log-dir=${{ inputs.log-dir }} --retry-attempts=${{ inputs.retry-attempts }}
shell: bash
179 changes: 113 additions & 66 deletions executable-monitor/executable-monitor.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@
type=int,
required=False,
help='Exit status that indicates that the executable completed successfully. Required if --success-line is not used.')
parser.add_argument('--retry-attempts',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a fan of having retry attempts built in here. We should require that checks running in CI run reliably every time.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's actually entirely there for if doing an external network test where a failure due to not being able to connect is something that could happen. Where thought that having the ability to retry for something like that might be valuable in the future.

type=int,
required=False,
help='Number of times to attempt re-running the executable if the correct exit condition is not found.')

args = parser.parse_args()

Expand All @@ -56,6 +60,11 @@
if not os.path.exists(args.log_dir):
os.makedirs(args.log_dir, exist_ok = True)

if not args.retry_attempts:
retryAttempts = 0
else:
retryAttempts = args.retry_attempts

# Convert any relative path (like './') in passed argument to absolute path.
exe_abs_path = os.path.abspath(args.exe_path)
log_dir = os.path.abspath(args.log_dir)
Expand All @@ -72,76 +81,114 @@
logging.info(f"Running executable: {exe_abs_path} ")
logging.info(f"Storing logs in: {log_dir}")
logging.info(f"Timeout (seconds): {args.timeout_seconds}")
logging.info(f"Searching for success line: {args.success_line}")
logging.info(f"Will re-try the run {retryAttempts} times")
if args.success_exit_status is not None:
logging.info("Looking for exit status {0}".format(args.success_exit_status ))
for attempts in range(0,retryAttempts + 1):

# Initialize values
success_line = ""
timeout_occurred = False
exe_exit_status = None
exe_exitted = False
success_line_found = False
exit_condition_met = False
wait_for_exit = args.success_exit_status is not None

# Create two file descriptors. The subprocess writes to one, the parent task reads from the other
# This is a workaround to avoid the fact that calling readline() on the stdout of the subprocess is
# a blocking call. Where if the subprocess is running but hasn't printed anything, readline will never time out.
# The approach uses the underlying file system to not block on data that hasn't been written.
WriteOutputFile = open("output.log", "w")
ReadOutputFile = open("output.log", "r")

# Launch the executable
exe = subprocess.Popen([exe_abs_path], stdout=WriteOutputFile, stderr=WriteOutputFile, universal_newlines=True, env=os.environ)

# Initialize values
timeout_occurred = False
exe_exit_status = None
exe_exitted = False
success_line_found = False
wait_for_exit = args.success_exit_status is not None

# Launch the executable
exe = subprocess.Popen([exe_abs_path], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)

cur_time_seconds = time.time()
timeout_time_seconds = cur_time_seconds + args.timeout_seconds

logging.info("START OF DEVICE OUTPUT\n")

while not (timeout_occurred or exe_exitted or (not wait_for_exit and success_line_found)):

# Check if executable exitted
exe_exit_status = exe.poll()
if exe_exit_status is not None:
exe_exitted = True

# Read executable's stdout and write to stdout and logfile
exe_stdout_line = exe.stdout.readline()
logging.info(exe_stdout_line)

# Check if the executable printed out it's success line
if args.success_line is not None and args.success_line in exe_stdout_line:
success_line_found = True

# Check for timeout
cur_time_seconds = time.time()
if cur_time_seconds >= timeout_time_seconds:
timeout_occurred = True

# Sleep for a short duration between loops to not steal all system resources
time.sleep(.1)

if not exe_exitted:
exe.kill()

# Capture remaining output and check for the successful line
for exe_stdout_line in exe.stdout.readlines():
logging.info(exe_stdout_line)
if args.success_line is not None and args.success_line in exe_stdout_line:
success_line_found = True

logging.info("END OF DEVICE OUTPUT\n")

logging.info("EXECUTABLE RUN SUMMARY:\n")
timeout_time_seconds = cur_time_seconds + args.timeout_seconds

logging.info("START OF DEVICE OUTPUT\n")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spurious newline.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed


# While a timeout hasn't happened, the executable is running, and an exit condition has not been met
while ( not exit_condition_met ):
# Sleep for a short duration between loops to not steal all system resources
time.sleep(.05)

# Check if executable exitted
exe_exit_status = exe.poll()
if exe_exit_status is not None:
logging.info(f"EXECUTABLE CLOSED WITH STATUS: {exe_exit_status}")
exe_exitted = True
exit_condition_met = True

# Read executable's stdout and write to stdout and logfile
# A potential improvement here would be to do readlines() on the file, then truncate()
# This might be cleaner than this approach of reading a single line each loop.
exe_stdout_line = ReadOutputFile.readline()
if(exe_stdout_line is not None) and (len(exe_stdout_line.strip()) > 1):
# Check if the executable printed out its success line
if ( args.success_line is not None ) and ( args.success_line in exe_stdout_line ) :
logging.info(f"SUCCESS_LINE_FOUND: {exe_stdout_line}")
success_line_found = True
success_line = exe_stdout_line
if( not wait_for_exit ):
exit_condition_met = True
else:
logging.info(exe_stdout_line)

# Check for timeout
cur_time_seconds = time.time()
if cur_time_seconds >= timeout_time_seconds:
logging.info(f"TIMEOUT OF {args.timeout_seconds} SECONDS HIT")
timeout_occurred = True
exit_condition_met = True

if not exe_exitted:
logging.info(f"EXECUTABLE DID NOT EXIT, MANUALLY KILLING NOW")
exe.kill()

if not exit_condition_met:
logging.info(f"PARSING REST OF LOG")
# Capture remaining output and check for the successful line
for exe_stdout_line in ReadOutputFile.readlines():
logging.info(exe_stdout_line)
if args.success_line is not None and args.success_line in exe_stdout_line:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should success_line be a regex rather than a particular line?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a bad idea, but think for now want to stick with this instead of the regex. Can add that in down the line if we think it would be useful.

success_line_found = True
success_line = exe_stdout_line
logging.info(f"SUCCESS_LINE_FOUND: {exe_stdout_line}")

# Close the files
WriteOutputFile.close()
ReadOutputFile.close()

logging.info("END OF DEVICE OUTPUT")

logging.info("EXECUTABLE RUN SUMMARY:\n")

exit_status = 0

if args.success_line is not None:
if not success_line_found:
logging.error("Success Line: Success line not output.\n")
exit_status = 1

exit_status = 0
if args.success_exit_status is not None:
if exe_exitted:
if exe_exit_status != args.success_exit_status:
exit_status = 1
logging.info(f"Exit Status: {exe_exit_status}")
else:
logging.error("Exit Status: Executable did not exit by itself.\n")
exit_status = 1

if args.success_line is not None:
if success_line_found:
logging.info("Success Line: Found.\n")
else:
logging.error("Success Line: Success line not output.\n")
exit_status = 1
if( exit_status == 0 ):
logging.info(f"Run found a valid success metric\n")
sys.exit(exit_status)

if args.success_exit_status is not None:
if exe_exitted:
if exe_exit_status != args.success_exit_status:
exit_status = 1
logging.info(f"Exit Status: {exe_exit_status}")
else:
logging.error("Exit Status: Executable did not exit.\n")
exe_status = 1

elif( attempts < retryAttempts ):
logging.info(f"Did not succeed, trying re-attempt {attempts+1} of {retryAttempts}\n")

# Report if executable executed successfully to workflow
# Report final exit status if no successful run occured
sys.exit(exit_status)
3 changes: 2 additions & 1 deletion localhost-echo-server/local_echo_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ async def echo_handler(reader, writer):
args = parser.parse_args()
logging.basicConfig()
logger.setLevel(logging.DEBUG)
loop = asyncio.get_event_loop()
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
factory = asyncio.start_server(
echo_handler,
os.environ.get('HOST'),
Expand Down