FreeRTOS · Skptak · Jul 11, 2023 · Jul 6, 2023 · Jul 6, 2023 · Jul 6, 2023
diff --git a/executable-monitor/action.yml b/executable-monitor/action.yml
@@ -14,7 +14,11 @@ inputs:
   timeout-seconds:
-    description: 'Maximum amount of time to run the executable. Default is 600.'
+    description: 'Maximum amount of time to run the executable. Default is 300.'
     required: false
-    default: 600
+    default: 300
+  retry-attempts:
+    description: 'Number of times to re-launch the binary to check for success.'
+    required: false
+    default: 0
 
 runs:
   using: "composite"
@@ -23,5 +27,6 @@ runs:
         run: pip install -r $GITHUB_ACTION_PATH/requirements.txt
         shell: bash
       - name: Run executable with monitoring script
-        run: python3 $GITHUB_ACTION_PATH/executable-monitor.py --exe-path=${{ inputs.exe-path }} --timeout-seconds=${{ inputs.timeout-seconds }} --success-line="${{ inputs.success-line }}" --log-dir=${{ inputs.log-dir }}
+        run: |
+          python3 $GITHUB_ACTION_PATH/executable-monitor.py --exe-path=${{ inputs.exe-path }} --timeout-seconds=${{ inputs.timeout-seconds }} --success-line="${{ inputs.success-line }}" --log-dir=${{ inputs.log-dir }} --retry-attempts=${{ inputs.retry-attempts }}
         shell: bash
diff --git a/executable-monitor/executable-monitor.py b/executable-monitor/executable-monitor.py
@@ -41,6 +41,10 @@
                         type=int,
                         required=False,
                         help='Exit status that indicates that the executable completed successfully. Required if --success-line is not used.')
+    parser.add_argument('--retry-attempts',
+                        type=int,
+                        required=False,
+                        help='Number of times to attempt re-running the executable if the correct exit condition is not found.')
 
     args = parser.parse_args()
 
@@ -56,6 +60,11 @@
     if not os.path.exists(args.log_dir):
         os.makedirs(args.log_dir, exist_ok = True)
 
+    if not args.retry_attempts:
+        retryAttempts = 0
+    else:
+        retryAttempts = args.retry_attempts
+
     # Convert any relative path (like './') in passed argument to absolute path.
     exe_abs_path = os.path.abspath(args.exe_path)
     log_dir = os.path.abspath(args.log_dir)
@@ -72,76 +81,114 @@
     logging.info(f"Running executable: {exe_abs_path} ")
     logging.info(f"Storing logs in: {log_dir}")
     logging.info(f"Timeout (seconds): {args.timeout_seconds}")
+    logging.info(f"Searching for success line: {args.success_line}")
+    logging.info(f"Will re-try the run {retryAttempts} times")
+    if args.success_exit_status is not None:
+        logging.info("Looking for exit status {0}".format(args.success_exit_status ))
+    for attempts in range(0,retryAttempts + 1):
+
+        # Initialize values
+        success_line = ""
+        timeout_occurred = False
+        exe_exit_status = None
+        exe_exitted = False
+        success_line_found = False
+        exit_condition_met = False
+        wait_for_exit = args.success_exit_status is not None
+
+        # Open two handles on the same log file: the subprocess writes through one
+        # and this script reads through the other. Reading the subprocess's stdout
+        # pipe directly would block in readline() whenever the subprocess is running
+        # but silent, so the timeout could never fire; reading from a file does not block.
+        WriteOutputFile = open("output.log", "w")
+        ReadOutputFile = open("output.log", "r")
+
+        # Launch the executable
+        exe = subprocess.Popen([exe_abs_path], stdout=WriteOutputFile, stderr=WriteOutputFile, universal_newlines=True, env=os.environ)
 
-    # Initialize values
-    timeout_occurred = False
-    exe_exit_status = None
-    exe_exitted = False
-    success_line_found = False
-    wait_for_exit = args.success_exit_status is not None
-
-    # Launch the executable
-    exe = subprocess.Popen([exe_abs_path], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
-
-    cur_time_seconds = time.time()
-    timeout_time_seconds = cur_time_seconds + args.timeout_seconds
-
-    logging.info("START OF DEVICE OUTPUT\n")
-
-    while not (timeout_occurred or exe_exitted or (not wait_for_exit and success_line_found)):
-
-        # Check if executable exitted
-        exe_exit_status = exe.poll()
-        if exe_exit_status is not None:
-            exe_exitted = True
-
-        # Read executable's stdout and write to stdout and logfile
-        exe_stdout_line = exe.stdout.readline()
-        logging.info(exe_stdout_line)
-
-        # Check if the executable printed out it's success line
-        if args.success_line is not None and args.success_line in exe_stdout_line:
-            success_line_found = True
-
-        # Check for timeout
         cur_time_seconds = time.time()
-        if cur_time_seconds >= timeout_time_seconds:
-            timeout_occurred = True
-
-        # Sleep for a short duration between loops to not steal all system resources
-        time.sleep(.1)
-
-    if not exe_exitted:
-        exe.kill()
-
-    # Capture remaining output and check for the successful line
-    for exe_stdout_line in exe.stdout.readlines():
-        logging.info(exe_stdout_line)
-        if args.success_line is not None and args.success_line in exe_stdout_line:
-            success_line_found = True
-
-    logging.info("END OF DEVICE OUTPUT\n")
-
-    logging.info("EXECUTABLE RUN SUMMARY:\n")
+        timeout_time_seconds = cur_time_seconds + args.timeout_seconds
+
+        logging.info("START OF DEVICE OUTPUT\n")
+
+        # Loop until the executable exits, the timeout expires, or the success line is found while not waiting for an exit status
+        while ( not exit_condition_met ):
+            # Sleep for a short duration between loops to not steal all system resources
+            time.sleep(.05)
+
+            # Check if executable exitted
+            exe_exit_status = exe.poll()
+            if exe_exit_status is not None:
+                logging.info(f"EXECUTABLE CLOSED WITH STATUS: {exe_exit_status}")
+                exe_exitted = True
+                exit_condition_met = True
+
+            # Read executable's stdout and write to stdout and logfile
+            # A potential improvement here would be to do readlines() on the file, then truncate()
+            # This might be cleaner than this approach of reading a single line each loop.
+            exe_stdout_line = ReadOutputFile.readline()
+            if(exe_stdout_line is not None) and (len(exe_stdout_line.strip()) > 1):
+                # Check if the executable printed out its success line
+                if ( args.success_line is not None ) and ( args.success_line in exe_stdout_line ) :
+                    logging.info(f"SUCCESS_LINE_FOUND: {exe_stdout_line}")
+                    success_line_found = True
+                    success_line = exe_stdout_line
+                    if( not wait_for_exit ):
+                        exit_condition_met = True
+                else:
+                    logging.info(exe_stdout_line)
+
+            # Check for timeout
+            cur_time_seconds = time.time()
+            if cur_time_seconds >= timeout_time_seconds:
+                logging.info(f"TIMEOUT OF {args.timeout_seconds} SECONDS HIT")
+                timeout_occurred = True
+                exit_condition_met = True
+
+        if not exe_exitted:
+            logging.info(f"EXECUTABLE DID NOT EXIT, MANUALLY KILLING NOW")
+            exe.kill()
+
+        # The loop above only ends once exit_condition_met is set, so guarding this
+        # on that flag would skip it every time; always drain the unread output.
+        logging.info(f"PARSING REST OF LOG")
+        for exe_stdout_line in ReadOutputFile.readlines():
+            logging.info(exe_stdout_line)
+            if args.success_line is not None and args.success_line in exe_stdout_line:
+                success_line_found = True
+                success_line = exe_stdout_line
+                logging.info(f"SUCCESS_LINE_FOUND: {exe_stdout_line}")
+
+        # Close the files
+        WriteOutputFile.close()
+        ReadOutputFile.close()
+
+        logging.info("END OF DEVICE OUTPUT")
+
+        logging.info("EXECUTABLE RUN SUMMARY:\n")
+
+        exit_status = 0
+
+        if args.success_line is not None:
+            if not success_line_found:
+                logging.error("Success Line: Success line not output.\n")
+                exit_status = 1
 
-    exit_status = 0
+        if args.success_exit_status is not None:
+            if exe_exitted:
+                if exe_exit_status != args.success_exit_status:
+                    exit_status = 1
+                logging.info(f"Exit Status: {exe_exit_status}")
+            else:
+                logging.error("Exit Status: Executable did not exit by itself.\n")
+                exit_status = 1
 
-    if args.success_line is not None:
-        if success_line_found:
-            logging.info("Success Line: Found.\n")
-        else:
-            logging.error("Success Line: Success line not output.\n")
-            exit_status = 1
+        if( exit_status == 0 ):
+            logging.info(f"Run found a valid success metric\n")
+            sys.exit(exit_status)
 
-    if args.success_exit_status is not None:
-        if exe_exitted:
-            if exe_exit_status != args.success_exit_status:
-                exit_status = 1
-            logging.info(f"Exit Status: {exe_exit_status}")
-        else:
-            logging.error("Exit Status: Executable did not exit.\n")
-            exe_status = 1
-
+        elif( attempts < retryAttempts ):
+            logging.info(f"Did not succeed, trying re-attempt {attempts+1} of {retryAttempts}\n")
 
-    # Report if executable executed successfully to workflow
+    # Report final exit status if no successful run occurred
     sys.exit(exit_status)
diff --git a/localhost-echo-server/local_echo_server.py b/localhost-echo-server/local_echo_server.py
@@ -24,7 +24,8 @@ async def echo_handler(reader, writer):
   args = parser.parse_args()
   logging.basicConfig()
   logger.setLevel(logging.DEBUG)
-  loop = asyncio.get_event_loop()
+  loop = asyncio.new_event_loop()
+  asyncio.set_event_loop(loop)
   factory = asyncio.start_server(
     echo_handler,
     os.environ.get('HOST'),