Skip to content

Commit

Permalink
Merge pull request #158 from AutomatingSciencePipeline/Clarifying-Variable-Names
Browse files Browse the repository at this point in the history

Clarifying variable names
  • Loading branch information
SpookyBeverage authored Mar 10, 2023
2 parents 6e1c80d + 99ab74e commit 5bf14f7
Show file tree
Hide file tree
Showing 14 changed files with 146 additions and 110 deletions.
2 changes: 1 addition & 1 deletion Monorepo.wiki
141 changes: 87 additions & 54 deletions apps/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from modules.output.plots import generateScatterPlot
from modules.configs import generate_config_files


try:
import magic # Crashes on windows if you're missing the 'python-magic-bin' python package
except ImportError:
Expand Down Expand Up @@ -48,13 +47,14 @@
flaskApp = Flask(__name__)
CORS(flaskApp)

runner = ProcessPoolExecutor(1)
MAX_WORKERS = 1
runner = ProcessPoolExecutor(MAX_WORKERS)


### FLASK API ENDPOINT
@flaskApp.post("/experiment")
def recv_experiment():
runner.submit(run_batch, request.get_json())
runner.submit(handle_exceptions_from_run, request.get_json())
return 'OK'


Expand All @@ -63,19 +63,30 @@ def glados_custom_flask_error(error):
return jsonify(error.to_dict()), error.status_code


def handle_exceptions_from_run(data):
    """Run a batch job and log any exception that escapes it.

    This is the entry point submitted to the ProcessPoolExecutor; exceptions
    raised inside a worker are only stored on the (unchecked) Future, so we
    log them here before re-raising.

    Args:
        data: the JSON payload from the /experiment request.
    """
    try:
        run_batch(data)
    except Exception as err:
        print(f"Unexpected exception while trying to run the experiment, this was not caught by our own code and needs to be handled better: {err}")
        logging.exception(err)
        # Bare `raise` re-raises the active exception with its traceback intact
        # (was `raise err`, which needlessly re-raises by reference).
        raise


def run_batch(data):
print(f'Run_Batch starting with data {data}')
experiments = firebaseDb.collection('Experiments')

#Parsing the argument data
# Obtain most basic experiment info
expId = data['experiment']['id']
print(f'received {expId}')
expRef = experiments.document(expId)

# Parsing the argument data
experiment = expRef.get().to_dict()
print(f"Experiment info: {experiment}")
experiment['id'] = expId
experimentOutput = experiment['fileOutput']
resultOutput = experiment['resultOutput']
trialExtraFile = experiment['trialExtraFile']
trialResult = experiment['trialResult']
keepLogs = experiment['keepLogs']
scatterPlot = experiment['scatter']
dumbTextArea = experiment['consts']
Expand All @@ -84,32 +95,10 @@ def run_batch(data):
#Downloading Experiment File
os.makedirs(f'ExperimentFiles/{expId}')
os.chdir(f'ExperimentFiles/{expId}')
if experimentOutput != '' or postProcess != '' or keepLogs:
print('There will be experiment outputs')
os.makedirs('ResCsvs')
print(f'Downloading file for {expId}')
try:
filepath = experiment['file']
except KeyError:
filepath = f'experiment{expId}'
print(f"No filepath specified so defaulting to {filepath}")
print(f"Downloading {filepath} to ExperimentFiles/{expId}/{filepath}")
try:
filedata = firebaseBucket.blob(filepath)
filedata.download_to_filename(filepath)
except Exception as err:
raise GladosInternalError('Failed to download experiment files') from err

#Determining FileType
rawfiletype = magic.from_file(filepath)
filetype = 'unknown'
if 'Python script' in rawfiletype:
filetype = 'python'
elif 'Java archive data (JAR)' in rawfiletype:
filetype = 'java'
filepath = download_experiment_files(expId, experiment, trialExtraFile, keepLogs, postProcess)

if filetype == 'unknown':
raise NotImplementedError("Unknown experiment file type")
#Determining experiment FileType -> how we need to execute it
rawfiletype, filetype = determine_experiment_file_type(filepath)
print(f"Raw Filetype: {rawfiletype}\n Filtered Filetype: {filetype}")

#Generating Configs from hyperparameters
Expand All @@ -124,33 +113,13 @@ def run_batch(data):

try:
#Running the Experiment
conduct_experiment(expId, expRef, experimentOutput, resultOutput, filepath, filetype, numExperimentsToRun)
conduct_experiment(expId, expRef, trialExtraFile, trialResult, filepath, filetype, numExperimentsToRun, keepLogs)

# Post Processing
if postProcess:
print("Beginning post processing")
try:
if scatterPlot:
print("Creating Scatter Plot")
depVar = experiment['scatterDepVar']
indVar = experiment['scatterIndVar']
generateScatterPlot(indVar, depVar, 'results.csv', expId)
except KeyError as err:
raise GladosInternalError("Error during plot generation") from err
post_process_experiment(expId, experiment, scatterPlot, postProcess)

#Uploading Experiment Results
print('Uploading Results to the frontend')
uploadBlob = firebaseBucket.blob(f"results/result{expId}.csv")
uploadBlob.upload_from_filename('results.csv')

if experimentOutput != '' or postProcess:
print('Uploading Result Csvs')
try:
shutil.make_archive('ResultCsvs', 'zip', 'ResCsvs')
uploadBlob = firebaseBucket.blob(f"results/result{expId}.zip")
uploadBlob.upload_from_filename('ResultCsvs.zip')
except Exception as err:
raise GladosInternalError("Error uploading to firebase") from err
upload_experiment_results(expId, trialExtraFile, postProcess)
except ExperimentAbort as err:
print(f'Experiment {expId} critical failure, not doing any result uploading or post processing')
logging.exception(err)
Expand All @@ -165,6 +134,70 @@ def run_batch(data):
os.chdir('../..')


def determine_experiment_file_type(filepath):
    """Inspect the downloaded experiment file with libmagic and decide how to run it.

    Returns:
        (rawfiletype, filetype): libmagic's raw description and the mapped
        runtime name ('python' or 'java').

    Raises:
        NotImplementedError: when the description matches no known runtime.
    """
    rawfiletype = magic.from_file(filepath)
    print(rawfiletype)
    if 'Python script' in rawfiletype or 'python3' in rawfiletype:
        filetype = 'python'
    elif 'Java archive data (JAR)' in rawfiletype:
        filetype = 'java'
    else:
        print(f"{rawfiletype} could not be mapped to python or java, if it should consider updating the matching statements")
        raise NotImplementedError("Unknown experiment file type")
    return rawfiletype, filetype


def download_experiment_files(expId, experiment, trialExtraFile, keepLogs, postProcess):
    """Download the experiment's code file from the firebase bucket into the cwd.

    Also creates the ResCsvs folder first when any per-trial output will be kept.
    Returns the local path the file was written to.

    Raises:
        GladosInternalError: when the bucket download fails.
    """
    if trialExtraFile != '' or postProcess != '' or keepLogs:
        print('There will be experiment outputs')
        os.makedirs('ResCsvs')
    print(f'Downloading file for {expId}')
    if 'file' in experiment:
        filepath = experiment['file']
    else:
        # No path recorded on the experiment document; fall back to a conventional name.
        filepath = f'experiment{expId}'
        print(f"No filepath specified so defaulting to {filepath}")
    print(f"Downloading {filepath} to ExperimentFiles/{expId}/{filepath}")
    try:
        blob = firebaseBucket.blob(filepath)
        blob.download_to_filename(filepath)
    except Exception as err:
        print(f"Error {err} occurred while trying to download experiment file")
        raise GladosInternalError('Failed to download experiment files') from err
    print(f"Downloaded {filepath} to ExperimentFiles/{expId}/{filepath}")
    return filepath


def upload_experiment_results(expId, trialExtraFile, postProcess):
    """Upload the aggregate results csv — and, when per-trial csvs exist, a zip of them — to firebase.

    Raises:
        GladosInternalError: when archiving or uploading the csv zip fails.
    """
    print('Uploading Results to the frontend')
    csvBlob = firebaseBucket.blob(f"results/result{expId}.csv")
    csvBlob.upload_from_filename('results.csv')

    # Only experiments that produced extra files or post-processing output have a ResCsvs folder.
    if trialExtraFile == '' and not postProcess:
        return
    print('Uploading Result Csvs')
    try:
        shutil.make_archive('ResultCsvs', 'zip', 'ResCsvs')
        zipBlob = firebaseBucket.blob(f"results/result{expId}.zip")
        zipBlob.upload_from_filename('ResultCsvs.zip')
    except Exception as err:
        raise GladosInternalError("Error uploading to firebase") from err


def post_process_experiment(expId, experiment, scatterPlot, postProcess):
    """Run the experiment's configured post-processing (currently only the scatter plot).

    Raises:
        GladosInternalError: when the scatter-plot variables are missing from the experiment.
    """
    if not postProcess:
        return
    print("Beginning post processing")
    try:
        if scatterPlot:
            print("Creating Scatter Plot")
            dependent = experiment['scatterDepVar']
            independent = experiment['scatterIndVar']
            generateScatterPlot(independent, dependent, 'results.csv', expId)
    except KeyError as err:
        raise GladosInternalError("Error during plot generation") from err


if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
os.chdir('/app/GLADOS_HOME')
Expand Down
File renamed without changes.
3 changes: 3 additions & 0 deletions apps/backend/legacy-tests/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Legacy Tests

TODO: These tests are from the first team; we no longer use them, and they should be removed as soon as we're done pulling inspiration from them.
File renamed without changes.
8 changes: 4 additions & 4 deletions apps/backend/modules/output/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ def generateScatterPlot(independentVar, dependantVar, resultFile, expId):
x.append(float(row[indIndex]))
y.append(float(row[depIndex]))

fig, ax = plt.subplots()
figure, axes = plt.subplots()
print(np.array(x))
print(np.array(y))
m, b = np.polyfit(np.array(x), np.array(y), 1)
sc = ax.scatter(x,y)
sc = axes.scatter(x,y)
plt.plot(x, m*np.array(x) + b)
ax.set_ylabel(dependantVar, loc='top')
ax.set_xlabel(independentVar, loc='left')
axes.set_ylabel(dependantVar, loc='top')
axes.set_xlabel(independentVar, loc='left')
try:
os.chdir('ResCsvs')
plt.savefig(f'scatter{expId}.png')
Expand Down
62 changes: 31 additions & 31 deletions apps/backend/modules/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,36 +14,36 @@
OUTPUT_INDICATOR_USING_CSVS = "In ResCsvs"


def get_data(process: 'Popen[str]', trialRun: int, keepLogs: bool):
    """Collect a trial subprocess's output and return its result line.

    Reads stdout/stderr via Popen.communicate(), optionally persists the
    combined output to ResCsvs/log{trialRun}.txt, and returns the first
    line of stdout (the trial's reported result), or the pipe-error
    sentinel when the trial wrote to stderr.

    Raises:
        InternalTrialFailedError: when reading the pipes fails.
    """
    try:
        data = process.communicate()
        print(data)
        if keepLogs:
            os.chdir('ResCsvs')
            # `with` closes the file automatically (removed a redundant explicit close).
            with open(f"log{trialRun}.txt", 'w', encoding='utf8') as f:
                f.write(data[0])
                if data[1]:
                    f.write(data[1])
            os.chdir('..')
        if data[1]:
            print(f'errors returned from pipe is {data[1]}')
            return PIPE_OUTPUT_ERROR_MESSAGE
    except Exception as err:
        # BUG FIX: this message was missing the f-prefix, so "{err}" printed literally.
        print(f"Encountered another exception while reading pipe: {err}")
        raise InternalTrialFailedError("Encountered another exception while reading pipe") from err
    result = data[0].split('\n')[0]
    print(f"trial#{trialRun} result data: {result}")
    return result


def run_trial(experiment_path, config_path, filetype, trialRun: int, keepLogs: bool):
    """Launch one trial of the experiment as a subprocess and return its result line.

    The caller must ensure the cwd is ExperimentsFiles/{ExperimentId}.

    Raises:
        NotImplementedError: when filetype is neither 'python' nor 'java'.
    """
    if filetype == 'python':
        with Popen(['python', experiment_path, config_path], stdout=PIPE, stdin=PIPE, stderr=PIPE, encoding='utf8') as process:
            return get_data(process, trialRun, keepLogs)
    elif filetype == 'java':
        with Popen(['java', '-jar', experiment_path, config_path], stdout=PIPE, stdin=PIPE, stderr=PIPE, encoding='utf8') as process:
            return get_data(process, trialRun, keepLogs)
    # BUG FIX: previously fell off the end and silently returned None for an
    # unrecognized filetype; fail loudly instead.
    raise NotImplementedError(f"Unsupported experiment file type: {filetype}")


def get_first_line_of_trial_results_csv(filename):
Expand Down Expand Up @@ -76,7 +76,7 @@ def add_to_output_batch(fileOutput, ExpRun):
raise FileHandlingError("Failed to copy results csv") from err


def conduct_experiment(expId, expRef, experimentOutput, resultOutput, filepath, filetype, numTrialsToRun):
def conduct_experiment(expId, expRef, trialExtraFile, trialResult, filepath, filetype, numTrialsToRun, keepLogs):
print(f"Running Experiment {expId}")

passes = 0
Expand All @@ -90,11 +90,11 @@ def conduct_experiment(expId, expRef, experimentOutput, resultOutput, filepath,
expRef.update({"startedAtEpochMillis": int(startSeconds * 1000)})
try:
print("Running the first trial...")
firstTrial = run_trial(filepath, f'configFiles/{0}.ini', filetype, 0)
if resultOutput == '':
firstTrial = run_trial(filepath, f'configFiles/{0}.ini', filetype, 0, keepLogs)
if trialResult == '':
writer.writerow(["Experiment Run", "Result"] + paramNames)
else:
if (output := get_first_line_of_trial_results_csv(resultOutput)) is None:
if (output := get_first_line_of_trial_results_csv(trialResult)) is None:
raise InternalTrialFailedError("Nothing returned when trying to get header results (David, improve this error message please)")
writer.writerow(["Experiment Run"] + output + paramNames)
except Exception as err:
Expand All @@ -121,32 +121,32 @@ def conduct_experiment(expId, expRef, experimentOutput, resultOutput, filepath,
if numTrialsToRun > 0:
#Running the rest of the experiments

print(f"Continuing now running {numTrialsToRun}")
if experimentOutput != '':
add_to_output_batch(experimentOutput, 0)
print(f"Continuing with running the {numTrialsToRun} other trials...")
if trialExtraFile != '':
add_to_output_batch(trialExtraFile, 0)
firstTrial = OUTPUT_INDICATOR_USING_CSVS
if resultOutput == '':
if trialResult == '':
writer.writerow(["0", firstTrial] + get_configs_ordered(f'configFiles/{0}.ini', paramNames))
else:
if (output := get_output_results(resultOutput)) is None:
if (output := get_output_results(trialResult)) is None:
raise InternalTrialFailedError("Nothing returned when trying to get first non-header line of results (the first run?) (David, improve this error message please)")
writer.writerow(["0"] + output + get_configs_ordered(f'configFiles/{0}.ini', paramNames))
for i in range(1, numTrialsToRun + 1):
try:
response_data = run_trial(filepath, f'configFiles/{i}.ini', filetype, i)
response_data = run_trial(filepath, f'configFiles/{i}.ini', filetype, i, keepLogs)
except InternalTrialFailedError:
print('The trial failed for some internal reason?') # TODO should this halt all further experiment runs?
fails += 1
expRef.update({'fails': fails})
continue

if experimentOutput != '':
if trialExtraFile != '':
response_data = OUTPUT_INDICATOR_USING_CSVS
add_to_output_batch(experimentOutput, i)
if resultOutput == '':
add_to_output_batch(trialExtraFile, i)
if trialResult == '':
writer.writerow([i, response_data] + get_configs_ordered(f'configFiles/{i}.ini', paramNames))
else:
output = get_output_results(resultOutput)
output = get_output_results(trialResult)
if output is None:
raise InternalTrialFailedError("Nothing returned when trying to get first non-header line of results (the rest of the runs?) (David, improve this error message please)")
writer.writerow([i] + output + get_configs_ordered(f'configFiles/{i}.ini', paramNames))
Expand All @@ -156,4 +156,4 @@ def conduct_experiment(expId, expRef, experimentOutput, resultOutput, filepath,
else:
fails += 1
expRef.update({'fails': fails})
print("Finished running Experiments")
print("Finished running Trials")
8 changes: 4 additions & 4 deletions apps/frontend/components/NewExp.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ const NewExp = ({ formState, setFormState, copyID, setCopyId, ...rest }) => {
parameters: formList([] as any[]), // TODO type for parameters will remove the need for `any` here
name: '',
description: '',
fileOutput: '',
resultOutput: '',
trialExtraFile: '',
trialResult: '',
scatterIndVar: '',
scatterDepVar: '',
dumbTextArea: '',
Expand All @@ -90,8 +90,8 @@ const NewExp = ({ formState, setFormState, copyID, setCopyId, ...rest }) => {
parameters: formList(params),
name: expInfo['name'],
description: expInfo['description'],
fileOutput: expInfo['fileOutput'],
resultOutput: expInfo['resultOutput'],
trialExtraFile: expInfo['trialExtraFile'],
trialResult: expInfo['trialResult'],
verbose: expInfo['verbose'],
nWorkers: expInfo['workers'],
scatter: expInfo['scatter'],
Expand Down
Loading

0 comments on commit 5bf14f7

Please sign in to comment.