diff --git a/rebench/model/benchmark.py b/rebench/model/benchmark.py
index c625a05c..9e6e159e 100644
--- a/rebench/model/benchmark.py
+++ b/rebench/model/benchmark.py
@@ -107,3 +107,6 @@ def as_dict(self):
     def from_str_list(cls, data_store, str_list):
         return data_store.get_config(str_list[0], str_list[1], str_list[2],
                                      None if str_list[3] == '' else str_list[3])
+    @classmethod
+    def get_column_headers(cls):
+        return ["benchmark", "executor", "suite", "extraArgs"]
diff --git a/rebench/model/measurement.py b/rebench/model/measurement.py
index 9e0e29ce..1e220080 100644
--- a/rebench/model/measurement.py
+++ b/rebench/model/measurement.py
@@ -58,6 +58,10 @@ def from_str_list(cls, data_store, str_list, line_number=None, filename=None):
         return Measurement(invocation, iteration, value, unit, run_id, criterion,
                            line_number, filename)
+    @classmethod
+    def get_column_headers(cls):
+        run_id_headers = RunId.get_column_headers()
+        return ["invocation", "iteration", "value", "unit", "criterion"] + run_id_headers
 
     def as_dict(self):
         return {
diff --git a/rebench/model/run_id.py b/rebench/model/run_id.py
index 6eec51a9..e06507f9 100644
--- a/rebench/model/run_id.py
+++ b/rebench/model/run_id.py
@@ -338,6 +338,11 @@ def from_str_list(cls, data_store, str_list):
         return data_store.create_run_id(
             benchmark, str_list[-4], str_list[-3], str_list[-2], str_list[-1])
 
+    @classmethod
+    def get_column_headers(cls):
+        benchmark_headers = Benchmark.get_column_headers()
+        return benchmark_headers + ["cores", "inputSize", "varValue", "machine"]
+
     def __str__(self):
         return "RunId(%s, %s, %s, %s, %s, %s, %d)" % (
             self.benchmark.name,
diff --git a/rebench/persistence.py b/rebench/persistence.py
index 812d0d59..c71426d3 100644
--- a/rebench/persistence.py
+++ b/rebench/persistence.py
@@ -304,22 +304,25 @@ def _parse_data_line(
         data_point = DataPoint(run_id)
         return data_point, previous_run_id
 
+    _SEP = "\t"  # separator between serialized parts of a measurement
+
     def _open_file_and_append_execution_comment(self):
         """
         Append a shebang (#!/path/to/executable) to the data file.
         This allows it theoretically to be executable.
         But more importantly also records execution metadata to reproduce the data.
         """
-        shebang_line = "#!%s\n" % (subprocess.list2cmdline(sys.argv))
-        shebang_line += _START_TIME_LINE + self._start_time + "\n"
-        shebang_line += "# Environment: " + json.dumps(determine_environment()) + "\n"
-        shebang_line += "# Source: " + json.dumps(
+        shebang_with_metadata = "#!%s\n" % (subprocess.list2cmdline(sys.argv))
+        shebang_with_metadata += _START_TIME_LINE + self._start_time + "\n"
+        shebang_with_metadata += "# Environment: " + json.dumps(determine_environment()) + "\n"
+        shebang_with_metadata += "# Source: " + json.dumps(
             determine_source_details(self._configurator)) + "\n"
+        shebang_with_metadata += self._SEP.join(Measurement.get_column_headers()) + "\n"
 
         try:
             # pylint: disable-next=unspecified-encoding,consider-using-with
             data_file = open(self._data_filename, 'a+')
-            data_file.write(shebang_line)
+            data_file.write(shebang_with_metadata)
             data_file.flush()
             return data_file
         except Exception as err:  # pylint: disable=broad-except
@@ -328,7 +331,6 @@ def _open_file_and_append_execution_comment(self):
                 os.getcwd(), err), err)
 
-    _SEP = "\t"  # separator between serialized parts of a measurement
 
     def _persists_data_point_in_open_file(self, data_point):
         for measurement in data_point.get_measurements():
diff --git a/rebench/tests/persistency_test.py b/rebench/tests/persistency_test.py
index 43401f62..ef981df6 100644
--- a/rebench/tests/persistency_test.py
+++ b/rebench/tests/persistency_test.py
@@ -17,6 +17,10 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
+import subprocess
+import json
+import sys
+from datetime import datetime
 from unittest import skipIf
 from .mock_http_server import MockHTTPServer
 from .rebench_test_case import ReBenchTestCase
@@ -36,7 +40,6 @@
 
 
 class PersistencyTest(ReBenchTestCase):
-
     def test_de_serialization(self):
         data_store = DataStore(self.ui)
         executor = ExecutorConf("MyVM", '', '',
@@ -67,7 +70,6 @@ def test_iteration_invocation_semantics(self):
         cnf = Configurator(load_config(self._path + '/persistency.conf'),
                            ds, self.ui, data_file=self._tmp_file)
         ds.load_data(None, False)
-
         self._assert_runs(cnf, 1, 0, 0)
 
         ex = Executor(cnf.get_runs(), False, self.ui)
@@ -168,3 +170,46 @@
 
         run = list(cnf.get_runs())[0]
         run.close_files()
+
+    def test_check_file_lines(self):
+        ds = DataStore(self.ui)
+        cnf = Configurator(load_config(self._path + '/persistency.conf'),
+                           ds, self.ui, data_file=self._tmp_file)
+        ds.load_data(None, False)
+        ex = Executor(cnf.get_runs(), False, self.ui)
+        ex.execute()
+        with open(self._tmp_file, 'r') as file:  # pylint: disable=unspecified-encoding
+            lines = file.readlines()
+        command = self.get_line_after_char('#!', lines[0])
+        self.assertEqual(command, subprocess.list2cmdline(sys.argv))
+        time = self.get_line_after_char('Start:', lines[1])
+        self.assertTrue(self.is_valid_time(time))
+        json_code = self.get_line_after_char('Environment:', lines[2])
+        self.assertTrue(self.is_valid_json(json_code))
+        json_code = self.get_line_after_char('Source:', lines[3])
+        self.assertTrue(self.is_valid_json(json_code))
+        line = lines[4].split("\t")
+        line[-1] = line[-1].rstrip('\n')
+        words = Measurement.get_column_headers()
+        self.assertEqual(line, words)
+        self.assertEqual(len(lines[5].split("\t")), len(line))
+
+    def get_line_after_char(self, char, line):
+        if char in line:
+            get_line = line.split(char)
+            return get_line[1].strip()
+        return None
+
+    def is_valid_time(self, time_str):
+        try:
+            datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%S.%f%z')
+            return True
+        except ValueError:
+            return False
+
+    def is_valid_json(self, json_str):
+        try:
+            json.loads(json_str)
+            return True
+        except json.JSONDecodeError:
+            return False
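
For reviewers, a minimal standalone sketch (not part of the patch) of how the new column-header row is composed. The column names and their order come directly from the three get_column_headers() methods added above; the variable names in the sketch are only for illustration.

# Illustrative only; mirrors Measurement.get_column_headers() as defined in this diff.
measurement_headers = ["invocation", "iteration", "value", "unit", "criterion"]
benchmark_headers = ["benchmark", "executor", "suite", "extraArgs"]
run_id_headers = benchmark_headers + ["cores", "inputSize", "varValue", "machine"]
header_row = "\t".join(measurement_headers + run_id_headers)
print(header_row)
# invocation	iteration	value	unit	criterion	benchmark	executor	suite	extraArgs	cores	inputSize	varValue	machine

Because _open_file_and_append_execution_comment writes this row right after the shebang, start-time, "# Environment:", and "# Source:" lines, test_check_file_lines expects it at lines[4] and checks that the first measurement at lines[5] has the same number of tab-separated fields.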