Print warnings in detailed reports only in rule corpus tester (#404)
* collect warnings and print them only in reports

* update CHANGELOG.md

* add test

* fix black
dtrai2 authored Jul 14, 2023
1 parent 5e6d7ae · commit 1d7fda6
Showing 3 changed files with 79 additions and 5 deletions.
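The change routes logprep's pipeline warnings through a dedicated logger whose WARNING-level records are collected in an in-memory buffer instead of being printed immediately. Below is a minimal, self-contained sketch of that capture technique (not code from this commit; the helper name `make_capturing_logger` is illustrative):

```python
import io
import logging

def make_capturing_logger(name):
    """Create a logger whose WARNING+ records land in an in-memory buffer."""
    logger = logging.getLogger(name)
    logger.propagate = False  # keep captured warnings off the root logger / console
    logger.setLevel(logging.WARNING)
    buffer = io.StringIO()
    handler = logging.StreamHandler(buffer)
    handler.setLevel(logging.WARNING)
    logger.addHandler(handler)
    return logger, buffer

logger, buffer = make_capturing_logger("demo-capture")
logger.warning("field already exists")
assert buffer.getvalue() == "field already exists\n"  # held back for a later report
```

Because `propagate` is disabled, nothing reaches the console during the run; the tester decides afterwards whether, and where, to print the captured text.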
CHANGELOG.md (4 additions, 0 deletions)

@@ -3,6 +3,10 @@
## next release
### Features
### Improvements

* Print logprep warnings in the rule corpus tester only in the detailed reports instead of the
summary.

### Bugfix

* Fix error when writing too large documents into Opensearch/Elasticsearch
logprep/util/auto_rule_tester/auto_rule_corpus_tester.py (45 additions, 5 deletions)

@@ -78,9 +78,12 @@
If one or more test cases fail this tester ends with an exit code of 1, otherwise 0.
"""
import io

# pylint: enable=anomalous-backslash-in-string
# pylint: disable=protected-access
import json
import logging
import os
import re
import shutil
@@ -144,10 +147,12 @@ class TestCase:
    generated_extra_output: dict = field(validator=validators.instance_of(list), default=[])
    failed: bool = field(validator=validators.instance_of(bool), default=False)
    report: List = Factory(list)
    warnings: str = field(default="")

    def __init__(self, config_path, input_test_data_path):
        self._original_config_path = config_path
        self._input_test_data_path = input_test_data_path
        self.log_capture_string = None

    @cached_property
    def _tmp_dir(self):
@@ -168,6 +173,17 @@ def _test_cases(self):
            raise ValueError(f"The following TestCases have no input documents: {no_input_files}")
        return dict(sorted(test_cases.items()))

    @cached_property
    def _logprep_logger(self):
        logprep_logger = getLogger("logprep-rule-corpus-tester")
        logprep_logger.propagate = False
        logprep_logger.setLevel(logging.WARNING)
        self.log_capture_string = io.StringIO()
        self.stream_handler = logging.StreamHandler(self.log_capture_string)
        self.stream_handler.setLevel(logging.WARNING)
        logprep_logger.addHandler(self.stream_handler)
        return logprep_logger

    @cached_property
    def _pipeline(self):
        merged_input_file_path = Path(self._tmp_dir) / "input.json"
@@ -180,6 +196,7 @@ def _pipeline(self):
        config.verify_pipeline_without_processor_outputs(getLogger("logprep"))
        del config["output"]
        pipeline = Pipeline(config=config)
        pipeline.logger = self._logprep_logger
        return pipeline

    def run(self):
@@ -206,13 +223,24 @@ def _run_pipeline_per_test_case(self):
        for test_case_id, test_case in self._test_cases.items():
            _ = [processor.setup() for processor in self._pipeline._pipeline]
            parsed_event, extra_outputs = self._pipeline.process_pipeline()
            test_case.warnings = self._retrieve_log_capture()
            extra_outputs = align_extra_output_formats(extra_outputs)
            test_case.generated_output = parsed_event
            test_case.generated_extra_output = extra_outputs
            self._compare_logprep_outputs(test_case_id, parsed_event)
            self._compare_extra_data_output(test_case_id, extra_outputs)
            self._print_pass_fail_statements(test_case_id)

    def _retrieve_log_capture(self):
        log_capture = self.log_capture_string.getvalue()
        # set new log_capture to clear previous entries
        self.log_capture_string = io.StringIO()
        self.stream_handler = logging.StreamHandler(self.log_capture_string)
        self.stream_handler.setLevel(logging.WARNING)
        self._logprep_logger.handlers.clear()
        self._logprep_logger.addHandler(self.stream_handler)
        return log_capture

    def _compare_logprep_outputs(self, test_case_id, logprep_output):
        test_case = self._test_cases.get(test_case_id, {})
        if test_case.expected_output:
@@ -319,29 +347,41 @@ def _print_pass_fail_statements(self, test_case_id):
            status = f"{Style.BRIGHT}{Fore.RESET} SKIPPED - (no expected output given)"
        elif len(test_case.report) > 0:
            status = f"{Style.BRIGHT}{Fore.RED} FAILED"
        elif test_case.warnings:
            status = f"{Style.BRIGHT}{Fore.YELLOW} PASSED - (with warnings)"

        print(f"{Fore.BLUE} Test Case: {Fore.CYAN}{test_case_id} {status}{Style.RESET_ALL}")

    def _print_test_reports(self):
        if not any(case.failed for case in self._test_cases.values()):
            return
        print(Style.BRIGHT + "# Test Cases Detailed Reports:" + Style.RESET_ALL)
        for test_case_id, test_case in self._test_cases.items():
            if test_case.report and test_case.expected_output:
            if (test_case.warnings or test_case.report) and test_case.expected_output:
                self._print_long_test_result(test_case_id, test_case)
                print()

    def _print_long_test_result(self, test_case_id, test_case):
        report_title = f"test report for '{test_case_id}'"
        print(f"{Fore.RED}{Style.BRIGHT}{report_title}{Style.RESET_ALL}")
        print_logprep_output = True
        if test_case.warnings and not test_case.report:
            print(Fore.GREEN + "Test passed, but with following warnings:" + Fore.RESET)
            print(test_case.warnings)
            print_logprep_output = False
        if test_case.warnings and test_case.report:
            print(Fore.RED + "Logprep Warnings:" + Fore.RESET)
            print(test_case.warnings)
        for statement in test_case.report:
            if isinstance(statement, (dict, list)):
                pprint(statement)
            else:
                print(statement)
        print(Fore.RED + "Logprep Event Output:" + Fore.RESET)
        pprint(test_case.generated_output)
        print(Fore.RED + "Logprep Extra Data Output:" + Fore.RESET)
        pprint(test_case.generated_extra_output)
        if print_logprep_output:
            print(Fore.RED + "Logprep Event Output:" + Fore.RESET)
            pprint(test_case.generated_output)
            print(Fore.RED + "Logprep Extra Data Output:" + Fore.RESET)
            pprint(test_case.generated_extra_output)
        print(f"{Fore.RED}{Style.BRIGHT}{report_title}{Style.RESET_ALL}")

    def _print_test_summary(self):
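For context: the buffer swap in `_retrieve_log_capture` above keeps warnings from leaking between test cases. A hedged sketch of the same pattern in isolation (the function name `drain_capture` is illustrative, not from the commit):

```python
import io
import logging

def drain_capture(logger, old_buffer):
    """Return everything captured so far and rebind the logger to a fresh buffer."""
    captured = old_buffer.getvalue()
    new_buffer = io.StringIO()
    new_handler = logging.StreamHandler(new_buffer)
    new_handler.setLevel(logging.WARNING)
    logger.handlers.clear()         # detach the handler bound to the old buffer
    logger.addHandler(new_handler)  # subsequent warnings go to the fresh buffer
    return captured, new_buffer
```

Swapping in a fresh `StringIO` is a simple way to clear previous entries, matching the inline comment in the diff.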
tests/unit/util/test_auto_rule_corpus_tester.py (30 additions, 0 deletions)

@@ -3,6 +3,7 @@
# pylint: disable=too-many-arguments
import json
import os
import re
from json import JSONDecodeError
from unittest import mock

@@ -465,3 +466,32 @@ def test_corpus_tests_dont_share_cache_between_runs_by_resetting_processors(
        for expected_print in expected_prints:
            assert expected_print in console_output
        mock_exit.assert_called_with(0)

    @mock.patch("logprep.util.auto_rule_tester.auto_rule_corpus_tester.sys.exit")
    def test_warnings_are_printed_inside_the_detailed_reports(self, mock_exit, tmp_path, capsys):
        test_case_data = {
            "input": {
                "winlog": {"event_id": "2222", "event_data": {"Test1": 1, "Test2": 2}},
                "test_normalized": "exists already",
            },
            "expected_output": {
                "winlog": {"event_id": "2222", "event_data": {"Test1": 1, "Test2": 2}},
                "test_normalized": {"test": {"field1": 1, "field2": 2}},
            },
            "expected_extra_output": [],
        }
        test_data_dir = tmp_path / "test_data"
        os.makedirs(test_data_dir, exist_ok=True)
        write_test_case_data_tmp_files(test_data_dir, "test_case_one", test_case_data)
        config_path = "tests/testdata/config/config.yml"
        corpus_tester = RuleCorpusTester(config_path, test_data_dir)
        corpus_tester.run()
        console_output, console_error = capsys.readouterr()
        assert console_error == ""
        warnings_inside_details_pattern = (
            r".*Test Cases Detailed Reports.*test_case_one.*"
            r"Logprep Warnings.*FieldExistsWarning.*test_case_one.*"
            r"Test Overview"
        )
        assert re.match(warnings_inside_details_pattern, console_output, flags=re.DOTALL)
        mock_exit.assert_called_with(1)
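The new test asserts ordering, not just presence: with `re.DOTALL`, each `.*` can span newlines, so the pattern only matches if the warnings appear inside the detailed report section, between the report header and the summary. A tiny standalone illustration (simulated output, not real tester text):

```python
import re

console_output = (
    "# Test Cases Detailed Reports:\n"
    "Logprep Warnings:\n"
    "FieldExistsWarning: ...\n"
    "# Test Overview\n"
)
# re.DOTALL lets ".*" cross newlines, so this asserts the order of the sections.
assert re.match(
    r".*Detailed Reports.*Logprep Warnings.*Test Overview",
    console_output,
    flags=re.DOTALL,
)
```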
