Print warnings in detailed reports only in rule corpus tester (#404)
* collect warnings and print them only in reports

* update CHANGELOG.md

* add test

* fix black
dtrai2 authored Jul 14, 2023
1 parent 5e6d7ae · commit 1d7fda6
Showing 3 changed files with 79 additions and 5 deletions.
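The change routes logprep's pipeline warnings through a dedicated logger whose WARNING-level records are collected in an in-memory buffer instead of being printed immediately. Below is a minimal, self-contained sketch of that capture technique (not code from this commit; the helper name `make_capturing_logger` is illustrative):

```python
import io
import logging

def make_capturing_logger(name):
    """Create a logger whose WARNING+ records land in an in-memory buffer."""
    logger = logging.getLogger(name)
    logger.propagate = False  # keep captured warnings off the root logger / console
    logger.setLevel(logging.WARNING)
    buffer = io.StringIO()
    handler = logging.StreamHandler(buffer)
    handler.setLevel(logging.WARNING)
    logger.addHandler(handler)
    return logger, buffer

logger, buffer = make_capturing_logger("demo-capture")
logger.warning("field already exists")
assert buffer.getvalue() == "field already exists\n"  # held back for a later report
```

Because `propagate` is disabled, nothing reaches the console during the run; the tester decides afterwards whether, and where, to print the captured text.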
CHANGELOG.md (4 additions, 0 deletions)

@@ -3,6 +3,10 @@
## next release
### Features
### Improvements

* Print logprep warnings in the rule corpus tester only in the detailed reports instead of the
summary.

### Bugfix

* Fix error when writing too large documents into Opensearch/Elasticsearch
logprep/util/auto_rule_tester/auto_rule_corpus_tester.py (45 additions, 5 deletions)

@@ -78,9 +78,12 @@
If one or more test cases fail this tester ends with an exit code of 1, otherwise 0.
"""
import io

# pylint: enable=anomalous-backslash-in-string
# pylint: disable=protected-access
import json
import logging
import os
import re
import shutil
@@ -144,10 +147,12 @@ class TestCase:
    generated_extra_output: dict = field(validator=validators.instance_of(list), default=[])
    failed: bool = field(validator=validators.instance_of(bool), default=False)
    report: List = Factory(list)
    warnings: str = field(default="")

    def __init__(self, config_path, input_test_data_path):
        self._original_config_path = config_path
        self._input_test_data_path = input_test_data_path
        self.log_capture_string = None

    @cached_property
    def _tmp_dir(self):
@@ -168,6 +173,17 @@ def _test_cases(self):
            raise ValueError(f"The following TestCases have no input documents: {no_input_files}")
        return dict(sorted(test_cases.items()))

    @cached_property
    def _logprep_logger(self):
        logprep_logger = getLogger("logprep-rule-corpus-tester")
        logprep_logger.propagate = False
        logprep_logger.setLevel(logging.WARNING)
        self.log_capture_string = io.StringIO()
        self.stream_handler = logging.StreamHandler(self.log_capture_string)
        self.stream_handler.setLevel(logging.WARNING)
        logprep_logger.addHandler(self.stream_handler)
        return logprep_logger

    @cached_property
    def _pipeline(self):
        merged_input_file_path = Path(self._tmp_dir) / "input.json"
@@ -180,6 +196,7 @@ def _pipeline(self):
        config.verify_pipeline_without_processor_outputs(getLogger("logprep"))
        del config["output"]
        pipeline = Pipeline(config=config)
        pipeline.logger = self._logprep_logger
        return pipeline

    def run(self):
@@ -206,13 +223,24 @@ def _run_pipeline_per_test_case(self):
        for test_case_id, test_case in self._test_cases.items():
            _ = [processor.setup() for processor in self._pipeline._pipeline]
            parsed_event, extra_outputs = self._pipeline.process_pipeline()
            test_case.warnings = self._retrieve_log_capture()
            extra_outputs = align_extra_output_formats(extra_outputs)
            test_case.generated_output = parsed_event
            test_case.generated_extra_output = extra_outputs
            self._compare_logprep_outputs(test_case_id, parsed_event)
            self._compare_extra_data_output(test_case_id, extra_outputs)
            self._print_pass_fail_statements(test_case_id)

    def _retrieve_log_capture(self):
        log_capture = self.log_capture_string.getvalue()
        # set new log_capture to clear previous entries
        self.log_capture_string = io.StringIO()
        self.stream_handler = logging.StreamHandler(self.log_capture_string)
        self.stream_handler.setLevel(logging.WARNING)
        self._logprep_logger.handlers.clear()
        self._logprep_logger.addHandler(self.stream_handler)
        return log_capture

    def _compare_logprep_outputs(self, test_case_id, logprep_output):
        test_case = self._test_cases.get(test_case_id, {})
        if test_case.expected_output:
@@ -319,29 +347,41 @@ def _print_pass_fail_statements(self, test_case_id):
            status = f"{Style.BRIGHT}{Fore.RESET} SKIPPED - (no expected output given)"
        elif len(test_case.report) > 0:
            status = f"{Style.BRIGHT}{Fore.RED} FAILED"
        elif test_case.warnings:
            status = f"{Style.BRIGHT}{Fore.YELLOW} PASSED - (with warnings)"

        print(f"{Fore.BLUE} Test Case: {Fore.CYAN}{test_case_id} {status}{Style.RESET_ALL}")

    def _print_test_reports(self):
        if not any(case.failed for case in self._test_cases.values()):
            return
        print(Style.BRIGHT + "# Test Cases Detailed Reports:" + Style.RESET_ALL)
        for test_case_id, test_case in self._test_cases.items():
            if test_case.report and test_case.expected_output:
            if (test_case.warnings or test_case.report) and test_case.expected_output:
                self._print_long_test_result(test_case_id, test_case)
                print()

    def _print_long_test_result(self, test_case_id, test_case):
        report_title = f"test report for '{test_case_id}'"
        print(f"{Fore.RED}{Style.BRIGHT}{report_title}{Style.RESET_ALL}")
        print_logprep_output = True
        if test_case.warnings and not test_case.report:
            print(Fore.GREEN + "Test passed, but with following warnings:" + Fore.RESET)
            print(test_case.warnings)
            print_logprep_output = False
        if test_case.warnings and test_case.report:
            print(Fore.RED + "Logprep Warnings:" + Fore.RESET)
            print(test_case.warnings)
        for statement in test_case.report:
            if isinstance(statement, (dict, list)):
                pprint(statement)
            else:
                print(statement)
        print(Fore.RED + "Logprep Event Output:" + Fore.RESET)
        pprint(test_case.generated_output)
        print(Fore.RED + "Logprep Extra Data Output:" + Fore.RESET)
        pprint(test_case.generated_extra_output)
        if print_logprep_output:
            print(Fore.RED + "Logprep Event Output:" + Fore.RESET)
            pprint(test_case.generated_output)
            print(Fore.RED + "Logprep Extra Data Output:" + Fore.RESET)
            pprint(test_case.generated_extra_output)
        print(f"{Fore.RED}{Style.BRIGHT}{report_title}{Style.RESET_ALL}")

    def _print_test_summary(self):
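For context: the buffer swap in `_retrieve_log_capture` above keeps warnings from leaking between test cases. A hedged sketch of the same pattern in isolation (the function name `drain_capture` is illustrative, not from the commit):

```python
import io
import logging

def drain_capture(logger, old_buffer):
    """Return everything captured so far and rebind the logger to a fresh buffer."""
    captured = old_buffer.getvalue()
    new_buffer = io.StringIO()
    new_handler = logging.StreamHandler(new_buffer)
    new_handler.setLevel(logging.WARNING)
    logger.handlers.clear()         # detach the handler bound to the old buffer
    logger.addHandler(new_handler)  # subsequent warnings go to the fresh buffer
    return captured, new_buffer
```

Swapping in a fresh `StringIO` is a simple way to clear previous entries, matching the inline comment in the diff.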
tests/unit/util/test_auto_rule_corpus_tester.py (30 additions, 0 deletions)

@@ -3,6 +3,7 @@
# pylint: disable=too-many-arguments
import json
import os
import re
from json import JSONDecodeError
from unittest import mock

@@ -465,3 +466,32 @@ def test_corpus_tests_dont_share_cache_between_runs_by_resetting_processors(
        for expected_print in expected_prints:
            assert expected_print in console_output
        mock_exit.assert_called_with(0)

    @mock.patch("logprep.util.auto_rule_tester.auto_rule_corpus_tester.sys.exit")
    def test_warnings_are_printed_inside_the_detailed_reports(self, mock_exit, tmp_path, capsys):
        test_case_data = {
            "input": {
                "winlog": {"event_id": "2222", "event_data": {"Test1": 1, "Test2": 2}},
                "test_normalized": "exists already",
            },
            "expected_output": {
                "winlog": {"event_id": "2222", "event_data": {"Test1": 1, "Test2": 2}},
                "test_normalized": {"test": {"field1": 1, "field2": 2}},
            },
            "expected_extra_output": [],
        }
        test_data_dir = tmp_path / "test_data"
        os.makedirs(test_data_dir, exist_ok=True)
        write_test_case_data_tmp_files(test_data_dir, "test_case_one", test_case_data)
        config_path = "tests/testdata/config/config.yml"
        corpus_tester = RuleCorpusTester(config_path, test_data_dir)
        corpus_tester.run()
        console_output, console_error = capsys.readouterr()
        assert console_error == ""
        warnings_inside_details_pattern = (
            r".*Test Cases Detailed Reports.*test_case_one.*"
            r"Logprep Warnings.*FieldExistsWarning.*test_case_one.*"
            r"Test Overview"
        )
        assert re.match(warnings_inside_details_pattern, console_output, flags=re.DOTALL)
        mock_exit.assert_called_with(1)
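The new test asserts ordering, not just presence: with `re.DOTALL`, each `.*` can span newlines, so the pattern only matches if the warnings appear inside the detailed report section, between the report header and the summary. A tiny standalone illustration (simulated output, not real tester text):

```python
import re

console_output = (
    "# Test Cases Detailed Reports:\n"
    "Logprep Warnings:\n"
    "FieldExistsWarning: ...\n"
    "# Test Overview\n"
)
# re.DOTALL lets ".*" cross newlines, so this asserts the order of the sections.
assert re.match(
    r".*Detailed Reports.*Logprep Warnings.*Test Overview",
    console_output,
    flags=re.DOTALL,
)
```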
