Skip to content

Commit

Permalink
feat(tests): verify that selected filings exist in the test repo
Browse files Browse the repository at this point in the history
  • Loading branch information
Elijas committed Dec 16, 2023
1 parent 64e416a commit aad0261
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 9 deletions.
6 changes: 5 additions & 1 deletion tests/accuracy/structure_and_text/selected-filings.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
accession_numbers:
- 0000320193-23-000077 # 10-Q AAPL Apple Inc.
- 0000320193-23-000077 # 10-Q AAPL Apple Inc. 2023-08-04
- 0000950170-23-014423 # 10-Q MSFT Microsoft Corp 2023-04-25
- 0001652044-23-000094 # 10-Q GOOG Alphabet Inc. 2023-10-25
- 0001652044-23-000070 # 10-Q GOOG Alphabet Inc. 2023-07-26
- 0001326801-19-000037 # 10-Q META Meta Platforms Inc. 2019-04-25
3 changes: 2 additions & 1 deletion tests/accuracy/structure_and_text/summarize_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def main():
# STEP: Show and save the summary
print("# Selected filings:", [r.identifier for r in filings])
print(
"# Summary:", json.dumps(summary, indent=4, sort_keys=False, ensure_ascii=False)
"# Summary:",
json.dumps(summary, indent=4, sort_keys=False, ensure_ascii=False),
)
with LAST_ACCURACY_TEST_RESULT_PATH.open("w") as file:
json.dump(summary, file, indent=4, sort_keys=False, ensure_ascii=False)
Expand Down
21 changes: 21 additions & 0 deletions tests/accuracy/structure_and_text/test_selected_filings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pathlib import Path

import yaml

from tests.utils import traverse_repository_for_filings

DEFAULT_YAML = Path(__file__).parent / "selected-filings.yaml"


def test_filings_exist():
    """Check that each accession number selected in the YAML file exists.

    Reads the ``accession_numbers`` list from ``DEFAULT_YAML`` and asserts
    that every entry matches the ``accession_number`` of some report yielded
    by ``traverse_repository_for_filings()``.
    """
    with DEFAULT_YAML.open("r") as yaml_file:
        config = yaml.safe_load(yaml_file)
    wanted = list(config["accession_numbers"])
    # An empty selection would make the loop below pass vacuously.
    assert wanted, "No accession numbers found in YAML file."

    # Collect the accession numbers of all filings found by the traversal.
    available = set()
    for report in traverse_repository_for_filings():
        available.add(report.accession_number)

    # Fail on the first selected filing that the traversal did not yield.
    for accession_number in wanted:
        is_present = accession_number in available
        assert is_present, f"Missing {accession_number}"
14 changes: 10 additions & 4 deletions tests/accuracy/structure_and_text/test_structure_and_text.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import warnings
from collections import Counter
from pathlib import Path
from pprint import pprint
Expand Down Expand Up @@ -52,11 +53,16 @@ def test_structure_and_text(

# STEP: Load (or save) the expected elements
if not report.expected_structure_and_text.exists():
with report.expected_structure_and_text.open("w") as f:
json.dump(actual_json, f, sort_keys=True, indent=4, ensure_ascii=False)
if request.config.getoption("--create-missing-files"):
with report.expected_structure_and_text.open("w") as f:
json.dump(actual_json, f, sort_keys=True, indent=4, ensure_ascii=False)
warnings.warn(
f"Created {report.expected_structure_and_text.name}. Please manually review it and commit the file.",
stacklevel=0,
)
else:
pytest.fail(
f"Expected structure and text file did not exist. "
f"Created {report.expected_structure_and_text}. Please review and commit the file.",
f"File {report.expected_structure_and_text.name} does not exist. Use --create-missing-files to create it.",
)
with report.expected_structure_and_text.open("r") as f:
expected_elements_json = json.load(f)
Expand Down
21 changes: 21 additions & 0 deletions tests/accuracy/tables/test_selected_filings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pathlib import Path

import yaml

from tests.utils import traverse_repository_for_filings

DEFAULT_YAML = Path(__file__).parent / "selected-filings.yaml"


def test_filings_exist():
    """Ensure the filings selected for this test suite can be found.

    Loads ``accession_numbers`` from ``DEFAULT_YAML`` and asserts that each
    one equals the ``accession_number`` of a report produced by
    ``traverse_repository_for_filings()``.
    """
    with DEFAULT_YAML.open("r") as stream:
        loaded = yaml.safe_load(stream)
    requested = list(loaded["accession_numbers"])
    # Guard against an empty list, which would otherwise pass trivially.
    assert requested, "No accession numbers found in YAML file."

    # Set of accession numbers for every report the traversal yields.
    known = set(
        report.accession_number for report in traverse_repository_for_filings()
    )

    # Stop at the first requested number that is not among the known ones.
    for accession_number in requested:
        found = accession_number in known
        assert found, f"Missing {accession_number}"
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ def pytest_addoption(parser):
default=False,
help="Print the output of the tests to the console.",
)
parser.addoption(
"--create-missing-files",
action="store_true",
default=False,
help="Create missing files.",
)


@pytest.fixture(scope="session")
Expand Down
17 changes: 16 additions & 1 deletion tests/snapshot/manage_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ def manage_snapshots(
generation_results: list[OverwriteResult] = []
items_not_matching_filters_count = 0
processed_documents = 0
for report_detail in traverse_repository_for_filings(Path(data_dir)):
reports = list(traverse_repository_for_filings(Path(data_dir)))
if accession_numbers:
test_filings_exist(accession_numbers)
for report_detail in reports:
if (
(report_detail.document_type not in document_types)
and (report_detail.company_name not in company_names)
Expand Down Expand Up @@ -280,3 +283,15 @@ def diff_lines(expected, actual, identifier, verbose):
unexpected_count += 1
line_number_actual += 1
return missing_count, unexpected_count, diff_output.strip()


def test_filings_exist(accession_numbers):
    """Validate that every given accession number matches a known filing.

    Called by ``manage_snapshots`` when accession numbers were supplied.
    The checks raise ``AssertionError`` explicitly instead of using bare
    ``assert`` statements, so they still run when Python is started with
    ``-O`` (which strips ``assert``). The exception type and messages are
    the same as before.

    Args:
        accession_numbers: Iterable of accession numbers to look up.

    Raises:
        AssertionError: If ``accession_numbers`` is empty, or if any entry
            is not the ``accession_number`` of a report yielded by
            ``traverse_repository_for_filings()``.
    """
    accession_numbers = list(accession_numbers)
    if not accession_numbers:
        raise AssertionError("No accession numbers found in YAML file.")

    # NOTE(review): the traversal is called without arguments here, while
    # manage_snapshots traverses Path(data_dir) -- confirm that checking the
    # default location is intended.
    existing_numbers = {
        report.accession_number for report in traverse_repository_for_filings()
    }

    # Report the first missing number, in the order the caller supplied them.
    for accession_number in accession_numbers:
        if accession_number not in existing_numbers:
            raise AssertionError(f"Missing {accession_number}")
7 changes: 5 additions & 2 deletions tests/snapshot/selected-filings.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
accession_numbers:
- 0000320193-23-000077 # 10-Q AAPL Apple
- 0001652044-23-000070 # 10-Q GOOG Alphabet
- 0000320193-23-000077 # 10-Q AAPL Apple Inc. 2023-08-04
- 0000950170-23-014423 # 10-Q MSFT Microsoft Corp 2023-04-25
- 0001652044-23-000094 # 10-Q GOOG Alphabet Inc. 2023-10-25
- 0001652044-23-000070 # 10-Q GOOG Alphabet Inc. 2023-07-26
- 0001326801-19-000037 # 10-Q META Meta Platforms Inc. 2019-04-25
7 changes: 7 additions & 0 deletions tests/snapshot/test_selected_filings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pathlib import Path

import yaml

from tests.utils import traverse_repository_for_filings

DEFAULT_YAML = Path(__file__).parent / "selected-filings.yaml"

0 comments on commit aad0261

Please sign in to comment.