feat(pdf): Test Result PDF report
rboni-dk committed Sep 30, 2024
1 parent 4ead21b commit dea50a0
Showing 8 changed files with 418 additions and 214 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
@@ -60,6 +60,7 @@ dependencies = [
"concurrent_log_handler==0.9.25",
"cryptography==42.0.8",
"validators==0.33.0",
"reportlab==4.2.2",
]

[project.optional-dependencies]
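The new reportlab==4.2.2 dependency provides the "platypus" flowable API that the report module added below is built on. As a minimal, standalone sketch of that pattern (the names and content here are illustrative, not part of this commit):

from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, SimpleDocTemplate, Table

# Build a list of flowables (paragraphs, tables, ...) and hand it to the
# document template, which lays them out and writes the PDF.
styles = getSampleStyleSheet()
flowables = [
    Paragraph("Example report", styles["Title"]),
    Table([["Date", "2024-09-30"], ["Status", "Failed"]]),
]
SimpleDocTemplate("example.pdf").build(flowables)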
34 changes: 34 additions & 0 deletions testgen/ui/components/widgets/download_dialog.py
@@ -0,0 +1,34 @@
from collections.abc import Callable
from typing import Any

import streamlit as st


def download_dialog(
dialog_title: str,
file_name: str,
mime_type: str,
file_content_func: Callable[[], Any],
):
"""Wrapping a dialog and a download button together to allow generating the file contents only when needed."""

def _dialog_content():
# Wrap the dialog content in a fixed-height container so the dialog's height
# does not change when the download button is rendered.
with st.container(height=55, border=False):
spinner_col, button_col, _ = st.columns([.3, .4, .3])

with spinner_col:
with st.spinner(text="Generating file..."):
data = file_content_func()

with button_col:
st.download_button(
label=":material/download: Download",
data=data,
file_name=file_name,
mime=mime_type,
use_container_width=True
)

return st.dialog(title=dialog_title, width="small")(_dialog_content)()
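A minimal usage sketch for the widget (the calling page, button label, and file below are assumptions for illustration; the actual call sites are in the changed files not expanded on this page). The point of the pattern is that the potentially slow content function only runs once the dialog is open, while the spinner is showing:

import streamlit as st

from testgen.ui.components.widgets.download_dialog import download_dialog


def make_csv() -> bytes:
    # Stand-in for an expensive export; only invoked after the dialog opens.
    return b"col_a,col_b\n1,2\n"


if st.button("Export results"):
    download_dialog(
        dialog_title="Download results",
        file_name="results.csv",
        mime_type="text/csv",
        file_content_func=make_csv,
    )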
Empty file added testgen/ui/pdf/__init__.py
130 changes: 130 additions & 0 deletions testgen/ui/pdf/test_result_report.py
@@ -0,0 +1,130 @@
from reportlab.lib import enums
from reportlab.lib.styles import ParagraphStyle
from reportlab.platypus import Paragraph, SimpleDocTemplate, Table, TableStyle

from testgen.ui.services.database_service import get_schema
from testgen.ui.services.test_results_service import (
do_source_data_lookup,
do_source_data_lookup_custom,
get_test_result_history,
)

PARA_STYLE_DEFAULT = ParagraphStyle(
"default",
fontSize=8,
)

PARA_STYLE_INFO = PARA_STYLE_DEFAULT


PARA_STYLE_ERROR = PARA_STYLE_DEFAULT


PARA_STYLE_MONO = ParagraphStyle(
"heading_1",
PARA_STYLE_DEFAULT,

)


PARA_STYLE_H1 = ParagraphStyle(
"heading_1",
PARA_STYLE_DEFAULT,
fontSize=12,
leading=16,
)

PARA_STYLE_TITLE = ParagraphStyle(
"title",
PARA_STYLE_DEFAULT,
fontSize=18,
leading=30,
alignment=enums.TA_CENTER,
)

TABLE_STYLE_SUMMARY = TableStyle(
(
# All cells
("ALIGN", (0, 0), (-1, -1), "LEFT"),
("VALIGN", (0, 0), (-1, -1), "TOP"),
("FONT", (0, 0), (-1, -1), "Helvetica", 7),

# Label column (first column acts as row headers)
("FONT", (0, 0), (0, -1), "Helvetica-Bold"),
("ALIGN", (0, 0), (0, -1), "RIGHT"),
)
)

def get_report_content(tr_data):

yield Paragraph(f"TestGen Issue Report: {tr_data['result_status']}", PARA_STYLE_TITLE)

yield Paragraph("Summary", PARA_STYLE_H1)

summary_table_data = [
("Date", tr_data["test_date"]),
("Database/Schema", tr_data["schema_name"]),
("Table", tr_data["table_name"]),
("Column", tr_data["column_names"]),
("Table Group", tr_data["table_groups_name"]),
("Test Suite", tr_data["test_suite"]),
("Issue Type", "Test Result"),
("Risk Level", tr_data["severity"]),
("Data Quality Dimension", tr_data["dq_dimension"]),
("Test", f"""{tr_data["test_name_short"]}: {tr_data["test_name_long"]}\n{tr_data["test_description"]}"""),
("Result Measure", tr_data["result_measure"]),
("Threshold Value", f"""{tr_data["threshold_value"]} {tr_data["threshold_description"]}"""),
]
if tr_data["measure_uom_description"]:
summary_table_data.append(("Units", tr_data["measure_uom_description"]))

yield Table(summary_table_data, style=TABLE_STYLE_SUMMARY, hAlign="LEFT")

yield Paragraph("Usage Notes", PARA_STYLE_H1)
yield Paragraph(tr_data["usage_notes"], PARA_STYLE_DEFAULT)

yield Paragraph("Result History", PARA_STYLE_H1)

history_data = get_test_result_history(get_schema(), tr_data)

history_table_data = [
(r["test_date"], r["threshold_value"], r["result_measure"], r["result_status"])
for _, r in history_data.iterrows()
]

yield Table(history_table_data)

yield Paragraph("Sample Data", PARA_STYLE_H1)

if tr_data["test_type"] == "CUSTOM":
bad_data_status, bad_data_msg, lookup_query, sample_data = do_source_data_lookup_custom(get_schema(), tr_data)
else:
bad_data_status, bad_data_msg, lookup_query, sample_data = do_source_data_lookup(get_schema(), tr_data)
if bad_data_status in {"ND", "NA"}:
yield Paragraph(bad_data_msg, style=PARA_STYLE_INFO)
elif bad_data_status == "ERR":
yield Paragraph(bad_data_msg, style=PARA_STYLE_ERROR)
elif sample_data is None:
yield Paragraph("An unknown error was encountered.", style=PARA_STYLE_ERROR)
else:
if bad_data_msg:
yield Paragraph(bad_data_msg, style=PARA_STYLE_DEFAULT)

sample_data.fillna("[NULL]", inplace=True)

yield Table(
(
[col.replace("_", " ").title() for col in sample_data.columns],
*(data for _, data in sample_data.iterrows()),
)
)


yield Paragraph("SQL Query", PARA_STYLE_H1)

yield Paragraph(lookup_query, PARA_STYLE_MONO)


def create_report(filename, tr_data):
doc = SimpleDocTemplate(filename)
doc.build(flowables=list(get_report_content(tr_data)))
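The page-level wiring lives in the remaining changed files that are not expanded on this page. A hedged sketch of how create_report and download_dialog could be combined — SimpleDocTemplate accepts a file-like object in place of a filename, so the PDF can be built in memory; the helper names below are illustrative, not the commit's actual code:

import io

from testgen.ui.components.widgets.download_dialog import download_dialog
from testgen.ui.pdf.test_result_report import create_report


def _report_bytes(tr_data) -> bytes:
    # Build the PDF into an in-memory buffer instead of a file on disk.
    buffer = io.BytesIO()
    create_report(buffer, tr_data)
    return buffer.getvalue()


def render_report_download_dialog(tr_data: dict) -> None:
    download_dialog(
        dialog_title="Issue Report",
        file_name="testgen_issue_report.pdf",
        mime_type="application/pdf",
        file_content_func=lambda: _report_bytes(tr_data),
    )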
2 changes: 1 addition & 1 deletion testgen/ui/services/form_service.py
@@ -11,7 +11,7 @@

import pandas as pd
import streamlit as st
import validators
from attrs import validators
from pandas.api.types import is_datetime64_any_dtype
from st_aggrid import AgGrid, ColumnsAutoSizeMode, DataReturnMode, GridOptionsBuilder, GridUpdateMode, JsCode
from streamlit_extras.no_default_selectbox import selectbox
21 changes: 21 additions & 0 deletions testgen/ui/services/test_definition_service.py
@@ -22,6 +22,27 @@ def get_test_definitions(
)


def get_test_definition(db_schema, test_def_id):
str_sql = f"""
SELECT d.id::VARCHAR, tt.test_name_short as test_name, tt.test_name_long as full_name,
tt.test_description as description, tt.usage_notes,
d.column_name,
d.baseline_value, d.baseline_ct, d.baseline_avg, d.baseline_sd, d.threshold_value,
d.subset_condition, d.groupby_names, d.having_condition, d.match_schema_name,
d.match_table_name, d.match_column_names, d.match_subset_condition,
d.match_groupby_names, d.match_having_condition,
d.window_date_column, d.window_days::VARCHAR as window_days,
d.custom_query,
d.severity, tt.default_severity,
d.test_active, d.lock_refresh, d.last_manual_update
FROM {db_schema}.test_definitions d
INNER JOIN {db_schema}.test_types tt
ON (d.test_type = tt.test_type)
WHERE d.id = '{test_def_id}';
"""
return database_service.retrieve_data(str_sql)


def delete(test_definition_ids, dry_run=False):
schema = st.session_state["dbschema"]
usage_result = test_definition_queries.get_test_definition_usage(schema, test_definition_ids)
184 changes: 184 additions & 0 deletions testgen/ui/services/test_results_service.py
@@ -0,0 +1,184 @@
import pandas as pd

from testgen.common import ConcatColumnList
from testgen.ui.services import database_service as db
from testgen.ui.services.string_service import empty_if_null
from testgen.ui.services.test_definition_service import get_test_definition


def get_test_result_history(db_schema, tr_data):
if tr_data["auto_gen"]:
str_where = f"""
WHERE test_suite_id = '{tr_data["test_suite_id"]}'
AND table_name = '{tr_data["table_name"]}'
AND column_names = '{tr_data["column_names"]}'
AND test_type = '{tr_data["test_type"]}'
AND auto_gen = TRUE
"""
else:
str_where = f"""
WHERE test_definition_id_runtime = '{tr_data["test_definition_id_runtime"]}'
"""

str_sql = f"""
SELECT test_date, test_type,
test_name_short, test_name_long, measure_uom, test_operator,
threshold_value::NUMERIC, result_measure, result_status
FROM {db_schema}.v_test_results {str_where}
ORDER BY test_date DESC;
"""

df = db.retrieve_data(str_sql)
# Clean Up
df["test_date"] = pd.to_datetime(df["test_date"])

return df


def do_source_data_lookup_custom(db_schema, tr_data):
# Define the query
str_sql = f"""
SELECT d.custom_query as lookup_query, tg.table_group_schema, c.project_qc_schema,
c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted,
c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase
FROM {db_schema}.test_definitions d
INNER JOIN {db_schema}.table_groups tg
ON ('{tr_data["table_groups_id"]}'::UUID = tg.id)
INNER JOIN {db_schema}.connections c
ON (tg.connection_id = c.connection_id)
WHERE d.id = '{tr_data["test_definition_id_current"]}';
"""

try:
# Retrieve SQL for customer lookup
lst_query = db.retrieve_data_list(str_sql)

# Retrieve and return data as df
if lst_query:
str_sql = lst_query[0]["lookup_query"]
str_sql = str_sql.replace("{DATA_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"]))
df = db.retrieve_target_db_df(
lst_query[0]["sql_flavor"],
lst_query[0]["project_host"],
lst_query[0]["project_port"],
lst_query[0]["project_db"],
lst_query[0]["project_user"],
lst_query[0]["project_pw_encrypted"],
str_sql,
lst_query[0]["url"],
lst_query[0]["connect_by_url"],
lst_query[0]["connect_by_key"],
lst_query[0]["private_key"],
lst_query[0]["private_key_passphrase"],
)
if df.empty:
return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None
else:
return "OK", None, str_sql, df
else:
return "NA", "A source data lookup for this Test is not available.", None, None

except Exception as e:
return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}", str_sql, None


def do_source_data_lookup(db_schema, tr_data, sql_only=False):
# Define the query
str_sql = f"""
SELECT t.lookup_query, tg.table_group_schema, c.project_qc_schema,
c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted,
c.url, c.connect_by_url,
c.connect_by_key, c.private_key, c.private_key_passphrase
FROM {db_schema}.target_data_lookups t
INNER JOIN {db_schema}.table_groups tg
ON ('{tr_data["table_groups_id"]}'::UUID = tg.id)
INNER JOIN {db_schema}.connections c
ON (tg.connection_id = c.connection_id)
AND (t.sql_flavor = c.sql_flavor)
WHERE t.error_type = 'Test Results'
AND t.test_id = '{tr_data["test_type_id"]}'
AND t.lookup_query > '';
"""

def replace_parms(df_test, str_query):
if df_test.empty:
raise ValueError("This test definition is no longer present.")

str_query = str_query.replace("{TARGET_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"]))
str_query = str_query.replace("{TABLE_NAME}", empty_if_null(tr_data["table_name"]))
str_query = str_query.replace("{COLUMN_NAME}", empty_if_null(tr_data["column_names"]))
str_query = str_query.replace("{DATA_QC_SCHEMA}", empty_if_null(lst_query[0]["project_qc_schema"]))
str_query = str_query.replace("{TEST_DATE}", str(empty_if_null(tr_data["test_date"])))

str_query = str_query.replace("{CUSTOM_QUERY}", empty_if_null(df_test.at[0, "custom_query"]))
str_query = str_query.replace("{BASELINE_VALUE}", empty_if_null(df_test.at[0, "baseline_value"]))
str_query = str_query.replace("{BASELINE_CT}", empty_if_null(df_test.at[0, "baseline_ct"]))
str_query = str_query.replace("{BASELINE_AVG}", empty_if_null(df_test.at[0, "baseline_avg"]))
str_query = str_query.replace("{BASELINE_SD}", empty_if_null(df_test.at[0, "baseline_sd"]))
str_query = str_query.replace("{THRESHOLD_VALUE}", empty_if_null(df_test.at[0, "threshold_value"]))

str_substitute = empty_if_null(df_test.at[0, "subset_condition"])
str_substitute = "1=1" if str_substitute == "" else str_substitute
str_query = str_query.replace("{SUBSET_CONDITION}", str_substitute)

str_query = str_query.replace("{GROUPBY_NAMES}", empty_if_null(df_test.at[0, "groupby_names"]))
str_query = str_query.replace("{HAVING_CONDITION}", empty_if_null(df_test.at[0, "having_condition"]))
str_query = str_query.replace("{MATCH_SCHEMA_NAME}", empty_if_null(df_test.at[0, "match_schema_name"]))
str_query = str_query.replace("{MATCH_TABLE_NAME}", empty_if_null(df_test.at[0, "match_table_name"]))
str_query = str_query.replace("{MATCH_COLUMN_NAMES}", empty_if_null(df_test.at[0, "match_column_names"]))

str_substitute = empty_if_null(df_test.at[0, "match_subset_condition"])
str_substitute = "1=1" if str_substitute == "" else str_substitute
str_query = str_query.replace("{MATCH_SUBSET_CONDITION}", str_substitute)

str_query = str_query.replace("{MATCH_GROUPBY_NAMES}", empty_if_null(df_test.at[0, "match_groupby_names"]))
str_query = str_query.replace("{MATCH_HAVING_CONDITION}", empty_if_null(df_test.at[0, "match_having_condition"]))
str_query = str_query.replace("{COLUMN_NAME_NO_QUOTES}", empty_if_null(tr_data["column_names"]))

str_query = str_query.replace("{WINDOW_DATE_COLUMN}", empty_if_null(df_test.at[0, "window_date_column"]))
str_query = str_query.replace("{WINDOW_DAYS}", empty_if_null(df_test.at[0, "window_days"]))

str_substitute = ConcatColumnList(tr_data["column_names"], "<NULL>")
str_query = str_query.replace("{CONCAT_COLUMNS}", str_substitute)
str_substitute = ConcatColumnList(df_test.at[0, "match_groupby_names"], "<NULL>")
str_query = str_query.replace("{CONCAT_MATCH_GROUPBY}", str_substitute)

if str_query is None or str_query == "":
raise ValueError("Lookup query is not defined for this Test Type.")
return str_query

try:
# Retrieve SQL for customer lookup
lst_query = db.retrieve_data_list(str_sql)

if sql_only:
return lst_query, replace_parms, None

# Retrieve and return data as df
if lst_query:
df_test = get_test_definition(db_schema, tr_data["test_definition_id_current"])

str_sql = replace_parms(df_test, lst_query[0]["lookup_query"])
df = db.retrieve_target_db_df(
lst_query[0]["sql_flavor"],
lst_query[0]["project_host"],
lst_query[0]["project_port"],
lst_query[0]["project_db"],
lst_query[0]["project_user"],
lst_query[0]["project_pw_encrypted"],
str_sql,
lst_query[0]["url"],
lst_query[0]["connect_by_url"],
lst_query[0]["connect_by_key"],
lst_query[0]["private_key"],
lst_query[0]["private_key_passphrase"],
)
if df.empty:
return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None
else:
return "OK", None, str_sql, df
else:
return "NA", "A source data lookup for this Test is not available.", None, None

except Exception as e:
return "ERR", f"Source data lookup query caused:\n\n{e.args[0]}", str_sql, None
