diff --git a/pyproject.toml b/pyproject.toml index 6e06b0d..038d591 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ dependencies = [ "concurrent_log_handler==0.9.25", "cryptography==42.0.8", "validators==0.33.0", + "reportlab==4.2.2", ] [project.optional-dependencies] diff --git a/testgen/ui/components/widgets/download_dialog.py b/testgen/ui/components/widgets/download_dialog.py new file mode 100644 index 0000000..34ec928 --- /dev/null +++ b/testgen/ui/components/widgets/download_dialog.py @@ -0,0 +1,34 @@ +from collections.abc import Callable +from typing import Any + +import streamlit as st + + +def download_dialog( + dialog_title: str, + file_name: str, + mime_type: str, + file_content_func: Callable[[], Any], +): + """Wrapping a dialog and a download button together to allow generating the file contents only when needed.""" + + def _dialog_content(): + # Encapsulating the dialog content in a container just to force its height and avoid the dialog to + # have its height changed when the button is rendered. 
+ with st.container(height=55, border=False): + spinner_col, button_col, _ = st.columns([.3, .4, .3]) + + with spinner_col: + with st.spinner(text="Generating file..."): + data = file_content_func() + + with button_col: + st.download_button( + label=":material/download: Download", + data=data, + file_name=file_name, + mime=mime_type, + use_container_width=True + ) + + return st.dialog(title=dialog_title, width="small")(_dialog_content)() diff --git a/testgen/ui/pdf/__init__.py b/testgen/ui/pdf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py new file mode 100644 index 0000000..464beaa --- /dev/null +++ b/testgen/ui/pdf/test_result_report.py @@ -0,0 +1,130 @@ +from reportlab.lib import enums +from reportlab.lib.styles import ParagraphStyle +from reportlab.platypus import Paragraph, SimpleDocTemplate, Table, TableStyle + +from testgen.ui.services.database_service import get_schema +from testgen.ui.services.test_results_service import ( + do_source_data_lookup, + do_source_data_lookup_custom, + get_test_result_history, +) + +PARA_STYLE_DEFAULT = ParagraphStyle( + "default", + fontSize=8, +) + +PARA_STYLE_INFO = PARA_STYLE_DEFAULT + + +PARA_STYLE_ERROR = PARA_STYLE_DEFAULT + + +PARA_STYLE_MONO = ParagraphStyle( + "heading_1", + PARA_STYLE_DEFAULT, + +) + + +PARA_STYLE_H1 = ParagraphStyle( + "heading_1", + PARA_STYLE_DEFAULT, + fontSize=12, + leading=16, +) + +PARA_STYLE_TITLE = ParagraphStyle( + "title", + PARA_STYLE_DEFAULT, + fontSize=18, + leading=30, + alignment=enums.TA_CENTER, +) + +TABLE_STYLE_SUMMARY = TableStyle( + ( + # All cells + ("ALIGN", (0, 0), (-1, -1), "LEFT"), + ("VALIGN", (0, 0), (-1, -1), "TOP"), + ("FONT", (0, 0), (-1, -1), "Helvetica", 7), + + # Header + ("FONT", (0, 0), (0, -1), "Helvetica-Bold"), + ("ALIGN", (0, 0), (0, -1), "RIGHT"), + ) +) + +def get_report_content(tr_data): + + yield Paragraph(f"TestGen Issue Report: {tr_data['result_status']}", 
PARA_STYLE_TITLE) + + yield Paragraph("Summary", PARA_STYLE_H1) + + summary_table_data = [ + ("Date", tr_data["test_date"]), + ("Database/Schema", tr_data["schema_name"]), + ("Table", tr_data["table_name"]), + ("Column", tr_data["column_names"]), + ("Table Group", tr_data["table_groups_name"]), + ("Test Suite", tr_data["test_suite"]), + ("Issue Type", "Test Result"), + ("Risk Level", tr_data["severity"]), + ("Data Quality Dimension", tr_data["dq_dimension"]), + ("Test", f"""{tr_data["test_name_short"]}: {tr_data["test_name_long"]}\n{tr_data["test_description"]}"""), + ("Result Measure", tr_data["result_measure"]), + ("Threshold Value", f"""{tr_data["threshold_value"]} {tr_data["threshold_description"]}"""), + ] + if tr_data["measure_uom_description"]: + summary_table_data.append(("Units", tr_data["measure_uom_description"])) + + yield Table(summary_table_data, style=TABLE_STYLE_SUMMARY, hAlign="LEFT") + + yield Paragraph("Usage Notes", PARA_STYLE_H1) + yield Paragraph(tr_data["usage_notes"], PARA_STYLE_DEFAULT) + + yield Paragraph("Result History", PARA_STYLE_H1) + + history_data = get_test_result_history(get_schema(), tr_data) + + history_table_data = [ + (r["test_date"], r["threshold_value"], r["result_measure"], r["result_status"]) + for _, r in history_data.iterrows() + ] + + yield Table(history_table_data) + + yield Paragraph("Sample Data", PARA_STYLE_H1) + + if tr_data["test_type"] == "CUSTOM": + bad_data_status, bad_data_msg, lookup_query, sample_data = do_source_data_lookup_custom(get_schema(), tr_data) + else: + bad_data_status, bad_data_msg, lookup_query, sample_data = do_source_data_lookup(get_schema(), tr_data) + if bad_data_status in {"ND", "NA"}: + yield Paragraph(bad_data_msg, style=PARA_STYLE_INFO) + elif bad_data_status == "ERR": + yield Paragraph(bad_data_msg, style=PARA_STYLE_ERROR) + elif sample_data is None: + yield Paragraph("An unknown error was encountered.", style=PARA_STYLE_ERROR) + else: + if bad_data_msg: + yield Paragraph(bad_data_msg, 
style=PARA_STYLE_DEFAULT) + + sample_data.fillna("[NULL]", inplace=True) + + yield Table( + ( + [col.replace("_", " ").title() for col in sample_data.columns], + *(data for _, data in sample_data.iterrows()), + ) + ) + + + yield Paragraph("SQL Query", PARA_STYLE_H1) + + yield Paragraph(lookup_query, PARA_STYLE_MONO) + + +def create_report(filename, tr_data): + doc = SimpleDocTemplate(filename) + doc.build(flowables=list(get_report_content(tr_data))) diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index ba07527..819c81d 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -11,7 +11,7 @@ import pandas as pd import streamlit as st -import validators +import validators from pandas.api.types import is_datetime64_any_dtype from st_aggrid import AgGrid, ColumnsAutoSizeMode, DataReturnMode, GridOptionsBuilder, GridUpdateMode, JsCode from streamlit_extras.no_default_selectbox import selectbox diff --git a/testgen/ui/services/test_definition_service.py b/testgen/ui/services/test_definition_service.py index 3d7d64b..d8315cd 100644 --- a/testgen/ui/services/test_definition_service.py +++ b/testgen/ui/services/test_definition_service.py @@ -22,6 +22,27 @@ def get_test_definitions( ) + +def get_test_definition(db_schema, test_def_id): + str_sql = f""" + SELECT d.id::VARCHAR, tt.test_name_short as test_name, tt.test_name_long as full_name, + tt.test_description as description, tt.usage_notes, + d.column_name, + d.baseline_value, d.baseline_ct, d.baseline_avg, d.baseline_sd, d.threshold_value, + d.subset_condition, d.groupby_names, d.having_condition, d.match_schema_name, + d.match_table_name, d.match_column_names, d.match_subset_condition, + d.match_groupby_names, d.match_having_condition, + d.window_date_column, d.window_days::VARCHAR as window_days, + d.custom_query, + d.severity, tt.default_severity, + d.test_active, d.lock_refresh, d.last_manual_update + FROM 
{db_schema}.test_definitions d + INNER JOIN {db_schema}.test_types tt + ON (d.test_type = tt.test_type) + WHERE d.id = '{test_def_id}'; + """ + return database_service.retrieve_data(str_sql) + + def delete(test_definition_ids, dry_run=False): schema = st.session_state["dbschema"] usage_result = test_definition_queries.get_test_definition_usage(schema, test_definition_ids) diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py new file mode 100644 index 0000000..e64ef0c --- /dev/null +++ b/testgen/ui/services/test_results_service.py @@ -0,0 +1,184 @@ +import pandas as pd + +from testgen.common import ConcatColumnList +from testgen.ui.services import database_service as db +from testgen.ui.services.string_service import empty_if_null +from testgen.ui.services.test_definition_service import get_test_definition + + +def get_test_result_history(db_schema, tr_data): + if tr_data["auto_gen"]: + str_where = f""" + WHERE test_suite_id = '{tr_data["test_suite_id"]}' + AND table_name = '{tr_data["table_name"]}' + AND column_names = '{tr_data["column_names"]}' + AND test_type = '{tr_data["test_type"]}' + AND auto_gen = TRUE + """ + else: + str_where = f""" + WHERE test_definition_id_runtime = '{tr_data["test_definition_id_runtime"]}' + """ + + str_sql = f""" + SELECT test_date, test_type, + test_name_short, test_name_long, measure_uom, test_operator, + threshold_value::NUMERIC, result_measure, result_status + FROM {db_schema}.v_test_results {str_where} + ORDER BY test_date DESC; + """ + + df = db.retrieve_data(str_sql) + # Clean Up + df["test_date"] = pd.to_datetime(df["test_date"]) + + return df + + +def do_source_data_lookup_custom(db_schema, tr_data): + # Define the query + str_sql = f""" + SELECT d.custom_query as lookup_query, tg.table_group_schema, c.project_qc_schema, + c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, + c.url, c.connect_by_url, c.connect_by_key, 
c.private_key, c.private_key_passphrase + FROM {db_schema}.test_definitions d + INNER JOIN {db_schema}.table_groups tg + ON ('{tr_data["table_groups_id"]}'::UUID = tg.id) + INNER JOIN {db_schema}.connections c + ON (tg.connection_id = c.connection_id) + WHERE d.id = '{tr_data["test_definition_id_current"]}'; + """ + + try: + # Retrieve SQL for customer lookup + lst_query = db.retrieve_data_list(str_sql) + + # Retrieve and return data as df + if lst_query: + str_sql = lst_query[0]["lookup_query"] + str_sql = str_sql.replace("{DATA_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"])) + df = db.retrieve_target_db_df( + lst_query[0]["sql_flavor"], + lst_query[0]["project_host"], + lst_query[0]["project_port"], + lst_query[0]["project_db"], + lst_query[0]["project_user"], + lst_query[0]["project_pw_encrypted"], + str_sql, + lst_query[0]["url"], + lst_query[0]["connect_by_url"], + lst_query[0]["connect_by_key"], + lst_query[0]["private_key"], + lst_query[0]["private_key_passphrase"], + ) + if df.empty: + return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None + else: + return "OK", None, str_sql, df + else: + return "NA", "A source data lookup for this Test is not available.", None, None + + except Exception as e: + return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}", str_sql, None + + +def do_source_data_lookup(db_schema, tr_data, sql_only=False): + # Define the query + str_sql = f""" + SELECT t.lookup_query, tg.table_group_schema, c.project_qc_schema, + c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, + c.url, c.connect_by_url, + c.connect_by_key, c.private_key, c.private_key_passphrase + FROM {db_schema}.target_data_lookups t + INNER JOIN {db_schema}.table_groups tg + ON ('{tr_data["table_groups_id"]}'::UUID = tg.id) + INNER JOIN {db_schema}.connections c + ON (tg.connection_id = c.connection_id) + AND (t.sql_flavor = c.sql_flavor) + WHERE 
t.error_type = 'Test Results' + AND t.test_id = '{tr_data["test_type_id"]}' + AND t.lookup_query > ''; + """ + + def replace_parms(df_test, str_query): + if df_test.empty: + raise ValueError("This test definition is no longer present.") + + str_query = str_query.replace("{TARGET_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"])) + str_query = str_query.replace("{TABLE_NAME}", empty_if_null(tr_data["table_name"])) + str_query = str_query.replace("{COLUMN_NAME}", empty_if_null(tr_data["column_names"])) + str_query = str_query.replace("{DATA_QC_SCHEMA}", empty_if_null(lst_query[0]["project_qc_schema"])) + str_query = str_query.replace("{TEST_DATE}", str(empty_if_null(tr_data["test_date"]))) + + str_query = str_query.replace("{CUSTOM_QUERY}", empty_if_null(df_test.at[0, "custom_query"])) + str_query = str_query.replace("{BASELINE_VALUE}", empty_if_null(df_test.at[0, "baseline_value"])) + str_query = str_query.replace("{BASELINE_CT}", empty_if_null(df_test.at[0, "baseline_ct"])) + str_query = str_query.replace("{BASELINE_AVG}", empty_if_null(df_test.at[0, "baseline_avg"])) + str_query = str_query.replace("{BASELINE_SD}", empty_if_null(df_test.at[0, "baseline_sd"])) + str_query = str_query.replace("{THRESHOLD_VALUE}", empty_if_null(df_test.at[0, "threshold_value"])) + + str_substitute = empty_if_null(df_test.at[0, "subset_condition"]) + str_substitute = "1=1" if str_substitute == "" else str_substitute + str_query = str_query.replace("{SUBSET_CONDITION}", str_substitute) + + str_query = str_query.replace("{GROUPBY_NAMES}", empty_if_null(df_test.at[0, "groupby_names"])) + str_query = str_query.replace("{HAVING_CONDITION}", empty_if_null(df_test.at[0, "having_condition"])) + str_query = str_query.replace("{MATCH_SCHEMA_NAME}", empty_if_null(df_test.at[0, "match_schema_name"])) + str_query = str_query.replace("{MATCH_TABLE_NAME}", empty_if_null(df_test.at[0, "match_table_name"])) + str_query = str_query.replace("{MATCH_COLUMN_NAMES}", empty_if_null(df_test.at[0, 
"match_column_names"])) + + str_substitute = empty_if_null(df_test.at[0, "match_subset_condition"]) + str_substitute = "1=1" if str_substitute == "" else str_substitute + str_query = str_query.replace("{MATCH_SUBSET_CONDITION}", str_substitute) + + str_query = str_query.replace("{MATCH_GROUPBY_NAMES}", empty_if_null(df_test.at[0, "match_groupby_names"])) + str_query = str_query.replace("{MATCH_HAVING_CONDITION}", empty_if_null(df_test.at[0, "match_having_condition"])) + str_query = str_query.replace("{COLUMN_NAME_NO_QUOTES}", empty_if_null(tr_data["column_names"])) + + str_query = str_query.replace("{WINDOW_DATE_COLUMN}", empty_if_null(df_test.at[0, "window_date_column"])) + str_query = str_query.replace("{WINDOW_DAYS}", empty_if_null(df_test.at[0, "window_days"])) + + str_substitute = ConcatColumnList(tr_data["column_names"], "") + str_query = str_query.replace("{CONCAT_COLUMNS}", str_substitute) + str_substitute = ConcatColumnList(df_test.at[0, "match_groupby_names"], "") + str_query = str_query.replace("{CONCAT_MATCH_GROUPBY}", str_substitute) + + if str_query is None or str_query == "": + raise ValueError("Lookup query is not defined for this Test Type.") + return str_query + + try: + # Retrieve SQL for customer lookup + lst_query = db.retrieve_data_list(str_sql) + + if sql_only: + return lst_query, replace_parms, None + + # Retrieve and return data as df + if lst_query: + df_test = get_test_definition(db_schema, tr_data["test_definition_id_current"]) + + str_sql = replace_parms(df_test, lst_query[0]["lookup_query"]) + df = db.retrieve_target_db_df( + lst_query[0]["sql_flavor"], + lst_query[0]["project_host"], + lst_query[0]["project_port"], + lst_query[0]["project_db"], + lst_query[0]["project_user"], + lst_query[0]["project_pw_encrypted"], + str_sql, + lst_query[0]["url"], + lst_query[0]["connect_by_url"], + lst_query[0]["connect_by_key"], + lst_query[0]["private_key"], + lst_query[0]["private_key_passphrase"], + ) + if df.empty: + return "ND", "Data that 
violates Test criteria is not present in the current dataset.", str_sql, None + else: + return "OK", None, str_sql, df + else: + return "NA", "A source data lookup for this Test is not available.", None, None + + except Exception as e: + return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}", str_sql, None diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 2101359..6b86ea6 100644 @@ -1,3 +1,4 @@ +import tempfile import typing from datetime import date @@ -9,11 +10,25 @@ import testgen.ui.services.database_service as db import testgen.ui.services.form_service as fm import testgen.ui.services.query_service as dq -from testgen.common import ConcatColumnList, date_service +from testgen.common import date_service from testgen.ui.components import widgets as testgen +from testgen.ui.components.widgets.download_dialog import download_dialog from testgen.ui.navigation.page import Page +from testgen.ui.pdf.test_result_report import create_report from testgen.ui.services import authentication_service, project_service from testgen.ui.services.string_service import empty_if_null +from testgen.ui.services.test_definition_service import ( + get_test_definition as get_test_definition_uncached, +) +from testgen.ui.services.test_results_service import ( + do_source_data_lookup as do_source_data_lookup_uncached, +) +from testgen.ui.services.test_results_service import ( + do_source_data_lookup_custom as do_source_data_lookup_custom_uncached, +) +from testgen.ui.services.test_results_service import ( + get_test_result_history as get_test_result_history_uncached, +) from testgen.ui.session import session from testgen.ui.views.profiling_modal import view_profiling_button from testgen.ui.views.test_definitions import show_test_form_by_id @@ -200,6 +215,7 @@ def get_test_results_uncached(str_schema, str_run_id, str_sel_test_status, test_ r.schema_name, 
r.column_names, r.test_time::DATE as test_date, r.test_type, tt.id as test_type_id, tt.test_name_short, tt.test_name_long, r.test_description, tt.measure_uom, tt.measure_uom_description, c.test_operator, r.threshold_value::NUMERIC(16, 5), r.result_measure::NUMERIC(16, 5), r.result_status, + tt.threshold_description, tt.usage_notes, -- These are used in the PDF report CASE WHEN r.result_code <> 1 THEN r.disposition ELSE 'Passed' @@ -333,221 +349,28 @@ def get_test_result_summary(run_id): ] -@st.cache_data(show_spinner=ALWAYS_SPIN) -def get_test_result_history(str_test_type, str_test_suite_id, str_table_name, str_column_names, - str_test_definition_id, auto_gen): - str_schema = st.session_state["dbschema"] - - if auto_gen: - str_where = f""" - WHERE test_suite_id = '{str_test_suite_id}' - AND table_name = '{str_table_name}' - AND column_names = '{str_column_names}' - AND test_type = '{str_test_type}' - AND auto_gen = TRUE - """ - else: - str_where = f""" - WHERE test_definition_id_runtime = '{str_test_definition_id}' - """ - - str_sql = f""" - SELECT test_date, test_type, - test_name_short, test_name_long, measure_uom, test_operator, - threshold_value::NUMERIC, result_measure, result_status - FROM {str_schema}.v_test_results {str_where} - ORDER BY test_date DESC; - """ - - df = db.retrieve_data(str_sql) - # Clean Up - df["test_date"] = pd.to_datetime(df["test_date"]) - - return df - - @st.cache_data(show_spinner=ALWAYS_SPIN) def get_test_definition(str_test_def_id): str_schema = st.session_state["dbschema"] return get_test_definition_uncached(str_schema, str_test_def_id) -def get_test_definition_uncached(str_schema, str_test_def_id): - str_sql = f""" - SELECT d.id::VARCHAR, tt.test_name_short as test_name, tt.test_name_long as full_name, - tt.test_description as description, tt.usage_notes, - d.column_name, - d.baseline_value, d.baseline_ct, d.baseline_avg, d.baseline_sd, d.threshold_value, - d.subset_condition, d.groupby_names, d.having_condition, 
d.match_schema_name, - d.match_table_name, d.match_column_names, d.match_subset_condition, - d.match_groupby_names, d.match_having_condition, - d.window_date_column, d.window_days::VARCHAR as window_days, - d.custom_query, - d.severity, tt.default_severity, - d.test_active, d.lock_refresh, d.last_manual_update - FROM {str_schema}.test_definitions d - INNER JOIN {str_schema}.test_types tt - ON (d.test_type = tt.test_type) - WHERE d.id = '{str_test_def_id}'; - """ - return db.retrieve_data(str_sql) - - @st.cache_data(show_spinner=False) def do_source_data_lookup(selected_row): schema = st.session_state["dbschema"] return do_source_data_lookup_uncached(schema, selected_row) -def do_source_data_lookup_uncached(str_schema, selected_row, sql_only=False): - # Define the query - str_sql = f""" - SELECT t.lookup_query, tg.table_group_schema, c.project_qc_schema, - c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, - c.url, c.connect_by_url, - c.connect_by_key, c.private_key, c.private_key_passphrase - FROM {str_schema}.target_data_lookups t - INNER JOIN {str_schema}.table_groups tg - ON ('{selected_row["table_groups_id"]}'::UUID = tg.id) - INNER JOIN {str_schema}.connections c - ON (tg.connection_id = c.connection_id) - AND (t.sql_flavor = c.sql_flavor) - WHERE t.error_type = 'Test Results' - AND t.test_id = '{selected_row["test_type_id"]}' - AND t.lookup_query > ''; - """ - - def replace_parms(df_test, str_query): - if df_test.empty: - raise ValueError("This test definition is no longer present.") - - str_query = str_query.replace("{TARGET_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"])) - str_query = str_query.replace("{TABLE_NAME}", empty_if_null(selected_row["table_name"])) - str_query = str_query.replace("{COLUMN_NAME}", empty_if_null(selected_row["column_names"])) - str_query = str_query.replace("{DATA_QC_SCHEMA}", empty_if_null(lst_query[0]["project_qc_schema"])) - str_query = 
str_query.replace("{TEST_DATE}", str(empty_if_null(selected_row["test_date"]))) - - str_query = str_query.replace("{CUSTOM_QUERY}", empty_if_null(df_test.at[0, "custom_query"])) - str_query = str_query.replace("{BASELINE_VALUE}", empty_if_null(df_test.at[0, "baseline_value"])) - str_query = str_query.replace("{BASELINE_CT}", empty_if_null(df_test.at[0, "baseline_ct"])) - str_query = str_query.replace("{BASELINE_AVG}", empty_if_null(df_test.at[0, "baseline_avg"])) - str_query = str_query.replace("{BASELINE_SD}", empty_if_null(df_test.at[0, "baseline_sd"])) - str_query = str_query.replace("{THRESHOLD_VALUE}", empty_if_null(df_test.at[0, "threshold_value"])) - - str_substitute = empty_if_null(df_test.at[0, "subset_condition"]) - str_substitute = "1=1" if str_substitute == "" else str_substitute - str_query = str_query.replace("{SUBSET_CONDITION}", str_substitute) - - str_query = str_query.replace("{GROUPBY_NAMES}", empty_if_null(df_test.at[0, "groupby_names"])) - str_query = str_query.replace("{HAVING_CONDITION}", empty_if_null(df_test.at[0, "having_condition"])) - str_query = str_query.replace("{MATCH_SCHEMA_NAME}", empty_if_null(df_test.at[0, "match_schema_name"])) - str_query = str_query.replace("{MATCH_TABLE_NAME}", empty_if_null(df_test.at[0, "match_table_name"])) - str_query = str_query.replace("{MATCH_COLUMN_NAMES}", empty_if_null(df_test.at[0, "match_column_names"])) - - str_substitute = empty_if_null(df_test.at[0, "match_subset_condition"]) - str_substitute = "1=1" if str_substitute == "" else str_substitute - str_query = str_query.replace("{MATCH_SUBSET_CONDITION}", str_substitute) - - str_query = str_query.replace("{MATCH_GROUPBY_NAMES}", empty_if_null(df_test.at[0, "match_groupby_names"])) - str_query = str_query.replace("{MATCH_HAVING_CONDITION}", empty_if_null(df_test.at[0, "match_having_condition"])) - str_query = str_query.replace("{COLUMN_NAME_NO_QUOTES}", empty_if_null(selected_row["column_names"])) - - str_query = 
str_query.replace("{WINDOW_DATE_COLUMN}", empty_if_null(df_test.at[0, "window_date_column"])) - str_query = str_query.replace("{WINDOW_DAYS}", empty_if_null(df_test.at[0, "window_days"])) - - str_substitute = ConcatColumnList(selected_row["column_names"], "") - str_query = str_query.replace("{CONCAT_COLUMNS}", str_substitute) - str_substitute = ConcatColumnList(df_test.at[0, "match_groupby_names"], "") - str_query = str_query.replace("{CONCAT_MATCH_GROUPBY}", str_substitute) - - if str_query is None or str_query == "": - raise ValueError("Lookup query is not defined for this Test Type.") - return str_query - - try: - # Retrieve SQL for customer lookup - lst_query = db.retrieve_data_list(str_sql) - - if sql_only: - return lst_query, replace_parms, None - - # Retrieve and return data as df - if lst_query: - df_test = get_test_definition(selected_row["test_definition_id_current"]) - - str_sql = replace_parms(df_test, lst_query[0]["lookup_query"]) - df = db.retrieve_target_db_df( - lst_query[0]["sql_flavor"], - lst_query[0]["project_host"], - lst_query[0]["project_port"], - lst_query[0]["project_db"], - lst_query[0]["project_user"], - lst_query[0]["project_pw_encrypted"], - str_sql, - lst_query[0]["url"], - lst_query[0]["connect_by_url"], - lst_query[0]["connect_by_key"], - lst_query[0]["private_key"], - lst_query[0]["private_key_passphrase"], - ) - if df.empty: - return "ND", "Data that violates Test criteria is not present in the current dataset.", None - else: - return "OK", None, df - else: - return "NA", "A source data lookup for this Test is not available.", None - - except Exception as e: - return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}\n\n{str_sql}", None - - @st.cache_data(show_spinner=False) def do_source_data_lookup_custom(selected_row): - str_schema = st.session_state["dbschema"] - # Define the query - str_sql = f""" - SELECT d.custom_query as lookup_query, tg.table_group_schema, c.project_qc_schema, - c.sql_flavor, c.project_host, 
c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, - c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase - FROM {str_schema}.test_definitions d - INNER JOIN {str_schema}.table_groups tg - ON ('{selected_row["table_groups_id"]}'::UUID = tg.id) - INNER JOIN {str_schema}.connections c - ON (tg.connection_id = c.connection_id) - WHERE d.id = '{selected_row["test_definition_id_current"]}'; - """ + schema = st.session_state["dbschema"] + return do_source_data_lookup_custom_uncached(schema, selected_row) - try: - # Retrieve SQL for customer lookup - lst_query = db.retrieve_data_list(str_sql) - - # Retrieve and return data as df - if lst_query: - str_sql = lst_query[0]["lookup_query"] - str_sql = str_sql.replace("{DATA_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"])) - df = db.retrieve_target_db_df( - lst_query[0]["sql_flavor"], - lst_query[0]["project_host"], - lst_query[0]["project_port"], - lst_query[0]["project_db"], - lst_query[0]["project_user"], - lst_query[0]["project_pw_encrypted"], - str_sql, - lst_query[0]["url"], - lst_query[0]["connect_by_url"], - lst_query[0]["connect_by_key"], - lst_query[0]["private_key"], - lst_query[0]["private_key_passphrase"], - ) - if df.empty: - return "ND", "Data that violates Test criteria is not present in the current dataset.", None - else: - return "OK", None, df - else: - return "NA", "A source data lookup for this Test is not available.", None - except Exception as e: - return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}\n\n{str_sql}", None +@st.cache_data(show_spinner=False) +def get_test_result_history(selected_row): + schema = st.session_state["dbschema"] + return get_test_result_history_uncached(schema, selected_row) def show_test_def_detail(str_test_def_id): @@ -698,14 +521,7 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co st.markdown(":orange[Select a record to see more information.]") else: selected_row 
= selected_rows[len(selected_rows) - 1] - dfh = get_test_result_history( - selected_row["test_type"], - selected_row["test_suite_id"], - selected_row["table_name"], - selected_row["column_names"], - selected_row["test_definition_id_runtime"], - selected_row["auto_gen"] - ) + dfh = get_test_result_history(selected_row) show_hist_columns = ["test_date", "threshold_value", "result_measure", "result_status"] time_columns = ["test_date"] @@ -714,7 +530,7 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co pg_col1, pg_col2 = st.columns([0.5, 0.5]) with pg_col2: - v_col1, v_col2, v_col3 = st.columns([0.33, 0.33, 0.33]) + v_col1, v_col2, v_col3, v_col4 = st.columns([.25, .25, .25, .25]) if authentication_service.current_user_has_edit_role(): view_edit_test(v_col1, selected_row["test_definition_id_current"]) if selected_row["test_scope"] == "column": @@ -724,6 +540,24 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co ) view_bad_data(v_col3, selected_row) + with v_col4: + if st.button( + ":material/file_save: Report", + use_container_width=True, + ): + + def _generate(): + with tempfile.NamedTemporaryFile() as pdf_file: + create_report(pdf_file.name, selected_row) + return pdf_file.read() + + download_dialog( + dialog_title="Download Issue Report", + file_name="testgen_issue_report.pdf", + mime_type="application/pdf", + file_content_func=_generate, + ) + with pg_col1: fm.show_subheader(selected_row["test_name_short"]) st.markdown(f"###### {selected_row['test_description']}") @@ -837,7 +671,7 @@ def do_disposition_update(selected, str_new_status): def view_bad_data(button_container, selected_row): with button_container: if st.button( - "Source Data →", help="Review current source data for highlighted result", use_container_width=True + "Source Data →", help="Review current source data for highlighted result", use_container_width=True ): source_data_dialog(selected_row) @@ -855,13 +689,13 @@ def 
source_data_dialog(selected_row): with st.spinner("Retrieving source data..."): if selected_row["test_type"] == "CUSTOM": - bad_data_status, bad_data_msg, df_bad = do_source_data_lookup_custom(selected_row) + bad_data_status, bad_data_msg, query, df_bad = do_source_data_lookup_custom(selected_row) else: - bad_data_status, bad_data_msg, df_bad = do_source_data_lookup(selected_row) + bad_data_status, bad_data_msg, query, df_bad = do_source_data_lookup(selected_row) if bad_data_status in {"ND", "NA"}: st.info(bad_data_msg) elif bad_data_status == "ERR": - st.error(bad_data_msg) + st.error(f"{bad_data_msg}\n\n{query}") elif df_bad is None: st.error("An unknown error was encountered.") else: