From dea50a05fb3635962a84b14bf0c93460b7631ff0 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Fri, 27 Sep 2024 17:45:59 -0400 Subject: [PATCH 01/91] feat(pdf): Test Result PDF report --- pyproject.toml | 1 + .../ui/components/widgets/download_dialog.py | 34 +++ testgen/ui/pdf/__init__.py | 0 testgen/ui/pdf/test_result_report.py | 130 +++++++++ testgen/ui/services/form_service.py | 2 +- .../ui/services/test_definition_service.py | 21 ++ testgen/ui/services/test_results_service.py | 184 +++++++++++++ testgen/ui/views/test_results.py | 260 ++++-------------- 8 files changed, 418 insertions(+), 214 deletions(-) create mode 100644 testgen/ui/components/widgets/download_dialog.py create mode 100644 testgen/ui/pdf/__init__.py create mode 100644 testgen/ui/pdf/test_result_report.py create mode 100644 testgen/ui/services/test_results_service.py diff --git a/pyproject.toml b/pyproject.toml index 6e06b0d..038d591 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ dependencies = [ "concurrent_log_handler==0.9.25", "cryptography==42.0.8", "validators==0.33.0", + "reportlab==4.2.2", ] [project.optional-dependencies] diff --git a/testgen/ui/components/widgets/download_dialog.py b/testgen/ui/components/widgets/download_dialog.py new file mode 100644 index 0000000..34ec928 --- /dev/null +++ b/testgen/ui/components/widgets/download_dialog.py @@ -0,0 +1,34 @@ +from collections.abc import Callable +from typing import Any + +import streamlit as st + + +def download_dialog( + dialog_title: str, + file_name: str, + mime_type: str, + file_content_func: Callable[[], Any], +): + """Wrapping a dialog and a download button together to allow generating the file contents only when needed.""" + + def _dialog_content(): + # Encapsulating the dialog content in a container just to force its height and avoid the dialog to + # have its height changed when the button is rendered. 
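+        # Note (editorial, not in the original patch): the spinner column below calls
+        # file_content_func() while the dialog is open, so the potentially slow file
+        # generation only happens on demand; the download button is then rendered with
+        # the generated content.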
+ with st.container(height=55, border=False): + spinner_col, button_col, _ = st.columns([.3, .4, .3]) + + with spinner_col: + with st.spinner(text="Generating file..."): + data = file_content_func() + + with button_col: + st.download_button( + label=":material/download: Download", + data=data, + file_name=file_name, + mime=mime_type, + use_container_width=True + ) + + return st.dialog(title=dialog_title, width="small")(_dialog_content)() diff --git a/testgen/ui/pdf/__init__.py b/testgen/ui/pdf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py new file mode 100644 index 0000000..464beaa --- /dev/null +++ b/testgen/ui/pdf/test_result_report.py @@ -0,0 +1,130 @@ +from reportlab.lib import enums +from reportlab.lib.styles import ParagraphStyle +from reportlab.platypus import Paragraph, SimpleDocTemplate, Table, TableStyle + +from testgen.ui.services.database_service import get_schema +from testgen.ui.services.test_results_service import ( + do_source_data_lookup, + do_source_data_lookup_custom, + get_test_result_history, +) + +PARA_STYLE_DEFAULT = ParagraphStyle( + "default", + fontSize=8, +) + +PARA_STYLE_INFO = PARA_STYLE_DEFAULT + + +PARA_STYLE_ERROR = PARA_STYLE_DEFAULT + + +PARA_STYLE_MONO = ParagraphStyle( + "heading_1", + PARA_STYLE_DEFAULT, + +) + + +PARA_STYLE_H1 = ParagraphStyle( + "heading_1", + PARA_STYLE_DEFAULT, + fontSize=12, + leading=16, +) + +PARA_STYLE_TITLE = ParagraphStyle( + "title", + PARA_STYLE_DEFAULT, + fontSize=18, + leading=30, + alignment=enums.TA_CENTER, +) + +TABLE_STYLE_SUMMARY = TableStyle( + ( + # All cells + ("ALIGN", (0, 0), (-1, -1), "LEFT"), + ("VALIGN", (0, 0), (-1, -1), "TOP"), + ("FONT", (0, 0), (-1, -1), "Helvetica", 7), + + # Header + ("FONT", (0, 0), (0, -1), "Helvetica-Bold"), + ("ALIGN", (0, 0), (0, -1), "RIGHT"), + ) +) + +def get_report_content(tr_data): + + yield Paragraph(f"TestGen Issue Report: {tr_data['result_status']}", PARA_STYLE_TITLE) + + yield Paragraph("Summary", PARA_STYLE_H1) + + summary_table_data = [ + ("Date", tr_data["test_date"]), + ("Database/Schema", tr_data["schema_name"]), + ("Table", tr_data["table_name"]), + ("Column", tr_data["column_names"]), + ("Table Group", tr_data["table_groups_name"]), + ("Test Suite", tr_data["test_suite"]), + ("Issue Type", "Test Result"), + ("Risk Level", tr_data["severity"]), + ("Data Quality Dimension", tr_data["dq_dimension"]), + ("Test", f"""{tr_data["test_name_short"]}: {tr_data["test_name_long"]}\n{tr_data["test_description"]}"""), + ("Result Measure", tr_data["result_measure"]), + ("Threshold Value", f"""{tr_data["threshold_value"]} {tr_data["threshold_description"]}"""), + ] + if tr_data["measure_uom_description"]: + summary_table_data.append(("Units", tr_data["measure_uom_description"])) + + yield Table(summary_table_data, style=TABLE_STYLE_SUMMARY, hAlign="LEFT") + + yield Paragraph("Usage Notes", PARA_STYLE_H1) + yield Paragraph(tr_data["usage_notes"], PARA_STYLE_DEFAULT) + + yield Paragraph("Result History", PARA_STYLE_H1) + + history_data = get_test_result_history(get_schema(), tr_data) + + history_table_data = [ + (r["test_date"], r["threshold_value"], r["result_measure"], r["result_status"]) + for _, r in history_data.iterrows() + ] + + yield Table(history_table_data) + + yield Paragraph("Sample Data", PARA_STYLE_H1) + + if tr_data["test_type"] == "CUSTOM": + bad_data_status, bad_data_msg, lookup_query, sample_data = do_source_data_lookup_custom(get_schema(), tr_data) + else: + 
bad_data_status, bad_data_msg, lookup_query, sample_data = do_source_data_lookup(get_schema(), tr_data) + if bad_data_status in {"ND", "NA"}: + yield Paragraph(bad_data_msg, style=PARA_STYLE_INFO) + elif bad_data_status == "ERR": + yield Paragraph(bad_data_msg, style=PARA_STYLE_ERROR) + elif sample_data is None: + yield Paragraph("An unknown error was encountered.", style=PARA_STYLE_ERROR) + else: + if bad_data_msg: + yield Paragraph(bad_data_msg, style=PARA_STYLE_DEFAULT) + + sample_data.fillna("[NULL]", inplace=True) + + yield Table( + ( + [col.replace("_", " ").title() for col in sample_data.columns], + *(data for _, data in sample_data.iterrows()), + ) + ) + + + yield Paragraph("SQL Query", PARA_STYLE_H1) + + yield Paragraph(lookup_query, PARA_STYLE_MONO) + + +def create_report(filename, test_result_id): + doc = SimpleDocTemplate(filename) + doc.build(flowables=list(get_report_content(test_result_id))) diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index ba07527..819c81d 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -11,7 +11,7 @@ import pandas as pd import streamlit as st -import validators +from attrs import validators from pandas.api.types import is_datetime64_any_dtype from st_aggrid import AgGrid, ColumnsAutoSizeMode, DataReturnMode, GridOptionsBuilder, GridUpdateMode, JsCode from streamlit_extras.no_default_selectbox import selectbox diff --git a/testgen/ui/services/test_definition_service.py b/testgen/ui/services/test_definition_service.py index 3d7d64b..d8315cd 100644 --- a/testgen/ui/services/test_definition_service.py +++ b/testgen/ui/services/test_definition_service.py @@ -22,6 +22,27 @@ def get_test_definitions( ) +def get_test_definition(db_schema, test_def_id): + str_sql = f""" + SELECT d.id::VARCHAR, tt.test_name_short as test_name, tt.test_name_long as full_name, + tt.test_description as description, tt.usage_notes, + d.column_name, + d.baseline_value, d.baseline_ct, d.baseline_avg, d.baseline_sd, d.threshold_value, + d.subset_condition, d.groupby_names, d.having_condition, d.match_schema_name, + d.match_table_name, d.match_column_names, d.match_subset_condition, + d.match_groupby_names, d.match_having_condition, + d.window_date_column, d.window_days::VARCHAR as window_days, + d.custom_query, + d.severity, tt.default_severity, + d.test_active, d.lock_refresh, d.last_manual_update + FROM {db_schema}.test_definitions d + INNER JOIN {db_schema}.test_types tt + ON (d.test_type = tt.test_type) + WHERE d.id = '{test_def_id}'; + """ + return database_service.retrieve_data(str_sql) + + def delete(test_definition_ids, dry_run=False): schema = st.session_state["dbschema"] usage_result = test_definition_queries.get_test_definition_usage(schema, test_definition_ids) diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py new file mode 100644 index 0000000..e64ef0c --- /dev/null +++ b/testgen/ui/services/test_results_service.py @@ -0,0 +1,184 @@ +import pandas as pd + +from testgen.common import ConcatColumnList +from testgen.ui.services import database_service as db +from testgen.ui.services.string_service import empty_if_null +from testgen.ui.services.test_definition_service import get_test_definition + + +def get_test_result_history(db_schema, tr_data): + if tr_data["auto_gen"]: + str_where = f""" + WHERE test_suite_id = '{tr_data["test_suite_id"]}' + AND table_name = '{tr_data["table_name"]}' + AND column_names = '{tr_data["column_names"]}' + AND 
test_type = '{tr_data["test_type"]}' + AND auto_gen = TRUE + """ + else: + str_where = f""" + WHERE test_definition_id_runtime = '{tr_data["test_definition_id_runtime"]}' + """ + + str_sql = f""" + SELECT test_date, test_type, + test_name_short, test_name_long, measure_uom, test_operator, + threshold_value::NUMERIC, result_measure, result_status + FROM {db_schema}.v_test_results {str_where} + ORDER BY test_date DESC; + """ + + df = db.retrieve_data(str_sql) + # Clean Up + df["test_date"] = pd.to_datetime(df["test_date"]) + + return df + + +def do_source_data_lookup_custom(db_schema, tr_data): + # Define the query + str_sql = f""" + SELECT d.custom_query as lookup_query, tg.table_group_schema, c.project_qc_schema, + c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, + c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase + FROM {db_schema}.test_definitions d + INNER JOIN {db_schema}.table_groups tg + ON ('{tr_data["table_groups_id"]}'::UUID = tg.id) + INNER JOIN {db_schema}.connections c + ON (tg.connection_id = c.connection_id) + WHERE d.id = '{tr_data["test_definition_id_current"]}'; + """ + + try: + # Retrieve SQL for customer lookup + lst_query = db.retrieve_data_list(str_sql) + + # Retrieve and return data as df + if lst_query: + str_sql = lst_query[0]["lookup_query"] + str_sql = str_sql.replace("{DATA_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"])) + df = db.retrieve_target_db_df( + lst_query[0]["sql_flavor"], + lst_query[0]["project_host"], + lst_query[0]["project_port"], + lst_query[0]["project_db"], + lst_query[0]["project_user"], + lst_query[0]["project_pw_encrypted"], + str_sql, + lst_query[0]["url"], + lst_query[0]["connect_by_url"], + lst_query[0]["connect_by_key"], + lst_query[0]["private_key"], + lst_query[0]["private_key_passphrase"], + ) + if df.empty: + return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None + else: + return "OK", None, str_sql, df + else: + return "NA", "A source data lookup for this Test is not available.", None, None + + except Exception as e: + return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}", str_sql, None + + +def do_source_data_lookup(db_schema, tr_data, sql_only=False): + # Define the query + str_sql = f""" + SELECT t.lookup_query, tg.table_group_schema, c.project_qc_schema, + c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, + c.url, c.connect_by_url, + c.connect_by_key, c.private_key, c.private_key_passphrase + FROM {db_schema}.target_data_lookups t + INNER JOIN {db_schema}.table_groups tg + ON ('{tr_data["table_groups_id"]}'::UUID = tg.id) + INNER JOIN {db_schema}.connections c + ON (tg.connection_id = c.connection_id) + AND (t.sql_flavor = c.sql_flavor) + WHERE t.error_type = 'Test Results' + AND t.test_id = '{tr_data["test_type_id"]}' + AND t.lookup_query > ''; + """ + + def replace_parms(df_test, str_query): + if df_test.empty: + raise ValueError("This test definition is no longer present.") + + str_query = str_query.replace("{TARGET_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"])) + str_query = str_query.replace("{TABLE_NAME}", empty_if_null(tr_data["table_name"])) + str_query = str_query.replace("{COLUMN_NAME}", empty_if_null(tr_data["column_names"])) + str_query = str_query.replace("{DATA_QC_SCHEMA}", empty_if_null(lst_query[0]["project_qc_schema"])) + str_query = str_query.replace("{TEST_DATE}", 
str(empty_if_null(tr_data["test_date"]))) + + str_query = str_query.replace("{CUSTOM_QUERY}", empty_if_null(df_test.at[0, "custom_query"])) + str_query = str_query.replace("{BASELINE_VALUE}", empty_if_null(df_test.at[0, "baseline_value"])) + str_query = str_query.replace("{BASELINE_CT}", empty_if_null(df_test.at[0, "baseline_ct"])) + str_query = str_query.replace("{BASELINE_AVG}", empty_if_null(df_test.at[0, "baseline_avg"])) + str_query = str_query.replace("{BASELINE_SD}", empty_if_null(df_test.at[0, "baseline_sd"])) + str_query = str_query.replace("{THRESHOLD_VALUE}", empty_if_null(df_test.at[0, "threshold_value"])) + + str_substitute = empty_if_null(df_test.at[0, "subset_condition"]) + str_substitute = "1=1" if str_substitute == "" else str_substitute + str_query = str_query.replace("{SUBSET_CONDITION}", str_substitute) + + str_query = str_query.replace("{GROUPBY_NAMES}", empty_if_null(df_test.at[0, "groupby_names"])) + str_query = str_query.replace("{HAVING_CONDITION}", empty_if_null(df_test.at[0, "having_condition"])) + str_query = str_query.replace("{MATCH_SCHEMA_NAME}", empty_if_null(df_test.at[0, "match_schema_name"])) + str_query = str_query.replace("{MATCH_TABLE_NAME}", empty_if_null(df_test.at[0, "match_table_name"])) + str_query = str_query.replace("{MATCH_COLUMN_NAMES}", empty_if_null(df_test.at[0, "match_column_names"])) + + str_substitute = empty_if_null(df_test.at[0, "match_subset_condition"]) + str_substitute = "1=1" if str_substitute == "" else str_substitute + str_query = str_query.replace("{MATCH_SUBSET_CONDITION}", str_substitute) + + str_query = str_query.replace("{MATCH_GROUPBY_NAMES}", empty_if_null(df_test.at[0, "match_groupby_names"])) + str_query = str_query.replace("{MATCH_HAVING_CONDITION}", empty_if_null(df_test.at[0, "match_having_condition"])) + str_query = str_query.replace("{COLUMN_NAME_NO_QUOTES}", empty_if_null(tr_data["column_names"])) + + str_query = str_query.replace("{WINDOW_DATE_COLUMN}", empty_if_null(df_test.at[0, "window_date_column"])) + str_query = str_query.replace("{WINDOW_DAYS}", empty_if_null(df_test.at[0, "window_days"])) + + str_substitute = ConcatColumnList(tr_data["column_names"], "") + str_query = str_query.replace("{CONCAT_COLUMNS}", str_substitute) + str_substitute = ConcatColumnList(df_test.at[0, "match_groupby_names"], "") + str_query = str_query.replace("{CONCAT_MATCH_GROUPBY}", str_substitute) + + if str_query is None or str_query == "": + raise ValueError("Lookup query is not defined for this Test Type.") + return str_query + + try: + # Retrieve SQL for customer lookup + lst_query = db.retrieve_data_list(str_sql) + + if sql_only: + return lst_query, replace_parms, None + + # Retrieve and return data as df + if lst_query: + df_test = get_test_definition(db_schema, tr_data["test_definition_id_current"]) + + str_sql = replace_parms(df_test, lst_query[0]["lookup_query"]) + df = db.retrieve_target_db_df( + lst_query[0]["sql_flavor"], + lst_query[0]["project_host"], + lst_query[0]["project_port"], + lst_query[0]["project_db"], + lst_query[0]["project_user"], + lst_query[0]["project_pw_encrypted"], + str_sql, + lst_query[0]["url"], + lst_query[0]["connect_by_url"], + lst_query[0]["connect_by_key"], + lst_query[0]["private_key"], + lst_query[0]["private_key_passphrase"], + ) + if df.empty: + return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None + else: + return "OK", None, str_sql, df + else: + return "NA", "A source data lookup for this Test is not available.", None, None + + except 
Exception as e: + return "ERR", f"Source data lookup query caused:\n\n{e.args[0]}", str_sql, None diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 2101359..6b86ea6 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -1,3 +1,4 @@ +import tempfile import typing from datetime import date @@ -9,11 +10,25 @@ import testgen.ui.services.database_service as db import testgen.ui.services.form_service as fm import testgen.ui.services.query_service as dq -from testgen.common import ConcatColumnList, date_service +from testgen.common import date_service from testgen.ui.components import widgets as testgen +from testgen.ui.components.widgets.download_dialog import download_dialog from testgen.ui.navigation.page import Page +from testgen.ui.pdf.test_result_report import create_report from testgen.ui.services import authentication_service, project_service from testgen.ui.services.string_service import empty_if_null +from testgen.ui.services.test_definition_service import ( + get_test_definition as get_test_definition_uncached, +) +from testgen.ui.services.test_results_service import ( + do_source_data_lookup as do_source_data_lookup_uncached, +) +from testgen.ui.services.test_results_service import ( + do_source_data_lookup_custom as do_source_data_lookup_custom_uncached, +) +from testgen.ui.services.test_results_service import ( + get_test_result_history as get_test_result_history_uncached, +) from testgen.ui.session import session from testgen.ui.views.profiling_modal import view_profiling_button from testgen.ui.views.test_definitions import show_test_form_by_id @@ -200,6 +215,7 @@ def get_test_results_uncached(str_schema, str_run_id, str_sel_test_status, test_ r.schema_name, r.column_names, r.test_time::DATE as test_date, r.test_type, tt.id as test_type_id, tt.test_name_short, tt.test_name_long, r.test_description, tt.measure_uom, tt.measure_uom_description, c.test_operator, r.threshold_value::NUMERIC(16, 5), r.result_measure::NUMERIC(16, 5), r.result_status, + tt.threshold_description, tt.usage_notes, -- These are used in the PDF report CASE WHEN r.result_code <> 1 THEN r.disposition ELSE 'Passed' @@ -333,221 +349,28 @@ def get_test_result_summary(run_id): ] -@st.cache_data(show_spinner=ALWAYS_SPIN) -def get_test_result_history(str_test_type, str_test_suite_id, str_table_name, str_column_names, - str_test_definition_id, auto_gen): - str_schema = st.session_state["dbschema"] - - if auto_gen: - str_where = f""" - WHERE test_suite_id = '{str_test_suite_id}' - AND table_name = '{str_table_name}' - AND column_names = '{str_column_names}' - AND test_type = '{str_test_type}' - AND auto_gen = TRUE - """ - else: - str_where = f""" - WHERE test_definition_id_runtime = '{str_test_definition_id}' - """ - - str_sql = f""" - SELECT test_date, test_type, - test_name_short, test_name_long, measure_uom, test_operator, - threshold_value::NUMERIC, result_measure, result_status - FROM {str_schema}.v_test_results {str_where} - ORDER BY test_date DESC; - """ - - df = db.retrieve_data(str_sql) - # Clean Up - df["test_date"] = pd.to_datetime(df["test_date"]) - - return df - - @st.cache_data(show_spinner=ALWAYS_SPIN) def get_test_definition(str_test_def_id): str_schema = st.session_state["dbschema"] return get_test_definition_uncached(str_schema, str_test_def_id) -def get_test_definition_uncached(str_schema, str_test_def_id): - str_sql = f""" - SELECT d.id::VARCHAR, tt.test_name_short as test_name, tt.test_name_long as full_name, - tt.test_description 
as description, tt.usage_notes, - d.column_name, - d.baseline_value, d.baseline_ct, d.baseline_avg, d.baseline_sd, d.threshold_value, - d.subset_condition, d.groupby_names, d.having_condition, d.match_schema_name, - d.match_table_name, d.match_column_names, d.match_subset_condition, - d.match_groupby_names, d.match_having_condition, - d.window_date_column, d.window_days::VARCHAR as window_days, - d.custom_query, - d.severity, tt.default_severity, - d.test_active, d.lock_refresh, d.last_manual_update - FROM {str_schema}.test_definitions d - INNER JOIN {str_schema}.test_types tt - ON (d.test_type = tt.test_type) - WHERE d.id = '{str_test_def_id}'; - """ - return db.retrieve_data(str_sql) - - @st.cache_data(show_spinner=False) def do_source_data_lookup(selected_row): schema = st.session_state["dbschema"] return do_source_data_lookup_uncached(schema, selected_row) -def do_source_data_lookup_uncached(str_schema, selected_row, sql_only=False): - # Define the query - str_sql = f""" - SELECT t.lookup_query, tg.table_group_schema, c.project_qc_schema, - c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, - c.url, c.connect_by_url, - c.connect_by_key, c.private_key, c.private_key_passphrase - FROM {str_schema}.target_data_lookups t - INNER JOIN {str_schema}.table_groups tg - ON ('{selected_row["table_groups_id"]}'::UUID = tg.id) - INNER JOIN {str_schema}.connections c - ON (tg.connection_id = c.connection_id) - AND (t.sql_flavor = c.sql_flavor) - WHERE t.error_type = 'Test Results' - AND t.test_id = '{selected_row["test_type_id"]}' - AND t.lookup_query > ''; - """ - - def replace_parms(df_test, str_query): - if df_test.empty: - raise ValueError("This test definition is no longer present.") - - str_query = str_query.replace("{TARGET_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"])) - str_query = str_query.replace("{TABLE_NAME}", empty_if_null(selected_row["table_name"])) - str_query = str_query.replace("{COLUMN_NAME}", empty_if_null(selected_row["column_names"])) - str_query = str_query.replace("{DATA_QC_SCHEMA}", empty_if_null(lst_query[0]["project_qc_schema"])) - str_query = str_query.replace("{TEST_DATE}", str(empty_if_null(selected_row["test_date"]))) - - str_query = str_query.replace("{CUSTOM_QUERY}", empty_if_null(df_test.at[0, "custom_query"])) - str_query = str_query.replace("{BASELINE_VALUE}", empty_if_null(df_test.at[0, "baseline_value"])) - str_query = str_query.replace("{BASELINE_CT}", empty_if_null(df_test.at[0, "baseline_ct"])) - str_query = str_query.replace("{BASELINE_AVG}", empty_if_null(df_test.at[0, "baseline_avg"])) - str_query = str_query.replace("{BASELINE_SD}", empty_if_null(df_test.at[0, "baseline_sd"])) - str_query = str_query.replace("{THRESHOLD_VALUE}", empty_if_null(df_test.at[0, "threshold_value"])) - - str_substitute = empty_if_null(df_test.at[0, "subset_condition"]) - str_substitute = "1=1" if str_substitute == "" else str_substitute - str_query = str_query.replace("{SUBSET_CONDITION}", str_substitute) - - str_query = str_query.replace("{GROUPBY_NAMES}", empty_if_null(df_test.at[0, "groupby_names"])) - str_query = str_query.replace("{HAVING_CONDITION}", empty_if_null(df_test.at[0, "having_condition"])) - str_query = str_query.replace("{MATCH_SCHEMA_NAME}", empty_if_null(df_test.at[0, "match_schema_name"])) - str_query = str_query.replace("{MATCH_TABLE_NAME}", empty_if_null(df_test.at[0, "match_table_name"])) - str_query = str_query.replace("{MATCH_COLUMN_NAMES}", empty_if_null(df_test.at[0, 
"match_column_names"])) - - str_substitute = empty_if_null(df_test.at[0, "match_subset_condition"]) - str_substitute = "1=1" if str_substitute == "" else str_substitute - str_query = str_query.replace("{MATCH_SUBSET_CONDITION}", str_substitute) - - str_query = str_query.replace("{MATCH_GROUPBY_NAMES}", empty_if_null(df_test.at[0, "match_groupby_names"])) - str_query = str_query.replace("{MATCH_HAVING_CONDITION}", empty_if_null(df_test.at[0, "match_having_condition"])) - str_query = str_query.replace("{COLUMN_NAME_NO_QUOTES}", empty_if_null(selected_row["column_names"])) - - str_query = str_query.replace("{WINDOW_DATE_COLUMN}", empty_if_null(df_test.at[0, "window_date_column"])) - str_query = str_query.replace("{WINDOW_DAYS}", empty_if_null(df_test.at[0, "window_days"])) - - str_substitute = ConcatColumnList(selected_row["column_names"], "") - str_query = str_query.replace("{CONCAT_COLUMNS}", str_substitute) - str_substitute = ConcatColumnList(df_test.at[0, "match_groupby_names"], "") - str_query = str_query.replace("{CONCAT_MATCH_GROUPBY}", str_substitute) - - if str_query is None or str_query == "": - raise ValueError("Lookup query is not defined for this Test Type.") - return str_query - - try: - # Retrieve SQL for customer lookup - lst_query = db.retrieve_data_list(str_sql) - - if sql_only: - return lst_query, replace_parms, None - - # Retrieve and return data as df - if lst_query: - df_test = get_test_definition(selected_row["test_definition_id_current"]) - - str_sql = replace_parms(df_test, lst_query[0]["lookup_query"]) - df = db.retrieve_target_db_df( - lst_query[0]["sql_flavor"], - lst_query[0]["project_host"], - lst_query[0]["project_port"], - lst_query[0]["project_db"], - lst_query[0]["project_user"], - lst_query[0]["project_pw_encrypted"], - str_sql, - lst_query[0]["url"], - lst_query[0]["connect_by_url"], - lst_query[0]["connect_by_key"], - lst_query[0]["private_key"], - lst_query[0]["private_key_passphrase"], - ) - if df.empty: - return "ND", "Data that violates Test criteria is not present in the current dataset.", None - else: - return "OK", None, df - else: - return "NA", "A source data lookup for this Test is not available.", None - - except Exception as e: - return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}\n\n{str_sql}", None - - @st.cache_data(show_spinner=False) def do_source_data_lookup_custom(selected_row): - str_schema = st.session_state["dbschema"] - # Define the query - str_sql = f""" - SELECT d.custom_query as lookup_query, tg.table_group_schema, c.project_qc_schema, - c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, - c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase - FROM {str_schema}.test_definitions d - INNER JOIN {str_schema}.table_groups tg - ON ('{selected_row["table_groups_id"]}'::UUID = tg.id) - INNER JOIN {str_schema}.connections c - ON (tg.connection_id = c.connection_id) - WHERE d.id = '{selected_row["test_definition_id_current"]}'; - """ + schema = st.session_state["dbschema"] + return do_source_data_lookup_custom_uncached(schema, selected_row) - try: - # Retrieve SQL for customer lookup - lst_query = db.retrieve_data_list(str_sql) - - # Retrieve and return data as df - if lst_query: - str_sql = lst_query[0]["lookup_query"] - str_sql = str_sql.replace("{DATA_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"])) - df = db.retrieve_target_db_df( - lst_query[0]["sql_flavor"], - lst_query[0]["project_host"], - lst_query[0]["project_port"], - 
lst_query[0]["project_db"], - lst_query[0]["project_user"], - lst_query[0]["project_pw_encrypted"], - str_sql, - lst_query[0]["url"], - lst_query[0]["connect_by_url"], - lst_query[0]["connect_by_key"], - lst_query[0]["private_key"], - lst_query[0]["private_key_passphrase"], - ) - if df.empty: - return "ND", "Data that violates Test criteria is not present in the current dataset.", None - else: - return "OK", None, df - else: - return "NA", "A source data lookup for this Test is not available.", None - except Exception as e: - return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}\n\n{str_sql}", None +@st.cache_data(show_spinner=False) +def get_test_result_history(selected_row): + schema = st.session_state["dbschema"] + return get_test_result_history_uncached(schema, selected_row) def show_test_def_detail(str_test_def_id): @@ -698,14 +521,7 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co st.markdown(":orange[Select a record to see more information.]") else: selected_row = selected_rows[len(selected_rows) - 1] - dfh = get_test_result_history( - selected_row["test_type"], - selected_row["test_suite_id"], - selected_row["table_name"], - selected_row["column_names"], - selected_row["test_definition_id_runtime"], - selected_row["auto_gen"] - ) + dfh = get_test_result_history(selected_row) show_hist_columns = ["test_date", "threshold_value", "result_measure", "result_status"] time_columns = ["test_date"] @@ -714,7 +530,7 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co pg_col1, pg_col2 = st.columns([0.5, 0.5]) with pg_col2: - v_col1, v_col2, v_col3 = st.columns([0.33, 0.33, 0.33]) + v_col1, v_col2, v_col3, v_col4 = st.columns([.25, .25, .25, .25]) if authentication_service.current_user_has_edit_role(): view_edit_test(v_col1, selected_row["test_definition_id_current"]) if selected_row["test_scope"] == "column": @@ -724,6 +540,24 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co ) view_bad_data(v_col3, selected_row) + with v_col4: + if st.button( + ":material/file_save: Report", + use_container_width=True, + ): + + def _generate(): + with tempfile.NamedTemporaryFile() as pdf_file: + create_report(pdf_file.name, selected_row) + return pdf_file.read() + + download_dialog( + dialog_title="Download Issue Report", + file_name="testgen_issue_report.pdf", + mime_type="application/pdf", + file_content_func=_generate, + ) + with pg_col1: fm.show_subheader(selected_row["test_name_short"]) st.markdown(f"###### {selected_row['test_description']}") @@ -837,7 +671,7 @@ def do_disposition_update(selected, str_new_status): def view_bad_data(button_container, selected_row): with button_container: if st.button( - "Source Data →", help="Review current source data for highlighted result", use_container_width=True + "Source Data →", help="Review current source data for highlighted result", use_container_width=True ): source_data_dialog(selected_row) @@ -855,13 +689,13 @@ def source_data_dialog(selected_row): with st.spinner("Retrieving source data..."): if selected_row["test_type"] == "CUSTOM": - bad_data_status, bad_data_msg, df_bad = do_source_data_lookup_custom(selected_row) + bad_data_status, bad_data_msg, query, df_bad = do_source_data_lookup_custom(selected_row) else: - bad_data_status, bad_data_msg, df_bad = do_source_data_lookup(selected_row) + bad_data_status, bad_data_msg, query, df_bad = do_source_data_lookup(selected_row) if bad_data_status in {"ND", "NA"}: st.info(bad_data_msg) elif 
bad_data_status == "ERR": - st.error(bad_data_msg) + st.error(f"{bad_data_msg}\n\n{query}") elif df_bad is None: st.error("An unknown error was encountered.") else: From 764bd12134c11f22c4301e7367f36ffe40858c25 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 26 Sep 2024 12:59:27 -0400 Subject: [PATCH 02/91] misc(ui): enhancements to button, link and flex widgets --- testgen/ui/assets/style.css | 12 ++++++------ .../ui/components/frontend/js/components/button.js | 13 ++++++++++++- .../ui/components/frontend/js/components/link.js | 5 +++++ testgen/ui/components/widgets/button.py | 3 ++- testgen/ui/components/widgets/link.py | 4 ++++ 5 files changed, 29 insertions(+), 8 deletions(-) diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css index 1a1d86f..184a8c6 100644 --- a/testgen/ui/assets/style.css +++ b/testgen/ui/assets/style.css @@ -74,7 +74,7 @@ div[data-testid="stVerticalBlock"] { gap: 0.5rem; } -div[data-testid="collapsedControl"] { +div[data-testid="stSidebarCollapsedControl"] { top: 0.5rem; } /* */ @@ -166,22 +166,22 @@ button[title="Show password text"] { background-color: var(--dk-card-background); } -[data-testid="column"]:has(> div[data-testid="stVerticalBlockBorderWrapper"] > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-row) [data-testid="stVerticalBlock"] { +div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-row) [data-testid="stVerticalBlock"] { width: 100%; flex-direction: row; } -[data-testid="column"]:has(> div[data-testid="stVerticalBlockBorderWrapper"] > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-row) [data-testid="stVerticalBlock"] > div[data-testid="element-container"], -[data-testid="column"]:has(> div[data-testid="stVerticalBlockBorderWrapper"] > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-row) [data-testid="stVerticalBlock"] > div[data-testid="element-container"] > div[data-testid] { +div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-row) [data-testid="stVerticalBlock"] > div[data-testid="element-container"], +div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-row) [data-testid="stVerticalBlock"] > div[data-testid="element-container"] > div[data-testid] { width: auto !important; max-height: 40px; } -[data-testid="column"]:has(> div[data-testid="stVerticalBlockBorderWrapper"] > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-start) [data-testid="stVerticalBlock"] { +div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-start) [data-testid="stVerticalBlock"] { justify-content: flex-start; } -[data-testid="column"]:has(> div[data-testid="stVerticalBlockBorderWrapper"] > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-end) [data-testid="stVerticalBlock"] { +div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-end) [data-testid="stVerticalBlock"] { justify-content: flex-end; } diff --git a/testgen/ui/components/frontend/js/components/button.js b/testgen/ui/components/frontend/js/components/button.js 
index a5ce8e8..a19d960 100644 --- a/testgen/ui/components/frontend/js/components/button.js +++ b/testgen/ui/components/frontend/js/components/button.js @@ -7,6 +7,7 @@ * @property {(string|null)} tooltip * @property {(string|null)} tooltipPosition * @property {(Function|null)} onclick + * @property {(bool)} disabled * @property {string?} style */ import { enforceElementWidth } from '../utils.js'; @@ -45,6 +46,7 @@ const Button = (/** @type Properties */ props) => { class: `tg-button tg-${props.type.val}-button ${props.type.val !== 'icon' && isIconOnly ? 'tg-icon-button' : ''}`, style: props.style?.val, onclick: onClickHandler, + disabled: !!props.disabled?.val, }, span({class: 'tg-button-focus-state-indicator'}, ''), props.icon ? i({class: 'material-symbols-rounded'}, props.icon) : undefined, @@ -118,6 +120,15 @@ button.tg-button:has(span) { padding: 8px 16px; } +button.tg-button:not(.tg-icon-button):has(span):has(i) { + padding-left: 8px; +} + +button.tg-button[disabled] { + color: var(--disabled-text-color); + cursor: not-allowed; +} + button.tg-button.tg-icon-button > i { font-size: 18px; } @@ -126,7 +137,7 @@ button.tg-button > i:has(+ span) { margin-right: 8px; } -button.tg-button:hover .tg-button-focus-state-indicator::before { +button.tg-button:hover:not([disabled]) .tg-button-focus-state-indicator::before { opacity: var(--button-hover-state-opacity); } `); diff --git a/testgen/ui/components/frontend/js/components/link.js b/testgen/ui/components/frontend/js/components/link.js index 17463d4..86e35cb 100644 --- a/testgen/ui/components/frontend/js/components/link.js +++ b/testgen/ui/components/frontend/js/components/link.js @@ -10,8 +10,10 @@ * @property {string?} right_icon * @property {number?} right_icon_size * @property {number?} height + * @property {number?} width * @property {string?} style */ +import { enforceElementWidth } from '../utils.js'; import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; @@ -19,6 +21,9 @@ const { a, div, i, span } = van.tags; const Link = (/** @type Properties */ props) => { Streamlit.setFrameHeight(props.height?.val || 24); + if (props.width?.val) { + enforceElementWidth(window.frameElement, props.width.val); + } if (!window.testgen.loadedStylesheets.link) { document.adoptedStyleSheets.push(stylesheet); diff --git a/testgen/ui/components/widgets/button.py b/testgen/ui/components/widgets/button.py index a78bc0d..4b0a2d0 100644 --- a/testgen/ui/components/widgets/button.py +++ b/testgen/ui/components/widgets/button.py @@ -13,6 +13,7 @@ def button( tooltip: str | None = None, tooltip_position: TooltipPosition = "left", on_click: typing.Callable[..., None] | None = None, + disabled: bool = False, style: str | None = None, key: str | None = None, ) -> typing.Any: @@ -25,7 +26,7 @@ def button( :param on_click: click handler for this button """ - props = {"type": type_} + props = {"type": type_, "disabled": disabled} if type_ != "icon": if not label: raise ValueError(f"A label is required for {type_} buttons") diff --git a/testgen/ui/components/widgets/link.py b/testgen/ui/components/widgets/link.py index 14511a8..7230edb 100644 --- a/testgen/ui/components/widgets/link.py +++ b/testgen/ui/components/widgets/link.py @@ -13,6 +13,7 @@ def link( right_icon: str | None = None, right_icon_size: float = 20.0, height: float | None = 21.0, + width: float | None = None, style: str | None = None, key: str = "testgen:link", ) -> None: @@ -32,6 +33,9 @@ def link( if style: props.update({"style": style}) + if width: + props.update({"width": 
width}) + clicked = component(id_="link", key=key, props=props) if clicked: Router().navigate(to=href, with_args=params) From bed5d1c0926a18df6ee4cc1688d75b81aa41eb44 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 26 Sep 2024 13:03:50 -0400 Subject: [PATCH 03/91] refactor(ui): rename "toolbar_select" widget to generic "select" --- testgen/ui/components/widgets/__init__.py | 2 +- testgen/ui/components/widgets/page.py | 48 --------------------- testgen/ui/components/widgets/select.py | 52 +++++++++++++++++++++++ testgen/ui/views/profiling_anomalies.py | 4 +- testgen/ui/views/profiling_results.py | 4 +- testgen/ui/views/profiling_summary.py | 2 +- testgen/ui/views/test_definitions.py | 4 +- testgen/ui/views/test_results.py | 4 +- testgen/ui/views/test_runs.py | 8 ++-- testgen/ui/views/test_suites.py | 11 ++--- 10 files changed, 72 insertions(+), 67 deletions(-) create mode 100644 testgen/ui/components/widgets/select.py diff --git a/testgen/ui/components/widgets/__init__.py b/testgen/ui/components/widgets/__init__.py index eba62b7..5771dfc 100644 --- a/testgen/ui/components/widgets/__init__.py +++ b/testgen/ui/components/widgets/__init__.py @@ -14,10 +14,10 @@ no_flex_gap, page_header, text, - toolbar_select, whitespace, ) from testgen.ui.components.widgets.paginator import paginator +from testgen.ui.components.widgets.select import select from testgen.ui.components.widgets.sidebar import sidebar from testgen.ui.components.widgets.sorting_selector import sorting_selector from testgen.ui.components.widgets.summary_bar import summary_bar diff --git a/testgen/ui/components/widgets/page.py b/testgen/ui/components/widgets/page.py index cb3b495..2703982 100644 --- a/testgen/ui/components/widgets/page.py +++ b/testgen/ui/components/widgets/page.py @@ -1,11 +1,8 @@ -import pandas as pd import streamlit as st from streamlit.delta_generator import DeltaGenerator -from streamlit_extras.no_default_selectbox import selectbox from testgen.ui.components.widgets.breadcrumbs import Breadcrumb from testgen.ui.components.widgets.breadcrumbs import breadcrumbs as tg_breadcrumbs -from testgen.ui.navigation.router import Router def page_header( @@ -34,51 +31,6 @@ def page_header( st.session_state["last_page"] = title -def toolbar_select( - options: pd.DataFrame | list[str], - value_column: str | None = None, - display_column: str | None = None, - default_value = None, - required: bool = False, - bind_to_query: str | None = None, - **kwargs, -): - kwargs = {**kwargs} - - if isinstance(options, pd.DataFrame): - value_column = value_column or options.columns[0] - display_column = display_column or value_column - kwargs["options"] = options[display_column] - if default_value in options[value_column].values: - kwargs["index"] = int(options[options[value_column] == default_value].index[0]) + (0 if required else 1) - else: - kwargs["options"] = options - if default_value in options: - kwargs["index"] = options.index(default_value) + (0 if required else 1) - - if bind_to_query: - kwargs["key"] = kwargs.get("key", f"toolbar_select_{bind_to_query}") - if default_value is not None and kwargs.get("index") is None: - Router().set_query_params({ bind_to_query: None }) # Unset the query params if the current value is not valid - - def update_query_params(): - query_value = st.session_state[kwargs["key"]] - if not required and query_value == "---": - query_value = None - elif isinstance(options, pd.DataFrame): - query_value = options.loc[options[display_column] == query_value, value_column].iloc[0] - 
Router().set_query_params({ bind_to_query: query_value }) - - kwargs["on_change"] = update_query_params - - selected = st.selectbox(**kwargs) if required else selectbox(**kwargs) - - if selected and isinstance(options, pd.DataFrame): - return options.loc[options[display_column] == selected, value_column].iloc[0] - - return selected - - def whitespace(size: float, container: DeltaGenerator | None = None): _apply_html(f'
', container) diff --git a/testgen/ui/components/widgets/select.py b/testgen/ui/components/widgets/select.py new file mode 100644 index 0000000..7183887 --- /dev/null +++ b/testgen/ui/components/widgets/select.py @@ -0,0 +1,52 @@ +import pandas as pd +import streamlit as st +from streamlit_extras.no_default_selectbox import selectbox + +from testgen.ui.navigation.router import Router + + +def select( + label: str, + options: pd.DataFrame | list[str], + value_column: str | None = None, + display_column: str | None = None, + default_value = None, + required: bool = False, + bind_to_query: str | None = None, + **kwargs, +): + kwargs = {**kwargs} + kwargs["label"] = label + + if isinstance(options, pd.DataFrame): + value_column = value_column or options.columns[0] + display_column = display_column or value_column + kwargs["options"] = options[display_column] + if default_value in options[value_column].values: + kwargs["index"] = int(options[options[value_column] == default_value].index[0]) + (0 if required else 1) + else: + kwargs["options"] = options + if default_value in options: + kwargs["index"] = options.index(default_value) + (0 if required else 1) + + if bind_to_query: + kwargs["key"] = kwargs.get("key", f"testgen_select_{bind_to_query}") + if default_value is not None and kwargs.get("index") is None: + Router().set_query_params({ bind_to_query: None }) # Unset the query params if the current value is not valid + + def update_query_params(): + query_value = st.session_state[kwargs["key"]] + if not required and query_value == "---": + query_value = None + elif isinstance(options, pd.DataFrame): + query_value = options.loc[options[display_column] == query_value, value_column].iloc[0] + Router().set_query_params({ bind_to_query: query_value }) + + kwargs["on_change"] = update_query_params + + selected = st.selectbox(**kwargs) if required else selectbox(**kwargs) + + if selected and isinstance(options, pd.DataFrame): + return options.loc[options[display_column] == selected, value_column].iloc[0] + + return selected \ No newline at end of file diff --git a/testgen/ui/views/profiling_anomalies.py b/testgen/ui/views/profiling_anomalies.py index 29dc430..a944ed9 100644 --- a/testgen/ui/views/profiling_anomalies.py +++ b/testgen/ui/views/profiling_anomalies.py @@ -51,7 +51,7 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | testgen.flex_row_end(export_button_column) with liklihood_filter_column: - issue_class = testgen.toolbar_select( + issue_class = testgen.select( options=["Definite", "Likely", "Possible", "Potential PII"], default_value=issue_class, required=False, @@ -61,7 +61,7 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | with issue_type_filter_column: issue_type_options = get_issue_types() - issue_type_id = testgen.toolbar_select( + issue_type_id = testgen.select( options=issue_type_options, default_value=None if issue_class == "Potential PII" else issue_type, value_column="id", diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py index aa94ae6..eb2e4f0 100644 --- a/testgen/ui/views/profiling_results.py +++ b/testgen/ui/views/profiling_results.py @@ -51,7 +51,7 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str | with table_filter_column: # Table Name filter df = profiling_queries.run_table_lookup_query(table_group_id) - table_name = testgen.toolbar_select( + table_name = testgen.select( options=df, value_column="table_name", default_value=table_name, 
@@ -62,7 +62,7 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str | with column_filter_column: # Column Name filter df = profiling_queries.run_column_lookup_query(table_group_id, table_name) - column_name = testgen.toolbar_select( + column_name = testgen.select( options=df, value_column="column_name", default_value=column_name, diff --git a/testgen/ui/views/profiling_summary.py b/testgen/ui/views/profiling_summary.py index 1a0beb0..e3b02d3 100644 --- a/testgen/ui/views/profiling_summary.py +++ b/testgen/ui/views/profiling_summary.py @@ -39,7 +39,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with group_filter_column: table_groups_df = get_db_table_group_choices(project_code) - table_group_id = testgen.toolbar_select( + table_group_id = testgen.select( options=table_groups_df, value_column="id", display_column="table_groups_name", diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index 0892998..eec1f7e 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -59,7 +59,7 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: with table_filter_column: table_options = run_table_lookup_query(table_group["id"]) - table_name = testgen.toolbar_select( + table_name = testgen.select( options=table_options, value_column="table_name", default_value=table_name, @@ -69,7 +69,7 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: ) with column_filter_column: column_options = get_column_names(table_group["id"], table_name) - column_name = testgen.toolbar_select( + column_name = testgen.select( options=column_options, default_value=column_name, bind_to_query="column_name", diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index a521c05..8f3a4c9 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -67,7 +67,7 @@ def render(self, run_id: str, status: str | None = None, test_type: str | None = "Warning", "Passed", ] - status = testgen.toolbar_select( + status = testgen.select( options=status_options, default_value=status or "Failed + Warning", required=False, @@ -76,7 +76,7 @@ def render(self, run_id: str, status: str | None = None, test_type: str | None = ) with test_type_filter_column: - test_type = testgen.toolbar_select( + test_type = testgen.select( options=get_test_types(), value_column="test_type", display_column="test_name_short", diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 6aa358e..028dfeb 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -39,7 +39,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with group_filter_column: table_groups_df = get_db_table_group_choices(project_code) - table_groups_id = testgen.toolbar_select( + table_group_id = testgen.select( options=table_groups_df, value_column="id", display_column="table_groups_name", @@ -49,8 +49,8 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N ) with suite_filter_column: - test_suites_df = get_db_test_suite_choices(project_code, table_groups_id) - test_suite_id = testgen.toolbar_select( + test_suites_df = get_db_test_suite_choices(project_code, table_group_id) + test_suite_id = testgen.select( options=test_suites_df, value_column="id", display_column="test_suite", @@ -65,7 +65,7 @@ def render(self, project_code: str | None = None, 
table_group_id: str | None = N testgen.whitespace(0.5) list_container = st.container(border=True) - test_runs_df = get_db_test_runs(project_code, table_groups_id, test_suite_id) + test_runs_df = get_db_test_runs(project_code, table_group_id, test_suite_id) run_count = len(test_runs_df) page_index = testgen.paginator(count=run_count, page_size=PAGE_SIZE) diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 780a2ed..ae0205c 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -36,13 +36,14 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N "https://docs.datakitchen.io/article/dataops-testgen-help/create-a-test-suite", ) + table_groups_df = get_db_table_group_choices(project_code) + add_button_onclick = partial(add_test_suite_dialog, project_code, table_groups_df) group_filter_column, actions_column = st.columns([.2, .8], vertical_alignment="bottom") testgen.flex_row_end(actions_column) with group_filter_column: - df_tg = get_db_table_group_choices(project_code) - table_group_id = testgen.toolbar_select( - options=df_tg, + table_group_id = testgen.select( + options=table_groups_df, value_column="id", display_column="table_groups_name", default_value=table_group_id, @@ -59,7 +60,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N ":material/add: Add Test Suite", key="test_suite:keys:add", help="Add a new test suite", - on_click=lambda: add_test_suite_dialog(project_code, df_tg), + on_click=add_button_onclick, ) for _, test_suite in df.iterrows(): @@ -80,7 +81,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N icon="edit", tooltip="Edit test suite", tooltip_position="right", - on_click=partial(edit_test_suite_dialog, project_code, df_tg, test_suite), + on_click=partial(edit_test_suite_dialog, project_code, table_groups_df, test_suite), key=f"test_suite:keys:edit:{test_suite['id']}", ) testgen.button( From 15b3941e81b68b388d380bc188158db1320b6e2b Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 26 Sep 2024 13:04:56 -0400 Subject: [PATCH 04/91] feat(ui): add empty_state widget --- testgen/ui/components/widgets/__init__.py | 1 + testgen/ui/components/widgets/empty_state.py | 71 ++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 testgen/ui/components/widgets/empty_state.py diff --git a/testgen/ui/components/widgets/__init__.py b/testgen/ui/components/widgets/__init__.py index 5771dfc..e6751f3 100644 --- a/testgen/ui/components/widgets/__init__.py +++ b/testgen/ui/components/widgets/__init__.py @@ -3,6 +3,7 @@ from testgen.ui.components.widgets.breadcrumbs import breadcrumbs from testgen.ui.components.widgets.button import button from testgen.ui.components.widgets.card import card +from testgen.ui.components.widgets.empty_state import EmptyStateText, empty_state from testgen.ui.components.widgets.expander_toggle import expander_toggle from testgen.ui.components.widgets.link import link from testgen.ui.components.widgets.page import ( diff --git a/testgen/ui/components/widgets/empty_state.py b/testgen/ui/components/widgets/empty_state.py new file mode 100644 index 0000000..7297abe --- /dev/null +++ b/testgen/ui/components/widgets/empty_state.py @@ -0,0 +1,71 @@ +import typing +from enum import Enum + +import streamlit as st + +from testgen.ui.components.widgets.button import button +from testgen.ui.components.widgets.link import link +from testgen.ui.components.widgets.page import css_class, whitespace + + 
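+# Note (editorial, not in the original patch): each enum value pairs a short headline
+# with a longer description; empty_state() below renders them as the title and
+# supporting text of the empty-state card.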
+class EmptyStateText(Enum): + Connection = ( + "Begin by connecting your database.", + "TestGen delivers data quality through data profiling, hygiene review, test generation, and test execution.", + ) + TableGroup = ( + "Profile your tables to detect hygiene issues", + "Create table groups for your connected databases to run data profiling and hygiene review.", + ) + Profiling = ( + "Profile your tables to detect hygiene issues", + "Run data profiling on your table groups to understand data types, column contents, and data patterns.", + ) + TestSuite = ( + "Run data validation tests", + "Automatically generate tests from data profiling results or write custom tests for your business rules.", + ) + TestExecution = ( + "Run data validation tests", + "Execute tests to assess data quality of your tables." + ) + + +def empty_state( + text: EmptyStateText, + action_label: str, + link_href: str | None = None, + link_params: dict | None = None, + button_onclick: typing.Callable[..., None] | None = None, + button_icon: str = "add", +) -> None: + with st.container(border=True): + css_class("bg-white") + whitespace(5) + st.html(f""" +
+ {text.value[0]} +

+                {text.value[1]}

+
+ """) + _, center_column, _ = st.columns([.4, .3, .4]) + with center_column: + if link_href: + link( + label=action_label, + href=link_href, + params=link_params or {}, + right_icon="chevron_right", + underline=False, + height=40, + style="margin: auto; border-radius: 4px; border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)", + ) + elif button_onclick: + button( + type_="flat", + label=action_label, + icon=button_icon, + on_click=button_onclick, + style="margin: auto; width: auto; background-color: var(--primary-color);", + ) + whitespace(5) From 2048bba5577facda61d82ed20b33c0939931f3c0 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 26 Sep 2024 13:21:24 -0400 Subject: [PATCH 05/91] refactor(dialogs): move execution dialogs to separate files --- .../application_logs_dialog.py} | 0 .../ui/views/dialogs/generate_tests_dialog.py | 80 ++++++++++++ .../profiling_results_dialog.py} | 0 .../ui/views/dialogs/run_profiling_dialog.py | 84 ++++++++++++ testgen/ui/views/dialogs/run_tests_dialog.py | 94 ++++++++++++++ testgen/ui/views/profiling_anomalies.py | 2 +- testgen/ui/views/project_settings.py | 2 +- testgen/ui/views/table_groups.py | 42 +----- testgen/ui/views/test_definitions.py | 2 +- testgen/ui/views/test_results.py | 2 +- testgen/ui/views/test_suites.py | 121 +----------------- 11 files changed, 266 insertions(+), 163 deletions(-) rename testgen/ui/views/{app_log_modal.py => dialogs/application_logs_dialog.py} (100%) create mode 100644 testgen/ui/views/dialogs/generate_tests_dialog.py rename testgen/ui/views/{profiling_modal.py => dialogs/profiling_results_dialog.py} (100%) create mode 100644 testgen/ui/views/dialogs/run_profiling_dialog.py create mode 100644 testgen/ui/views/dialogs/run_tests_dialog.py diff --git a/testgen/ui/views/app_log_modal.py b/testgen/ui/views/dialogs/application_logs_dialog.py similarity index 100% rename from testgen/ui/views/app_log_modal.py rename to testgen/ui/views/dialogs/application_logs_dialog.py diff --git a/testgen/ui/views/dialogs/generate_tests_dialog.py b/testgen/ui/views/dialogs/generate_tests_dialog.py new file mode 100644 index 0000000..43c88ac --- /dev/null +++ b/testgen/ui/views/dialogs/generate_tests_dialog.py @@ -0,0 +1,80 @@ +import time +import pandas as pd +import streamlit as st + +import testgen.ui.services.test_suite_service as test_suite_service +from testgen.commands.run_generate_tests import run_test_gen_queries +from testgen.ui.components import widgets as testgen + +ALL_TYPES_LABEL = "All Test Types" + + +@st.dialog(title="Generate Tests") +def generate_tests_dialog(test_suite: pd.Series) -> None: + test_suite_id = test_suite["id"] + test_suite_name = test_suite["test_suite"] + table_group_id = test_suite["table_groups_id"] + + selected_set = "" + generation_sets = test_suite_service.get_generation_set_choices() + + if generation_sets: + generation_sets.insert(0, ALL_TYPES_LABEL) + + with st.container(): + selected_set = st.selectbox("Generation Set", generation_sets) + if selected_set == ALL_TYPES_LABEL: + selected_set = "" + + test_ct, unlocked_test_ct, unlocked_edits_ct = test_suite_service.get_test_suite_refresh_warning(test_suite_id) + if test_ct: + unlocked_message = "" + if unlocked_edits_ct > 0: + unlocked_message = "Manual changes have been made to auto-generated tests in this test suite that have not been locked. " + elif unlocked_test_ct > 0: + unlocked_message = "Auto-generated tests are present in this test suite that have not been locked. 
" + + warning_message = f""" + {unlocked_message} + Generating tests now will overwrite unlocked tests subject to auto-generation based on the latest profiling. + \n\n_Auto-generated Tests: {test_ct}, Unlocked: {unlocked_test_ct}, Edited Unlocked: {unlocked_edits_ct}_ + """ + + with st.container(): + st.warning(warning_message, icon=":material/warning:") + if unlocked_edits_ct > 0: + if st.button("Lock Edited Tests"): + if test_suite_service.lock_edited_tests(test_suite_id): + st.info("Edited tests have been successfully locked.") + + with st.container(): + st.markdown(f"Execute test generation for the test suite **{test_suite_name}**?") + + if testgen.expander_toggle(expand_label="Show CLI command", key="test_suite:keys:generate-tests-show-cli"): + st.code( + f"testgen run-test-generation --table-group-id {table_group_id} --test-suite-key {test_suite_name}", + language="shellSession", + ) + + button_container = st.empty() + status_container = st.empty() + + test_generation_button = None + with button_container: + _, button_column = st.columns([.75, .25]) + with button_column: + test_generation_button = st.button("Generate Tests", use_container_width=True) + + if test_generation_button: + button_container.empty() + status_container.info("Starting test generation ...") + + try: + run_test_gen_queries(table_group_id, test_suite_name, selected_set) + except Exception as e: + status_container.error(f"Test generation encountered errors: {e!s}.") + + status_container.success(f"Test generation completed for test suite **{test_suite_name}**.") + time.sleep(1) + st.cache_data.clear() + st.rerun() diff --git a/testgen/ui/views/profiling_modal.py b/testgen/ui/views/dialogs/profiling_results_dialog.py similarity index 100% rename from testgen/ui/views/profiling_modal.py rename to testgen/ui/views/dialogs/profiling_results_dialog.py diff --git a/testgen/ui/views/dialogs/run_profiling_dialog.py b/testgen/ui/views/dialogs/run_profiling_dialog.py new file mode 100644 index 0000000..3ba1ab7 --- /dev/null +++ b/testgen/ui/views/dialogs/run_profiling_dialog.py @@ -0,0 +1,84 @@ +import time + +import pandas as pd +import streamlit as st + +import testgen.ui.services.query_service as dq +from testgen.commands.run_profiling_bridge import run_profiling_in_background +from testgen.ui.components import widgets as testgen +from testgen.ui.session import session + +LINK_KEY = "run_profiling_dialog:keys:go-to-runs" +LINK_HREF = "profiling-runs" + + +@st.dialog(title="Run Profiling") +def run_profiling_dialog(project_code: str, table_group: pd.Series | None = None, default_table_group_id: str | None = None) -> None: + if table_group is not None and not table_group.empty: + table_group_id: str = table_group["id"] + table_group_name: str = table_group["table_groups_name"] + else: + table_groups_df = get_table_group_options(project_code) + table_group_id: str = testgen.select( + label="Table Group", + options=table_groups_df, + value_column="id", + display_column="table_groups_name", + default_value=default_table_group_id, + required=True, + ) + table_group_name: str = table_groups_df.loc[table_groups_df["id"] == table_group_id, "table_groups_name"].iloc[0] + testgen.whitespace(1) + + with st.container(): + st.markdown(f"Execute profiling for the table group **{table_group_name}**?") + st.markdown(":material/info: _Profiling will be performed in a background process._") + + if testgen.expander_toggle(expand_label="Show CLI command", key="test_suite:keys:run-tests-show-cli"): + st.code(f"testgen run-profile 
--table-group-id {table_group_id}", language="shellSession") + + button_container = st.empty() + status_container = st.empty() + + with button_container: + _, button_column = st.columns([.85, .15]) + with button_column: + profile_button = st.button("Run Profiling", use_container_width=True, disabled=not table_group_id) + + if profile_button: + button_container.empty() + status_container.info("Starting profiling run ...") + + try: + run_profiling_in_background(table_group_id) + except Exception as e: + status_container.error(f"Profiling run encountered errors: {e!s}.") + + # The second condition is needed for the link to work + if profile_button or st.session_state.get(LINK_KEY): + with status_container.container(): + st.success( + f"Profiling run started for table group **{table_group_name}**." + ) + + if session.current_page != LINK_HREF: + testgen.link( + label="Go to Profiling Runs", + href=LINK_HREF, + params={ "table_group": table_group_id }, + right_icon="chevron_right", + underline=False, + height=40, + key=LINK_KEY, + style="margin-left: auto; border-radius: 4px; border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)", + ) + else: + time.sleep(1) + st.cache_data.clear() + st.rerun() + + +@st.cache_data(show_spinner=False) +def get_table_group_options(project_code: str) -> pd.DataFrame: + schema: str = st.session_state["dbschema"] + return dq.run_table_groups_lookup_query(schema, project_code) diff --git a/testgen/ui/views/dialogs/run_tests_dialog.py b/testgen/ui/views/dialogs/run_tests_dialog.py new file mode 100644 index 0000000..b624fb2 --- /dev/null +++ b/testgen/ui/views/dialogs/run_tests_dialog.py @@ -0,0 +1,94 @@ +import time +import pandas as pd +import streamlit as st + +import testgen.ui.services.database_service as db +from testgen.commands.run_execute_tests import run_execution_steps_in_background +from testgen.ui.components import widgets as testgen +from testgen.ui.session import session + +LINK_KEY = "run_tests_dialog:keys:go-to-runs" +LINK_HREF = "test-runs" + + +@st.dialog(title="Run Tests") +def run_tests_dialog(project_code: str, test_suite: pd.Series | None = None, default_test_suite_id: str | None = None) -> None: + if test_suite is not None and not test_suite.empty: + test_suite_id: str = test_suite["id"] + test_suite_name: str = test_suite["test_suite"] + else: + test_suites_df = get_test_suite_options(project_code) + test_suite_id: str = testgen.select( + label="Test Suite", + options=test_suites_df, + value_column="id", + display_column="test_suite", + default_value=default_test_suite_id, + required=True, + ) + test_suite_name: str = test_suites_df.loc[test_suites_df["id"] == test_suite_id, "test_suite"].iloc[0] + testgen.whitespace(1) + + with st.container(): + st.markdown(f"Run tests for the test suite **{test_suite_name}**?") + st.markdown(":material/info: _Test execution will be performed in a background process._") + + if testgen.expander_toggle(expand_label="Show CLI command", key="run_tests_dialog:keys:show-cli"): + st.code( + f"testgen run-tests --project-key {project_code} --test-suite-key {test_suite['test_suite']}", + language="shellSession" + ) + + button_container = st.empty() + status_container = st.empty() + + run_test_button = None + with button_container: + _, button_column = st.columns([.8, .2]) + with button_column: + run_test_button = st.button("Run Tests", use_container_width=True) + + if run_test_button: + button_container.empty() + status_container.info("Starting test run ...") + + try: + 
run_execution_steps_in_background(project_code, test_suite_name) + except Exception as e: + status_container.error(f"Test run encountered errors: {e!s}.") + + # The second condition is needed for the link to work + if run_test_button or st.session_state.get(LINK_KEY): + with status_container.container(): + st.success( + f"Test run started for test suite **{test_suite_name}**." + ) + + if session.current_page != LINK_HREF: + testgen.link( + label="Go to Test Runs", + href=LINK_HREF, + params={ "test_suite": test_suite_id }, + right_icon="chevron_right", + underline=False, + height=40, + key=LINK_KEY, + style="margin-left: auto; border-radius: 4px; border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)", + ) + else: + time.sleep(1) + st.cache_data.clear() + st.rerun() + + +@st.cache_data(show_spinner=False) +def get_test_suite_options(project_code: str) -> pd.DataFrame: + schema: str = st.session_state["dbschema"] + sql = f""" + SELECT test_suites.id::VARCHAR(50), + test_suites.test_suite + FROM {schema}.test_suites + WHERE test_suites.project_code = '{project_code}' + ORDER BY test_suites.test_suite + """ + return db.retrieve_data(sql) diff --git a/testgen/ui/views/profiling_anomalies.py b/testgen/ui/views/profiling_anomalies.py index a944ed9..46fab19 100644 --- a/testgen/ui/views/profiling_anomalies.py +++ b/testgen/ui/views/profiling_anomalies.py @@ -12,7 +12,7 @@ from testgen.ui.navigation.page import Page from testgen.ui.services import project_service from testgen.ui.session import session -from testgen.ui.views.profiling_modal import view_profiling_button +from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button class ProfilingAnomaliesPage(Page): diff --git a/testgen/ui/views/project_settings.py b/testgen/ui/views/project_settings.py index 603d104..f5ab382 100644 --- a/testgen/ui/views/project_settings.py +++ b/testgen/ui/views/project_settings.py @@ -8,7 +8,7 @@ from testgen.ui.navigation.page import Page from testgen.ui.services import form_service, project_service from testgen.ui.session import session -from testgen.ui.views.app_log_modal import view_log_file +from testgen.ui.views.dialogs.application_logs_dialog import view_log_file class ProjectSettingsPage(Page): diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups.py index 1f82de5..879ab92 100644 --- a/testgen/ui/views/table_groups.py +++ b/testgen/ui/views/table_groups.py @@ -10,12 +10,12 @@ import testgen.ui.services.connection_service as connection_service import testgen.ui.services.form_service as fm import testgen.ui.services.table_group_service as table_group_service -from testgen.commands.run_profiling_bridge import run_profiling_in_background from testgen.ui.components import widgets as testgen from testgen.ui.navigation.page import Page from testgen.ui.services import project_service from testgen.ui.services.string_service import empty_if_null from testgen.ui.session import session +from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog class TableGroupsPage(Page): @@ -114,7 +114,7 @@ def render(self, connection_id: str, **_kwargs) -> None: testgen.button( type_="stroked", label="Run Profiling", - on_click=partial(run_profiling_dialog, table_group), + on_click=partial(run_profiling_dialog, project_code, table_group), key=f"tablegroups:keys:runprofiling:{table_group['id']}", ) @@ -172,44 +172,6 @@ def delete_table_group_dialog(self, table_group: pd.Series): st.rerun() -@st.dialog(title="Run Profiling") -def 
run_profiling_dialog(table_group: pd.Series) -> None: - table_group_id = table_group["id"] - - with st.container(): - st.markdown( - f"Execute profiling for the Table Group :green[{table_group['table_groups_name']}]?" - " Profiling will be performed in a background process" - ) - - if testgen.expander_toggle(expand_label="Show CLI command", key="test_suite:keys:run-tests-show-cli"): - st.code(f"testgen run-profile --table-group-id {table_group_id}", language="shellSession") - - button_container = st.empty() - status_container = st.empty() - - with button_container: - _, button_column = st.columns([.85, .15]) - with button_column: - profile_button = st.button("Start", use_container_width=True) - - if profile_button: - button_container.empty() - - status_container.info("Executing Profiling...") - - try: - run_profiling_in_background(table_group_id) - except Exception as e: - status_container.empty() - status_container.error(f"Process started with errors: {e!s}.") - - status_container.empty() - status_container.success( - "Process has successfully started. Check 'Data Profiling' item in the menu to see the progress." - ) - - def show_table_group_form(mode, project_code: str, connection: dict, table_group: pd.Series | None = None): connection_id = connection["connection_id"] table_groups_settings_tab, table_groups_preview_tab = st.tabs(["Table Group Settings", "Test"]) diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index eec1f7e..baae650 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -16,7 +16,7 @@ from testgen.ui.services import authentication_service, project_service from testgen.ui.services.string_service import empty_if_null, snake_case_to_title_case from testgen.ui.session import session -from testgen.ui.views.profiling_modal import view_profiling_button +from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button LOG = logging.getLogger("testgen") diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 8f3a4c9..0358595 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -15,7 +15,7 @@ from testgen.ui.services import authentication_service, project_service from testgen.ui.services.string_service import empty_if_null from testgen.ui.session import session -from testgen.ui.views.profiling_modal import view_profiling_button +from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button from testgen.ui.views.test_definitions import show_test_form_by_id ALWAYS_SPIN = False diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index ae0205c..f6c9703 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -9,8 +9,6 @@ import testgen.ui.services.form_service as fm import testgen.ui.services.query_service as dq import testgen.ui.services.test_suite_service as test_suite_service -from testgen.commands.run_execute_tests import run_execution_steps_in_background -from testgen.commands.run_generate_tests import run_test_gen_queries from testgen.commands.run_observability_exporter import export_test_results from testgen.common import date_service from testgen.ui.components import widgets as testgen @@ -18,6 +16,8 @@ from testgen.ui.navigation.page import Page from testgen.ui.services.string_service import empty_if_null from testgen.ui.session import session +from testgen.ui.views.dialogs.generate_tests_dialog import generate_tests_dialog +from 
testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog from testgen.utils import to_int @@ -327,123 +327,6 @@ def delete_test_suite_dialog(selected_test_suite): st.rerun() -@st.dialog(title="Run Tests") -def run_tests_dialog(project_code, selected_test_suite): - test_suite_key = selected_test_suite["test_suite"] - start_process_button_message = "Start" - - with st.container(): - st.markdown(f"Run tests for the test suite :green[{test_suite_key}]?") - - if testgen.expander_toggle(expand_label="Show CLI command", key="test_suite:keys:run-tests-show-cli"): - st.code( - f"testgen run-tests --project-key {project_code} --test-suite-key {selected_test_suite['test_suite']}", - language="shellSession" - ) - - button_container = st.empty() - status_container = st.empty() - - run_test_button = None - with button_container: - _, button_column = st.columns([.85, .15]) - with button_column: - run_test_button = st.button(start_process_button_message, use_container_width=True) - - if run_test_button: - button_container.empty() - - status_container.info(f"Running tests for test suite {test_suite_key}") - - try: - run_execution_steps_in_background(project_code, test_suite_key) - except Exception as e: - status_container.empty() - status_container.error(f"Process started with errors: {e!s}.") - - status_container.empty() - status_container.success( - "Process has successfully started. Check details in menu item 'Data Quality Testing'." - ) - - -@st.dialog(title="Generate Tests") -def generate_tests_dialog(selected_test_suite): - test_suite_id = selected_test_suite["id"] - test_suite_key = selected_test_suite["test_suite"] - table_group_id = selected_test_suite["table_groups_id"] - start_process_button_message = "Start" - - with st.container(): - st.markdown(f"Execute the test generation for test suite :green[{test_suite_key}]?") - - warning_container = st.container() - options_container = st.container() - - if testgen.expander_toggle(expand_label="Show CLI command", key="test_suite:keys:generate-tests-show-cli"): - st.code( - f"testgen run-test-generation --table-group-id {table_group_id} --test-suite-key {test_suite_key}", - language="shellSession", - ) - - button_container = st.empty() - status_container = st.empty() - - test_ct, unlocked_test_ct, unlocked_edits_ct = test_suite_service.get_test_suite_refresh_warning(test_suite_id) - if test_ct: - warning_msg = "" - counts_msg = f"\n\nAuto-Generated Tests: {test_ct}, Unlocked: {unlocked_test_ct}, Edited Unlocked: {unlocked_edits_ct}" - if unlocked_edits_ct > 0: - if unlocked_edits_ct > 1: - - warning_msg = "Manual changes have been made to auto-generated tests in this Test Suite that have not been locked. " - else: - warning_msg = "A manual change has been made to an auto-generated test in this Test Suite that has not been locked. " - elif unlocked_test_ct > 0: - warning_msg = "Auto-generated tests are present in this Test Suite that have not been locked. 
" - warning_msg = f"{warning_msg}Generating tests now will overwrite unlocked tests subject to auto-generation based on the latest profiling.{counts_msg}" - with warning_container: - st.warning(warning_msg) - if unlocked_edits_ct > 0: - lock_edits_button = st.button("Lock Edited Tests") - if lock_edits_button: - edits_locked = test_suite_service.lock_edited_tests(test_suite_id) - if edits_locked: - st.info("Edited tests have been successfully locked.") - - with options_container: - lst_generation_sets = test_suite_service.get_generation_set_choices() - if lst_generation_sets: - lst_generation_sets.insert(0, "(All Test Types)") - str_generation_set = st.selectbox("Generation Set", lst_generation_sets) - if str_generation_set == "(All Test Types)": - str_generation_set = "" - else: - str_generation_set = "" - - test_generation_button = None - with button_container: - _, button_column = st.columns([.85, .15]) - with button_column: - test_generation_button = st.button(start_process_button_message, use_container_width=True) - - if test_generation_button: - button_container.empty() - - table_group_id = selected_test_suite["table_groups_id"] - test_suite_key = selected_test_suite["test_suite"] - status_container.info("Executing Test Generation...") - - try: - run_test_gen_queries(table_group_id, test_suite_key, str_generation_set) - except Exception as e: - status_container.empty() - status_container.error(f"Process had errors: {e!s}.") - - status_container.empty() - status_container.success("Process has successfully finished.") - - @st.dialog(title="Export to Observability") def observability_export_dialog(selected_test_suite): project_key = selected_test_suite["project_code"] From 5c1de82801ad2acb81440af42ab127667c26ef42 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 26 Sep 2024 13:24:15 -0400 Subject: [PATCH 06/91] feat(ui): add empty state messaging and links/actions to all top-level pages --- testgen/ui/queries/project_queries.py | 53 +++++++++++++++++++++++++- testgen/ui/views/overview.py | 31 +++++++++++++-- testgen/ui/views/profiling_summary.py | 48 ++++++++++++++++++++++-- testgen/ui/views/table_groups.py | 13 ++++++- testgen/ui/views/test_runs.py | 54 +++++++++++++++++++++++++-- testgen/ui/views/test_suites.py | 35 ++++++++++++++++- 6 files changed, 220 insertions(+), 14 deletions(-) diff --git a/testgen/ui/queries/project_queries.py b/testgen/ui/queries/project_queries.py index 43eced1..5c08706 100644 --- a/testgen/ui/queries/project_queries.py +++ b/testgen/ui/queries/project_queries.py @@ -1,9 +1,58 @@ +import pandas as pd import streamlit as st +import testgen.ui.services.database_service as db import testgen.ui.services.query_service as query_service @st.cache_data(show_spinner=False) def get_projects(): - str_schema = st.session_state["dbschema"] - return query_service.run_project_lookup_query(str_schema) + schema: str = st.session_state["dbschema"] + return query_service.run_project_lookup_query(schema) + + +@st.cache_data(show_spinner=False) +def get_summary_by_code(project_code: str) -> pd.Series: + schema: str = st.session_state["dbschema"] + sql = f""" + SELECT ( + SELECT COUNT(*) AS count + FROM {schema}.connections + WHERE connections.project_code = '{project_code}' + ) AS connections_ct, + ( + SELECT connection_id + FROM {schema}.connections + WHERE connections.project_code = '{project_code}' + LIMIT 1 + ) AS default_connection_id, + ( + SELECT COUNT(*) + FROM {schema}.table_groups + WHERE table_groups.project_code = '{project_code}' + ) AS table_groups_ct, + ( + 
SELECT COUNT(*) + FROM {schema}.profiling_runs + LEFT JOIN {schema}.table_groups ON profiling_runs.table_groups_id = table_groups.id + WHERE table_groups.project_code = '{project_code}' + ) AS profiling_runs_ct, + ( + SELECT COUNT(*) + FROM {schema}.test_suites + WHERE test_suites.project_code = '{project_code}' + ) AS test_suites_ct, + ( + SELECT COUNT(*) + FROM {schema}.test_definitions + LEFT JOIN {schema}.test_suites ON test_definitions.test_suite_id = test_suites.id + WHERE test_suites.project_code = '{project_code}' + ) AS test_definitions_ct, + ( + SELECT COUNT(*) + FROM {schema}.test_runs + LEFT JOIN {schema}.test_suites ON test_runs.test_suite_id = test_suites.id + WHERE test_suites.project_code = '{project_code}' + ) AS test_runs_ct; + """ + return db.retrieve_data(sql).iloc[0] diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index 727d643..dccd338 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -9,6 +9,7 @@ from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page +from testgen.ui.queries import project_queries from testgen.ui.services import test_suite_service from testgen.ui.session import session from testgen.utils import to_int @@ -24,21 +25,45 @@ class OverviewPage(Page): menu_item = MenuItem(icon="home", label="Overview", order=0) def render(self, project_code: str | None = None, **_kwargs): - project_code = project_code or session.project - table_groups_df: pd.DataFrame = get_table_groups_summary(project_code) - testgen.page_header( "Project Overview", "https://docs.datakitchen.io/article/dataops-testgen-help/introduction-to-dataops-testgen", ) + project_code = project_code or session.project + table_groups_df: pd.DataFrame = get_table_groups_summary(project_code) render_project_summary(table_groups_df) + if render_empty_state(project_code): + return + st.html(f'
Table Groups ({len(table_groups_df.index)})
') for index, table_group in table_groups_df.iterrows(): render_table_group_card(table_group, project_code, index) +def render_empty_state(project_code: str) -> bool: + project_summary_df = project_queries.get_summary_by_code(project_code) + if project_summary_df["profiling_runs_ct"] or project_summary_df["test_runs_ct"]: + return False + + testgen.whitespace(3) + if not project_summary_df["connections_ct"]: + testgen.empty_state( + text=testgen.EmptyStateText.Connection, + action_label="Go to Connections", + link_href="connections", + ) + else: + testgen.empty_state( + text=testgen.EmptyStateText.Profiling if project_summary_df["table_groups_ct"] else testgen.EmptyStateText.TableGroup, + action_label="Go to Table Groups", + link_href="connections:table-groups", + link_params={ "connection_id": str(project_summary_df["default_connection_id"]) } + ) + return True + + def render_project_summary(table_groups: pd.DataFrame) -> None: project_column, _ = st.columns([.5, .5]) with project_column: diff --git a/testgen/ui/views/profiling_summary.py b/testgen/ui/views/profiling_summary.py index e3b02d3..bce1650 100644 --- a/testgen/ui/views/profiling_summary.py +++ b/testgen/ui/views/profiling_summary.py @@ -13,7 +13,10 @@ from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page +from testgen.ui.queries import project_queries +from testgen.ui.services import authentication_service from testgen.ui.session import session +from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog from testgen.utils import to_int FORM_DATA_WIDTH = 400 @@ -28,13 +31,15 @@ class DataProfilingPage(Page): menu_item = MenuItem(icon="problem", label="Data Profiling", order=1) def render(self, project_code: str | None = None, table_group_id: str | None = None, **_kwargs) -> None: - project_code = project_code or session.project - testgen.page_header( "Profiling Runs", "https://docs.datakitchen.io/article/dataops-testgen-help/investigate-profiling", ) + project_code = project_code or session.project + if render_empty_state(project_code): + return + group_filter_column, actions_column = st.columns([.3, .7], vertical_alignment="bottom") with group_filter_column: @@ -48,7 +53,15 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N label="Table Group", ) - testgen.flex_row_end(actions_column) + with actions_column: + testgen.flex_row_end() + + if authentication_service.current_user_has_edit_role(): + st.button( + ":material/play_arrow: Run Profiling", + help="Run profiling for a table group", + on_click=partial(run_profiling_dialog, project_code, None, table_group_id) + ) fm.render_refresh_button(actions_column) testgen.whitespace(0.5) @@ -80,6 +93,35 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N testgen.divider(-4, 4) +def render_empty_state(project_code: str) -> bool: + project_summary_df = project_queries.get_summary_by_code(project_code) + if project_summary_df["profiling_runs_ct"]: + return False + + testgen.whitespace(5) + if not project_summary_df["connections_ct"]: + testgen.empty_state( + text=testgen.EmptyStateText.Connection, + action_label="Go to Connections", + link_href="connections", + ) + elif not project_summary_df["table_groups_ct"]: + testgen.empty_state( + text=testgen.EmptyStateText.TableGroup, + action_label="Go to Table Groups", + link_href="connections:table-groups", + link_params={ "connection_id": 
str(project_summary_df["default_connection_id"]) } + ) + else: + testgen.empty_state( + text=testgen.EmptyStateText.Profiling, + action_label="Run Profiling", + button_onclick=partial(run_profiling_dialog, project_code), + button_icon="play_arrow", + ) + return True + + def render_profiling_run_row(profiling_run: pd.Series, column_spec: list[int]) -> None: profiling_run_id = profiling_run["profiling_run_id"] status = profiling_run["status"] diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups.py index 879ab92..887bc09 100644 --- a/testgen/ui/views/table_groups.py +++ b/testgen/ui/views/table_groups.py @@ -46,11 +46,20 @@ def render(self, connection_id: str, **_kwargs) -> None: ], ) + df = table_group_service.get_by_connection(project_code, connection_id) + + if df.empty: + testgen.whitespace(3) + testgen.empty_state( + text=testgen.EmptyStateText.TableGroup, + action_label="Add Table Group", + button_onclick=partial(self.add_table_group_dialog, project_code, connection), + ) + return + _, actions_column = st.columns([.1, .9], vertical_alignment="bottom") testgen.flex_row_end(actions_column) - df = table_group_service.get_by_connection(project_code, connection_id) - for _, table_group in df.iterrows(): with testgen.card(title=table_group["table_groups_name"]) as table_group_card: with table_group_card.actions: diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 028dfeb..1733dc0 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -13,7 +13,10 @@ from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page +from testgen.ui.queries import project_queries +from testgen.ui.services import authentication_service from testgen.ui.session import session +from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog from testgen.utils import to_int PAGE_SIZE = 10 @@ -28,13 +31,15 @@ class TestRunsPage(Page): menu_item = MenuItem(icon="labs", label="Data Quality Testing", order=2) def render(self, project_code: str | None = None, table_group_id: str | None = None, test_suite_id: str | None = None, **_kwargs) -> None: - project_code = project_code or st.session_state["project"] - testgen.page_header( "Test Runs", "https://docs.datakitchen.io/article/dataops-testgen-help/test-results", ) + project_code = project_code or session.project + if render_empty_state(project_code): + return + group_filter_column, suite_filter_column, actions_column = st.columns([.3, .3, .4], vertical_alignment="bottom") with group_filter_column: @@ -59,7 +64,16 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N label="Test Suite", ) - testgen.flex_row_end(actions_column) + with actions_column: + testgen.flex_row_end(actions_column) + + if authentication_service.current_user_has_edit_role(): + st.button( + ":material/play_arrow: Run Tests", + help="Run tests for a test suite", + on_click=partial(run_tests_dialog, project_code, None, test_suite_id) + ) + fm.render_refresh_button(actions_column) testgen.whitespace(0.5) @@ -90,6 +104,40 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N testgen.divider(-4, 4) +def render_empty_state(project_code: str) -> bool: + project_summary_df = project_queries.get_summary_by_code(project_code) + if project_summary_df["test_runs_ct"]: + return False + + testgen.whitespace(5) + if not project_summary_df["connections_ct"]: + testgen.empty_state( + 
text=testgen.EmptyStateText.Connection, + action_label="Go to Connections", + link_href="connections", + ) + elif not project_summary_df["table_groups_ct"]: + testgen.empty_state( + text=testgen.EmptyStateText.TableGroup, + action_label="Go to Table Groups", + link_href="connections:table-groups", + link_params={ "connection_id": str(project_summary_df["default_connection_id"]) } + ) + elif not project_summary_df["test_suites_ct"] or not project_summary_df["test_definitions_ct"]: + testgen.empty_state( + text=testgen.EmptyStateText.TestSuite, + action_label="Go to Test Suites", + link_href="test-suites", + ) + else: + testgen.empty_state( + text=testgen.EmptyStateText.TestExecution, + action_label="Run Tests", + button_onclick=partial(run_tests_dialog, project_code), + button_icon="play_arrow", + ) + return True + def render_test_run_row(test_run: pd.Series, column_spec: list[int]) -> None: test_run_id = test_run["test_run_id"] status = test_run["status"] diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index f6c9703..9b9ac5b 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -14,6 +14,7 @@ from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page +from testgen.ui.queries import project_queries from testgen.ui.services.string_service import empty_if_null from testgen.ui.session import session from testgen.ui.views.dialogs.generate_tests_dialog import generate_tests_dialog @@ -29,15 +30,19 @@ class TestSuitesPage(Page): menu_item = MenuItem(icon="list_alt", label="Test Suites", order=3) def render(self, project_code: str | None = None, table_group_id: str | None = None, **_kwargs) -> None: - project_code = st.session_state["project"] testgen.page_header( "Test Suites", "https://docs.datakitchen.io/article/dataops-testgen-help/create-a-test-suite", ) + project_code = project_code or session.project table_groups_df = get_db_table_group_choices(project_code) add_button_onclick = partial(add_test_suite_dialog, project_code, table_groups_df) + + if render_empty_state(project_code, add_button_onclick): + return + group_filter_column, actions_column = st.columns([.2, .8], vertical_alignment="bottom") testgen.flex_row_end(actions_column) @@ -152,6 +157,34 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N ) +def render_empty_state(project_code: str, add_button_onclick: partial) -> bool: + project_summary_df = project_queries.get_summary_by_code(project_code) + if project_summary_df["test_suites_ct"]: + return False + + testgen.whitespace(5) + if not project_summary_df["connections_ct"]: + testgen.empty_state( + text=testgen.EmptyStateText.Connection, + action_label="Go to Connections", + link_href="connections", + ) + elif not project_summary_df["table_groups_ct"]: + testgen.empty_state( + text=testgen.EmptyStateText.TableGroup, + action_label="Go to Table Groups", + link_href="connections:table-groups", + link_params={ "connection_id": str(project_summary_df["default_connection_id"]) } + ) + else: + testgen.empty_state( + text=testgen.EmptyStateText.TestSuite, + action_label="Add Test Suite", + button_onclick=add_button_onclick, + ) + return True + + @st.cache_data(show_spinner=False) def get_db_table_group_choices(project_code): schema = st.session_state["dbschema"] From 40b24e9bbc58bbe1f1a5b6b3d47302855831b215 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 26 Sep 2024 13:26:03 -0400 Subject: [PATCH 
07/91] misc(ui): disable buttons when not relevant and styling improvements --- testgen/common/date_service.py | 5 ++++- testgen/ui/queries/test_suite_queries.py | 20 +++++++++++++++++- testgen/ui/services/query_service.py | 23 ++++++++++---------- testgen/ui/views/overview.py | 27 ++++++++++++++++-------- testgen/ui/views/table_groups.py | 13 +++++++++--- testgen/ui/views/test_definitions.py | 11 ++++++++-- testgen/ui/views/test_suites.py | 10 +++++++-- 7 files changed, 80 insertions(+), 29 deletions(-) diff --git a/testgen/common/date_service.py b/testgen/common/date_service.py index 28e4b06..e5e89c1 100644 --- a/testgen/common/date_service.py +++ b/testgen/common/date_service.py @@ -68,7 +68,10 @@ def get_timezoned_now(streamlit_session): return get_timezoned_timestamp(streamlit_session, value) -def get_formatted_duration(duration: str) -> str: +def get_formatted_duration(duration: str | None) -> str: + if not duration: + return "--" + hour, minute, second = duration.split(":") formatted = "" if int(hour): diff --git a/testgen/ui/queries/test_suite_queries.py b/testgen/ui/queries/test_suite_queries.py index 80a3fcc..5c696c4 100644 --- a/testgen/ui/queries/test_suite_queries.py +++ b/testgen/ui/queries/test_suite_queries.py @@ -67,6 +67,18 @@ def get_by_project(schema, project_code, table_group_id=None): test_runs.id = test_results.test_run_id ) GROUP BY test_runs.id + ), + test_defs AS ( + SELECT test_suite_id, + COUNT(*) as count + FROM {schema}.test_definitions + GROUP BY test_suite_id + ), + last_profile_date AS ( + SELECT table_groups_id, + MAX(profiling_starttime) as profiling_starttime + FROM {schema}.profiling_runs + GROUP BY table_groups_id ) SELECT suites.id::VARCHAR(50), @@ -84,7 +96,9 @@ def get_by_project(schema, project_code, table_group_id=None): suites.component_key, suites.component_type, suites.component_name, + test_defs.count as test_ct, last_gen_date.auto_gen_date as latest_auto_gen_date, + last_profile_date.profiling_starttime as latest_profiling_date, last_run.id as latest_run_id, last_run.test_starttime as latest_run_start, last_run.test_ct as last_run_test_ct, @@ -98,10 +112,14 @@ def get_by_project(schema, project_code, table_group_id=None): ON (suites.id = last_gen_date.test_suite_id) LEFT JOIN last_run ON (suites.id = last_run.test_suite_id) + LEFT JOIN test_defs + ON (suites.id = test_defs.test_suite_id) LEFT JOIN {schema}.connections AS connections ON (connections.connection_id = suites.connection_id) LEFT JOIN {schema}.table_groups as groups - ON (groups.id = suites.table_groups_id) + ON (groups.id = suites.table_groups_id) + LEFT JOIN last_profile_date + ON (groups.id = last_profile_date.table_groups_id) WHERE suites.project_code = '{project_code}' """ diff --git a/testgen/ui/services/query_service.py b/testgen/ui/services/query_service.py index 3343010..284a5da 100644 --- a/testgen/ui/services/query_service.py +++ b/testgen/ui/services/query_service.py @@ -1,3 +1,4 @@ +import pandas as pd import testgen.ui.services.database_service as db """ @@ -84,35 +85,35 @@ def run_connections_lookup_query(str_schema, str_project_code): return db.retrieve_data(str_sql) -def run_table_groups_lookup_query(str_schema, str_project_code, connection_id=None, table_group_id=None): - str_sql = f""" +def run_table_groups_lookup_query(schema: str, project_code: str, connection_id: str | None = None, table_group_id: str | None = None) -> pd.DataFrame: + sql = f""" SELECT tg.id::VARCHAR(50), tg.table_groups_name, tg.connection_id, tg.table_group_schema - FROM 
{str_schema}.table_groups tg + FROM {schema}.table_groups tg """ if connection_id: - str_sql += f""" - inner join {str_schema}.connections c on c.connection_id = tg.connection_id + sql += f""" + inner join {schema}.connections c on c.connection_id = tg.connection_id """ - str_sql += f""" - WHERE tg.project_code = '{str_project_code}' + sql += f""" + WHERE tg.project_code = '{project_code}' """ if table_group_id: - str_sql += f""" + sql += f""" AND tg.id = '{table_group_id}'::UUID """ if connection_id: - str_sql += f""" + sql += f""" AND c.id = '{connection_id}'::UUID """ - str_sql += """ + sql += """ ORDER BY table_groups_name """ - return db.retrieve_data(str_sql) + return db.retrieve_data(sql) def run_table_lookup_query(str_schema, str_table_groups_id): diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index dccd338..00c9525 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -132,9 +132,19 @@ def render_table_group_card(table_group: pd.Series, project_code: str, key: int) ) anomaly_count = to_int(table_group["latest_anomalies_ct"]) - st.html(f""" - {anomaly_count} hygiene issues in {to_int(table_group["latest_profile_table_ct"])} tables - """) + with st.container(): + testgen.flex_row_start() + testgen.text(f""" + {to_int(table_group['latest_profile_table_ct'])} tables  |  + {to_int(table_group['latest_profile_column_ct'])} tables  | + """) + testgen.link( + label=f"{anomaly_count} hygiene issues", + href="profiling-runs:hygiene", + params={ "run_id": str(table_group["latest_profile_id"]) }, + width=150, + key=f"overview:keys:go-to-issues:{table_group['latest_profile_id']}", + ) if anomaly_count: testgen.summary_bar( @@ -156,11 +166,8 @@ def render_table_group_card(table_group: pd.Series, project_code: str, key: int) total_tests = to_int(table_group["latest_tests_ct"]) if total_tests: passed_tests = to_int(table_group["latest_tests_passed_ct"]) - - st.html(f""" -

{round(passed_tests * 100 / total_tests)}% passed

- {total_tests} tests in {to_int(table_group["latest_tests_suite_ct"])} test suites - """) + testgen.text(f"{round(passed_tests * 100 / total_tests)}% passed") + testgen.text(f"{total_tests} tests in {to_int(table_group['latest_tests_suite_ct'])} test suites", "margin: 12px 0 12px;") testgen.summary_bar( items=[ @@ -207,7 +214,7 @@ def render_test_suite_item(test_suite: pd.Series, column_spec: list[int]) -> Non params={ "test_suite_id": str(test_suite["id"]) }, key=f"overview:keys:go-to-definitions:{test_suite['id']}", ) - testgen.caption(f"{to_int(test_suite['last_run_test_ct'])} tests", "margin-top: -16px;") + testgen.caption(f"{to_int(test_suite['test_ct'])} tests", "margin-top: -16px;") with generation_column: if (latest_generation := test_suite["latest_auto_gen_date"]) and pd.notnull(latest_generation): @@ -257,6 +264,7 @@ def get_table_groups_summary(project_code: str) -> pd.DataFrame: latest_run.id, latest_run.profiling_starttime, latest_run.table_ct, + latest_run.column_ct, latest_run.anomaly_ct, SUM( CASE @@ -359,6 +367,7 @@ def get_table_groups_summary(project_code: str) -> pd.DataFrame: latest_profile.id as latest_profile_id, latest_profile.profiling_starttime as latest_profile_start, latest_profile.table_ct as latest_profile_table_ct, + latest_profile.column_ct as latest_profile_column_ct, latest_profile.anomaly_ct as latest_anomalies_ct, latest_profile.definite_ct as latest_anomalies_definite_ct, latest_profile.likely_ct as latest_anomalies_likely_ct, diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups.py index 887bc09..0fcd627 100644 --- a/testgen/ui/views/table_groups.py +++ b/testgen/ui/views/table_groups.py @@ -164,11 +164,18 @@ def delete_table_group_dialog(self, table_group: pd.Series): ) accept_cascade_delete = st.toggle("I accept deletion of this Table Group and all related TestGen data.") - with st.form("Delete Table Group", clear_on_submit=True): + with st.form("Delete Table Group", clear_on_submit=True, border=False): disable_delete_button = authentication_service.current_user_has_read_role() or ( not can_be_deleted and not accept_cascade_delete ) - delete = st.form_submit_button("Delete", disabled=disable_delete_button, type="primary") + _, button_column = st.columns([.85, .15]) + with button_column: + delete = st.form_submit_button( + "Delete", + disabled=disable_delete_button, + type="primary", + use_container_width=True, + ) if delete: if table_group_service.are_table_groups_in_use([table_group_name]): @@ -375,7 +382,7 @@ def show_table_group_form(mode, project_code: str, connection: dict, table_group success_message = "Changes have been saved successfully. " else: table_group_service.add(entity) - success_message = "New Table Group added successfully. " + success_message = "New table group added successfully. " except IntegrityError: st.error("A Table Group with the same name already exists. 
") return diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index baae650..9348c32 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -156,9 +156,16 @@ def delete_test_dialog(selected_test_definition): int_data_width=700, ) - with st.form("Delete Test Definition", clear_on_submit=True): + with st.form("Delete Test Definition", clear_on_submit=True, border=False): disable_delete_button = authentication_service.current_user_has_read_role() or not can_be_deleted - delete = st.form_submit_button("Delete", disabled=disable_delete_button, type="primary") + _, button_column = st.columns([.85, .15]) + with button_column: + delete = st.form_submit_button( + "Delete", + disabled=disable_delete_button, + type="primary", + use_container_width=True, + ) if delete: test_definition_service.delete([test_definition_id]) diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 9b9ac5b..8c4a5bc 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -103,7 +103,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with main_section: testgen.no_flex_gap() testgen.link( - label=f"{to_int(test_suite['last_run_test_ct'])} tests definitions", + label=f"{to_int(test_suite['test_ct'])} tests definitions", href="test-suites:definitions", params={ "test_suite_id": test_suite["id"] }, right_icon="chevron_right", @@ -143,16 +143,22 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N if user_can_edit: with actions_section: + run_disabled = not to_int(test_suite["test_ct"]) testgen.button( type_="stroked", label="Run Tests", + tooltip="No test definitions to run" if run_disabled else None, on_click=partial(run_tests_dialog, project_code, test_suite), + disabled=run_disabled, key=f"test_suite:keys:runtests:{test_suite['id']}", ) + generate_disabled = pd.isnull(test_suite["latest_profiling_date"]) testgen.button( type_="stroked", label="Generate Tests", + tooltip="No profiling data available for test generation" if generate_disabled else None, on_click=partial(generate_tests_dialog, test_suite), + disabled=generate_disabled, key=f"test_suite:keys:generatetests:{test_suite['id']}", ) @@ -303,7 +309,7 @@ def show_test_suite(mode, project_code, table_groups_df, selected=None): success_message = ( "Changes have been saved successfully. " if mode == "edit" - else "New TestSuite added successfully. " + else "New test suite added successfully. 
" ) st.success(success_message) time.sleep(1) From 4826fc93a41b91c59d4b280212395fd40b902489 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 26 Sep 2024 13:42:41 -0400 Subject: [PATCH 08/91] style(ui): lint changed files --- testgen/ui/components/widgets/select.py | 2 +- testgen/ui/navigation/page.py | 2 +- testgen/ui/queries/test_suite_queries.py | 20 +++++++++---------- testgen/ui/services/project_service.py | 2 +- testgen/ui/services/query_service.py | 1 + .../ui/views/dialogs/generate_tests_dialog.py | 5 +++-- .../ui/views/dialogs/run_profiling_dialog.py | 2 +- testgen/ui/views/dialogs/run_tests_dialog.py | 3 ++- testgen/ui/views/overview.py | 2 +- testgen/ui/views/profiling_anomalies.py | 2 +- testgen/ui/views/profiling_summary.py | 2 +- testgen/ui/views/table_groups.py | 2 +- testgen/ui/views/test_definitions.py | 2 +- testgen/ui/views/test_runs.py | 2 +- testgen/ui/views/test_suites.py | 2 +- 15 files changed, 27 insertions(+), 24 deletions(-) diff --git a/testgen/ui/components/widgets/select.py b/testgen/ui/components/widgets/select.py index 7183887..e259410 100644 --- a/testgen/ui/components/widgets/select.py +++ b/testgen/ui/components/widgets/select.py @@ -49,4 +49,4 @@ def update_query_params(): if selected and isinstance(options, pd.DataFrame): return options.loc[options[display_column] == selected, value_column].iloc[0] - return selected \ No newline at end of file + return selected diff --git a/testgen/ui/navigation/page.py b/testgen/ui/navigation/page.py index b7a53cc..369e43f 100644 --- a/testgen/ui/navigation/page.py +++ b/testgen/ui/navigation/page.py @@ -40,7 +40,7 @@ def _navigate(self) -> None: session.current_page_args = session.current_page_args or {} self._validate_project_query_param() - + self.render(**session.current_page_args) def _validate_project_query_param(self) -> None: diff --git a/testgen/ui/queries/test_suite_queries.py b/testgen/ui/queries/test_suite_queries.py index 5c696c4..765a14c 100644 --- a/testgen/ui/queries/test_suite_queries.py +++ b/testgen/ui/queries/test_suite_queries.py @@ -99,12 +99,12 @@ def get_by_project(schema, project_code, table_group_id=None): test_defs.count as test_ct, last_gen_date.auto_gen_date as latest_auto_gen_date, last_profile_date.profiling_starttime as latest_profiling_date, - last_run.id as latest_run_id, - last_run.test_starttime as latest_run_start, + last_run.id as latest_run_id, + last_run.test_starttime as latest_run_start, last_run.test_ct as last_run_test_ct, - last_run.passed_ct as last_run_passed_ct, - last_run.warning_ct as last_run_warning_ct, - last_run.failed_ct as last_run_failed_ct, + last_run.passed_ct as last_run_passed_ct, + last_run.warning_ct as last_run_warning_ct, + last_run.failed_ct as last_run_failed_ct, last_run.error_ct as last_run_error_ct, last_run.dismissed_ct as last_run_dismissed_ct FROM {schema}.test_suites as suites @@ -114,15 +114,15 @@ def get_by_project(schema, project_code, table_group_id=None): ON (suites.id = last_run.test_suite_id) LEFT JOIN test_defs ON (suites.id = test_defs.test_suite_id) - LEFT JOIN {schema}.connections AS connections - ON (connections.connection_id = suites.connection_id) - LEFT JOIN {schema}.table_groups as groups + LEFT JOIN {schema}.connections AS connections + ON (connections.connection_id = suites.connection_id) + LEFT JOIN {schema}.table_groups as groups ON (groups.id = suites.table_groups_id) LEFT JOIN last_profile_date ON (groups.id = last_profile_date.table_groups_id) WHERE suites.project_code = '{project_code}' """ - + if table_group_id: 
sql += f""" AND suites.table_groups_id = '{table_group_id}' @@ -131,7 +131,7 @@ def get_by_project(schema, project_code, table_group_id=None): sql += """ ORDER BY suites.test_suite; """ - + return db.retrieve_data(sql) diff --git a/testgen/ui/services/project_service.py b/testgen/ui/services/project_service.py index 24a41ab..fa049b7 100644 --- a/testgen/ui/services/project_service.py +++ b/testgen/ui/services/project_service.py @@ -24,4 +24,4 @@ def get_project_by_code(code: str): if not code: return None return query_service.get_project_by_code(session.dbschema, code) - \ No newline at end of file + diff --git a/testgen/ui/services/query_service.py b/testgen/ui/services/query_service.py index 284a5da..088c4b0 100644 --- a/testgen/ui/services/query_service.py +++ b/testgen/ui/services/query_service.py @@ -1,4 +1,5 @@ import pandas as pd + import testgen.ui.services.database_service as db """ diff --git a/testgen/ui/views/dialogs/generate_tests_dialog.py b/testgen/ui/views/dialogs/generate_tests_dialog.py index 43c88ac..7647645 100644 --- a/testgen/ui/views/dialogs/generate_tests_dialog.py +++ b/testgen/ui/views/dialogs/generate_tests_dialog.py @@ -1,4 +1,5 @@ import time + import pandas as pd import streamlit as st @@ -49,7 +50,7 @@ def generate_tests_dialog(test_suite: pd.Series) -> None: with st.container(): st.markdown(f"Execute test generation for the test suite **{test_suite_name}**?") - + if testgen.expander_toggle(expand_label="Show CLI command", key="test_suite:keys:generate-tests-show-cli"): st.code( f"testgen run-test-generation --table-group-id {table_group_id} --test-suite-key {test_suite_name}", @@ -75,6 +76,6 @@ def generate_tests_dialog(test_suite: pd.Series) -> None: status_container.error(f"Test generation encountered errors: {e!s}.") status_container.success(f"Test generation completed for test suite **{test_suite_name}**.") - time.sleep(1) + time.sleep(1) st.cache_data.clear() st.rerun() diff --git a/testgen/ui/views/dialogs/run_profiling_dialog.py b/testgen/ui/views/dialogs/run_profiling_dialog.py index 3ba1ab7..b39aa15 100644 --- a/testgen/ui/views/dialogs/run_profiling_dialog.py +++ b/testgen/ui/views/dialogs/run_profiling_dialog.py @@ -73,7 +73,7 @@ def run_profiling_dialog(project_code: str, table_group: pd.Series | None = None style="margin-left: auto; border-radius: 4px; border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)", ) else: - time.sleep(1) + time.sleep(1) st.cache_data.clear() st.rerun() diff --git a/testgen/ui/views/dialogs/run_tests_dialog.py b/testgen/ui/views/dialogs/run_tests_dialog.py index b624fb2..3e46e7b 100644 --- a/testgen/ui/views/dialogs/run_tests_dialog.py +++ b/testgen/ui/views/dialogs/run_tests_dialog.py @@ -1,4 +1,5 @@ import time + import pandas as pd import streamlit as st @@ -76,7 +77,7 @@ def run_tests_dialog(project_code: str, test_suite: pd.Series | None = None, def style="margin-left: auto; border-radius: 4px; border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)", ) else: - time.sleep(1) + time.sleep(1) st.cache_data.clear() st.rerun() diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index 00c9525..1d987c5 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -135,7 +135,7 @@ def render_table_group_card(table_group: pd.Series, project_code: str, key: int) with st.container(): testgen.flex_row_start() testgen.text(f""" - {to_int(table_group['latest_profile_table_ct'])} tables  |  + 
{to_int(table_group['latest_profile_table_ct'])} tables  |  {to_int(table_group['latest_profile_column_ct'])} tables  | """) testgen.link( diff --git a/testgen/ui/views/profiling_anomalies.py b/testgen/ui/views/profiling_anomalies.py index 46fab19..7f69db6 100644 --- a/testgen/ui/views/profiling_anomalies.py +++ b/testgen/ui/views/profiling_anomalies.py @@ -29,7 +29,7 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | f"Profiling run with ID '{run_id}' does not exist. Redirecting to list of Profiling Runs ...", "profiling-runs", ) - + run_date, _table_group_id, table_group_name, project_code = run_parentage run_date = date_service.get_timezoned_timestamp(st.session_state, run_date) project_service.set_current_project(project_code) diff --git a/testgen/ui/views/profiling_summary.py b/testgen/ui/views/profiling_summary.py index bce1650..3080ac6 100644 --- a/testgen/ui/views/profiling_summary.py +++ b/testgen/ui/views/profiling_summary.py @@ -97,7 +97,7 @@ def render_empty_state(project_code: str) -> bool: project_summary_df = project_queries.get_summary_by_code(project_code) if project_summary_df["profiling_runs_ct"]: return False - + testgen.whitespace(5) if not project_summary_df["connections_ct"]: testgen.empty_state( diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups.py index 0fcd627..70eeacc 100644 --- a/testgen/ui/views/table_groups.py +++ b/testgen/ui/views/table_groups.py @@ -56,7 +56,7 @@ def render(self, connection_id: str, **_kwargs) -> None: button_onclick=partial(self.add_table_group_dialog, project_code, connection), ) return - + _, actions_column = st.columns([.1, .9], vertical_alignment="bottom") testgen.flex_row_end(actions_column) diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index 9348c32..f9f1ffd 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -127,7 +127,7 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: help="Delete the selected Test Definition", disabled=not selected, ): - delete_test_dialog(selected_test_def) + delete_test_dialog(selected_test_def) @st.dialog("Delete Test") diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 1733dc0..22c1f5d 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -108,7 +108,7 @@ def render_empty_state(project_code: str) -> bool: project_summary_df = project_queries.get_summary_by_code(project_code) if project_summary_df["test_runs_ct"]: return False - + testgen.whitespace(5) if not project_summary_df["connections_ct"]: testgen.empty_state( diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 8c4a5bc..315f76b 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -167,7 +167,7 @@ def render_empty_state(project_code: str, add_button_onclick: partial) -> bool: project_summary_df = project_queries.get_summary_by_code(project_code) if project_summary_df["test_suites_ct"]: return False - + testgen.whitespace(5) if not project_summary_df["connections_ct"]: testgen.empty_state( From 0cf62acbd3eba52518f9edb38869680d30f81a4a Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 30 Sep 2024 19:30:04 -0400 Subject: [PATCH 09/91] fix(ui): update empty state widget --- testgen/ui/components/widgets/__init__.py | 2 +- testgen/ui/components/widgets/empty_state.py | 11 +++++++---- testgen/ui/views/overview.py | 12 +++++++++--- 
testgen/ui/views/profiling_summary.py | 16 ++++++++++++---- testgen/ui/views/table_groups.py | 4 +++- testgen/ui/views/test_runs.py | 20 +++++++++++++++----- testgen/ui/views/test_suites.py | 17 +++++++++++++---- 7 files changed, 60 insertions(+), 22 deletions(-) diff --git a/testgen/ui/components/widgets/__init__.py b/testgen/ui/components/widgets/__init__.py index e6751f3..72930bb 100644 --- a/testgen/ui/components/widgets/__init__.py +++ b/testgen/ui/components/widgets/__init__.py @@ -3,7 +3,7 @@ from testgen.ui.components.widgets.breadcrumbs import breadcrumbs from testgen.ui.components.widgets.button import button from testgen.ui.components.widgets.card import card -from testgen.ui.components.widgets.empty_state import EmptyStateText, empty_state +from testgen.ui.components.widgets.empty_state import EmptyStateMessage, empty_state from testgen.ui.components.widgets.expander_toggle import expander_toggle from testgen.ui.components.widgets.link import link from testgen.ui.components.widgets.page import ( diff --git a/testgen/ui/components/widgets/empty_state.py b/testgen/ui/components/widgets/empty_state.py index 7297abe..8b34df0 100644 --- a/testgen/ui/components/widgets/empty_state.py +++ b/testgen/ui/components/widgets/empty_state.py @@ -8,7 +8,7 @@ from testgen.ui.components.widgets.page import css_class, whitespace -class EmptyStateText(Enum): +class EmptyStateMessage(Enum): Connection = ( "Begin by connecting your database.", "TestGen delivers data quality through data profiling, hygiene review, test generation, and test execution.", @@ -32,7 +32,9 @@ class EmptyStateText(Enum): def empty_state( - text: EmptyStateText, + label: str, + icon: str, + message: EmptyStateMessage, action_label: str, link_href: str | None = None, link_params: dict | None = None, @@ -44,8 +46,9 @@ def empty_state( whitespace(5) st.html(f"""
- {text.value[0]} -

{text.value[1]}

+

{label}

+

{icon}

+

{message.value[0]}
{message.value[1]}

""") _, center_column, _ = st.columns([.4, .3, .4]) diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index 1d987c5..c25a62b 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -15,6 +15,7 @@ from testgen.utils import to_int STALE_PROFILE_DAYS = 30 +PAGE_ICON = "home" class OverviewPage(Page): @@ -22,7 +23,7 @@ class OverviewPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, ] - menu_item = MenuItem(icon="home", label="Overview", order=0) + menu_item = MenuItem(icon=PAGE_ICON, label="Overview", order=0) def render(self, project_code: str | None = None, **_kwargs): testgen.page_header( @@ -47,16 +48,21 @@ def render_empty_state(project_code: str) -> bool: if project_summary_df["profiling_runs_ct"] or project_summary_df["test_runs_ct"]: return False + label="Your project is empty" testgen.whitespace(3) if not project_summary_df["connections_ct"]: testgen.empty_state( - text=testgen.EmptyStateText.Connection, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.Connection, action_label="Go to Connections", link_href="connections", ) else: testgen.empty_state( - text=testgen.EmptyStateText.Profiling if project_summary_df["table_groups_ct"] else testgen.EmptyStateText.TableGroup, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.Profiling if project_summary_df["table_groups_ct"] else testgen.EmptyStateMessage.TableGroup, action_label="Go to Table Groups", link_href="connections:table-groups", link_params={ "connection_id": str(project_summary_df["default_connection_id"]) } diff --git a/testgen/ui/views/profiling_summary.py b/testgen/ui/views/profiling_summary.py index 3080ac6..0ce607d 100644 --- a/testgen/ui/views/profiling_summary.py +++ b/testgen/ui/views/profiling_summary.py @@ -21,6 +21,7 @@ FORM_DATA_WIDTH = 400 PAGE_SIZE = 10 +PAGE_ICON = "data_thresholding" class DataProfilingPage(Page): @@ -28,7 +29,7 @@ class DataProfilingPage(Page): can_activate: typing.ClassVar = [ lambda: session.authentication_status, ] - menu_item = MenuItem(icon="problem", label="Data Profiling", order=1) + menu_item = MenuItem(icon=PAGE_ICON, label="Data Profiling", order=1) def render(self, project_code: str | None = None, table_group_id: str | None = None, **_kwargs) -> None: testgen.page_header( @@ -98,23 +99,30 @@ def render_empty_state(project_code: str) -> bool: if project_summary_df["profiling_runs_ct"]: return False + label = "No profiling runs yet" testgen.whitespace(5) if not project_summary_df["connections_ct"]: testgen.empty_state( - text=testgen.EmptyStateText.Connection, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.Connection, action_label="Go to Connections", link_href="connections", ) elif not project_summary_df["table_groups_ct"]: testgen.empty_state( - text=testgen.EmptyStateText.TableGroup, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.TableGroup, action_label="Go to Table Groups", link_href="connections:table-groups", link_params={ "connection_id": str(project_summary_df["default_connection_id"]) } ) else: testgen.empty_state( - text=testgen.EmptyStateText.Profiling, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.Profiling, action_label="Run Profiling", button_onclick=partial(run_profiling_dialog, project_code), button_icon="play_arrow", diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups.py index 70eeacc..e62787c 100644 --- a/testgen/ui/views/table_groups.py +++ 
b/testgen/ui/views/table_groups.py @@ -51,7 +51,9 @@ def render(self, connection_id: str, **_kwargs) -> None: if df.empty: testgen.whitespace(3) testgen.empty_state( - text=testgen.EmptyStateText.TableGroup, + label="No table groups yet", + icon="table_view", + message=testgen.EmptyStateMessage.TableGroup, action_label="Add Table Group", button_onclick=partial(self.add_table_group_dialog, project_code, connection), ) diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 22c1f5d..21c4c24 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -20,6 +20,7 @@ from testgen.utils import to_int PAGE_SIZE = 10 +PAGE_ICON = "labs" class TestRunsPage(Page): @@ -28,7 +29,7 @@ class TestRunsPage(Page): lambda: session.authentication_status, lambda: session.project != None or "overview", ] - menu_item = MenuItem(icon="labs", label="Data Quality Testing", order=2) + menu_item = MenuItem(icon=PAGE_ICON, label="Data Quality Testing", order=2) def render(self, project_code: str | None = None, table_group_id: str | None = None, test_suite_id: str | None = None, **_kwargs) -> None: testgen.page_header( @@ -109,29 +110,38 @@ def render_empty_state(project_code: str) -> bool: if project_summary_df["test_runs_ct"]: return False + label="No test runs yet" testgen.whitespace(5) if not project_summary_df["connections_ct"]: testgen.empty_state( - text=testgen.EmptyStateText.Connection, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.Connection, action_label="Go to Connections", link_href="connections", ) elif not project_summary_df["table_groups_ct"]: testgen.empty_state( - text=testgen.EmptyStateText.TableGroup, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.TableGroup, action_label="Go to Table Groups", link_href="connections:table-groups", link_params={ "connection_id": str(project_summary_df["default_connection_id"]) } ) elif not project_summary_df["test_suites_ct"] or not project_summary_df["test_definitions_ct"]: testgen.empty_state( - text=testgen.EmptyStateText.TestSuite, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.TestSuite, action_label="Go to Test Suites", link_href="test-suites", ) else: testgen.empty_state( - text=testgen.EmptyStateText.TestExecution, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.TestExecution, action_label="Run Tests", button_onclick=partial(run_tests_dialog, project_code), button_icon="play_arrow", diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 315f76b..ca00ade 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -21,13 +21,15 @@ from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog from testgen.utils import to_int +PAGE_ICON = "rule" + class TestSuitesPage(Page): path = "test-suites" can_activate: typing.ClassVar = [ lambda: session.authentication_status, ] - menu_item = MenuItem(icon="list_alt", label="Test Suites", order=3) + menu_item = MenuItem(icon=PAGE_ICON, label="Test Suites", order=3) def render(self, project_code: str | None = None, table_group_id: str | None = None, **_kwargs) -> None: @@ -168,23 +170,30 @@ def render_empty_state(project_code: str, add_button_onclick: partial) -> bool: if project_summary_df["test_suites_ct"]: return False + label="No test suites yet" testgen.whitespace(5) if not project_summary_df["connections_ct"]: testgen.empty_state( - text=testgen.EmptyStateText.Connection, + label=label, + icon=PAGE_ICON, + 
message=testgen.EmptyStateMessage.Connection, action_label="Go to Connections", link_href="connections", ) elif not project_summary_df["table_groups_ct"]: testgen.empty_state( - text=testgen.EmptyStateText.TableGroup, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.TableGroup, action_label="Go to Table Groups", link_href="connections:table-groups", link_params={ "connection_id": str(project_summary_df["default_connection_id"]) } ) else: testgen.empty_state( - text=testgen.EmptyStateText.TestSuite, + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.TestSuite, action_label="Add Test Suite", button_onclick=add_button_onclick, ) From 330aca1220e2585f1ca6a5edf622478acdaf654e Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Tue, 1 Oct 2024 10:19:55 -0400 Subject: [PATCH 10/91] feat(pdf): Improving the PDF report aesthetics --- pyproject.toml | 5 +- testgen/ui/pdf/test_result_report.py | 367 ++++++++++++++++++++++----- testgen/ui/views/test_results.py | 6 +- 3 files changed, 312 insertions(+), 66 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 038d591..116243b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -225,8 +225,9 @@ select = ["A", "F", "S", "I", "T10", "B", "UP", "ISC", "T20", "RSE", "Q", "ARG", # globally ignore the following error codes # * TRY003: Avoid specifying long messages outside the exception class # * S608: Hardcoded SQL -# # F841: Unused local variable (it is instable) -ignore = ["TRY003", "S608", "S404", "F841"] +# * F841: Unused local variable (it is instable) +# * B023: Buggy: https://github.com/astral-sh/ruff/issues/7847 +ignore = ["TRY003", "S608", "S404", "F841", "B023"] # Ignore the following errors in files: # F403 - in __init__.py: We use __all__ in our module files so this behavior is acceptable in __init__.py diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py index 464beaa..e2d8e88 100644 --- a/testgen/ui/pdf/test_result_report.py +++ b/testgen/ui/pdf/test_result_report.py @@ -1,6 +1,20 @@ -from reportlab.lib import enums + +import pandas +from pandas.core.dtypes.common import is_numeric_dtype, is_string_dtype +from reportlab.lib import colors, enums +from reportlab.lib.colors import HexColor from reportlab.lib.styles import ParagraphStyle -from reportlab.platypus import Paragraph, SimpleDocTemplate, Table, TableStyle +from reportlab.lib.units import inch +from reportlab.pdfbase.pdfmetrics import stringWidth +from reportlab.platypus import ( + BalancedColumns, + CondPageBreak, + KeepTogether, + Paragraph, + SimpleDocTemplate, + Table, + TableStyle, +) from testgen.ui.services.database_service import get_schema from testgen.ui.services.test_results_service import ( @@ -9,122 +23,351 @@ get_test_result_history, ) +MARGIN = 0.4 * inch + +SECTION_MIN_AVAILABLE_HEIGHT = 60 + +COLOR_GRAY_BG = HexColor(0xF2F2F2) + +COLOR_GREEN_BG = HexColor(0xDCE4DA) + +COLOR_YELLOW_BG = HexColor(0xA0C84E40, hasAlpha=True) + +COLOR_GREEN_TEXT = HexColor(0x139549) + +COLOR_FADED_TEXT = HexColor(0x404040) + +COLOR_TEST_STATUS = { + "Passed": HexColor(0x94C465), + "Warning": HexColor(0xFCD349), + "Failed": HexColor(0xE94D4A), +} + PARA_STYLE_DEFAULT = ParagraphStyle( "default", fontSize=8, + fontName="Helvetica", ) -PARA_STYLE_INFO = PARA_STYLE_DEFAULT - +PARA_STYLE_TEXT = ParagraphStyle( + "text", + PARA_STYLE_DEFAULT, + fontName="Times-Roman", +) -PARA_STYLE_ERROR = PARA_STYLE_DEFAULT +PARA_STYLE_INFO = ParagraphStyle( + "info", + PARA_STYLE_DEFAULT, + fontName="Helvetica", + 
backColor=COLOR_YELLOW_BG, + borderPadding=12, + leftIndent=12, + rightIndent=12, + spaceBefore=18, + spaceAfter=18, +) PARA_STYLE_MONO = ParagraphStyle( - "heading_1", + "monospaced", PARA_STYLE_DEFAULT, - + fontName="Courier", + borderPadding=4, + backColor=COLOR_GRAY_BG, + leftIndent=4, + rightIndent=4, + spaceBefore=8, + spaceAfter=8, ) - -PARA_STYLE_H1 = ParagraphStyle( - "heading_1", +PARA_STYLE_FOOTNOTE = ParagraphStyle( + "footnote", PARA_STYLE_DEFAULT, - fontSize=12, - leading=16, + fontSize=6, + fontName="Helvetica-Oblique", + textColor=COLOR_FADED_TEXT, ) + PARA_STYLE_TITLE = ParagraphStyle( "title", PARA_STYLE_DEFAULT, fontSize=18, leading=30, alignment=enums.TA_CENTER, + spaceBefore=12, + spaceAfter=4, + textColor=COLOR_GREEN_TEXT, ) -TABLE_STYLE_SUMMARY = TableStyle( +PARA_STYLE_H1 = ParagraphStyle( + "heading_1", + PARA_STYLE_TITLE, + fontSize=12, + leading=16, + alignment=enums.TA_LEFT, +) + +TABLE_STYLE_DEFAULT = TableStyle( ( - # All cells ("ALIGN", (0, 0), (-1, -1), "LEFT"), ("VALIGN", (0, 0), (-1, -1), "TOP"), ("FONT", (0, 0), (-1, -1), "Helvetica", 7), - - # Header - ("FONT", (0, 0), (0, -1), "Helvetica-Bold"), - ("ALIGN", (0, 0), (0, -1), "RIGHT"), ) ) -def get_report_content(tr_data): +PARA_STYLE_CELL = ParagraphStyle( + "table_cell", + fontSize=7, + fontName="Helvetica", +) + +PARA_STYLE_CELL_NUMERIC = ParagraphStyle( + "table_cell_numeric", + PARA_STYLE_CELL, + alignment=enums.TA_RIGHT, + fontName="Courier", +) + +PARA_STYLE_CELL_NULL = ParagraphStyle( + "table_cell_null", + PARA_STYLE_CELL_NUMERIC, + alignment=enums.TA_CENTER, + textColor=COLOR_FADED_TEXT, +) + + +# One time use styles + + + +TABLE_HEADER_CELL_CMD = ( + ("FONT", "Helvetica-Bold"), + ("ALIGN", "RIGHT"), + ("BACKGROUND", COLOR_GREEN_BG), +) + +TABLE_STYLE_SUMMARY = TableStyle( + ( + ("GRID", (0, 0), (-1, -1), 2, colors.white), + ("BACKGROUND", (0, 0), (-1, -1), COLOR_GRAY_BG), + *[(cmd[0], (0, 0), (0, -1), *cmd[1:]) for cmd in TABLE_HEADER_CELL_CMD], + ), + parent=TABLE_STYLE_DEFAULT, +) - yield Paragraph(f"TestGen Issue Report: {tr_data['result_status']}", PARA_STYLE_TITLE) +TABLE_STYLE_DATA = TableStyle( + ( + ("ALIGN", (0, 0), (-1, 0), "CENTER"), + ("VALIGN", (0, 0), (-1, 0), "MIDDLE"), + ("GRID", (0, 0), (-1, -1), 0.5, COLOR_GRAY_BG), + ("INNERGRID", (0, 0), (-1, 0), 1, colors.white), + ("BACKGROUND", (0, 0), (-1, 0), COLOR_GRAY_BG), + ("FONT", (0, 0), (-10, 0), "Helvetica-Bold"), + + ), + parent=TABLE_STYLE_DEFAULT, +) - yield Paragraph("Summary", PARA_STYLE_H1) +def get_report_content(document, tr_data): + + yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE) + + status_color = COLOR_TEST_STATUS.get(tr_data["result_status"], COLOR_GRAY_BG) + summary_table_style = TableStyle( + ( + *[(cmd[0], (3, 3), (3, -1), *cmd[1:]) for cmd in TABLE_HEADER_CELL_CMD], + ("SPAN", (1, 0), (4, 0)), + ("SPAN", (5, 0), (5, 2)), + ("SPAN", (2, 1), (4, 1)), + ("SPAN", (2, 2), (4, 2)), + ("SPAN", (1, 3), (2, 3)), + ("SPAN", (4, 3), (5, 3)), + ("SPAN", (1, 4), (2, 4)), + ("SPAN", (4, 4), (5, 4)), + ("SPAN", (1, 5), (2, 5)), + ("SPAN", (4, 5), (5, 5)), + ("SPAN", (1, 6), (2, 6)), + ("SPAN", (4, 6), (5, 6)), + + # Measure cell + ("FONT", (1, 1), (1, 1), "Helvetica-Bold"), + + # Status cell + ("BACKGROUND", (5, 0), (5, 0), status_color), + ("FONT", (5, 0), (5, 0), "Helvetica", 14), + ("ALIGN", (5, 0), (5, 0), "CENTER"), + ("VALIGN", (5, 0), (5, 0), "MIDDLE"), + ("TEXTCOLOR", (5, 0), (5, 0), colors.white), + ), + parent=TABLE_STYLE_SUMMARY, + ) + test_timestamp = 
pandas.to_datetime(tr_data["test_time"]).strftime("%Y-%m-%d %H:%M:%S") summary_table_data = [ - ("Date", tr_data["test_date"]), - ("Database/Schema", tr_data["schema_name"]), - ("Table", tr_data["table_name"]), - ("Column", tr_data["column_names"]), - ("Table Group", tr_data["table_groups_name"]), - ("Test Suite", tr_data["test_suite"]), - ("Issue Type", "Test Result"), - ("Risk Level", tr_data["severity"]), - ("Data Quality Dimension", tr_data["dq_dimension"]), - ("Test", f"""{tr_data["test_name_short"]}: {tr_data["test_name_long"]}\n{tr_data["test_description"]}"""), - ("Result Measure", tr_data["result_measure"]), - ("Threshold Value", f"""{tr_data["threshold_value"]} {tr_data["threshold_description"]}"""), + ( + "Test", + ( + Paragraph(f"""{tr_data["test_name_short"]}: {tr_data["test_name_long"]}""", + style=PARA_STYLE_CELL), + Paragraph(tr_data["test_description"], style=PARA_STYLE_CELL), + ), + None, + None, + None, + tr_data["result_status"], + ), + ("Measured Value", tr_data["result_measure"], tr_data["measure_uom_description"]), + ("Threshold Value", tr_data["threshold_value"], tr_data["threshold_description"]), + + ("Date", test_timestamp, None, "Table Group", tr_data["table_groups_name"]), + ("Database/Schema", tr_data["schema_name"], None, "Test Suite", tr_data["test_suite"]), + ("Table", tr_data["table_name"], None, "Data Quality Dimension", tr_data["dq_dimension"]), + ("Column", tr_data["column_names"], None, "Risk Level", tr_data["severity"]), ] - if tr_data["measure_uom_description"]: - summary_table_data.append(("Units", tr_data["measure_uom_description"])) - - yield Table(summary_table_data, style=TABLE_STYLE_SUMMARY, hAlign="LEFT") - yield Paragraph("Usage Notes", PARA_STYLE_H1) - yield Paragraph(tr_data["usage_notes"], PARA_STYLE_DEFAULT) + summary_table_col_widths = [n * document.width for n in (.2, .1, .2, .2, .15, .15)] + yield Table(summary_table_data, style=summary_table_style, hAlign="LEFT", colWidths=summary_table_col_widths) - yield Paragraph("Result History", PARA_STYLE_H1) + yield KeepTogether([ + Paragraph("Usage Notes", PARA_STYLE_H1), + Paragraph(f"{tr_data['usage_notes']}", PARA_STYLE_TEXT), + ]) history_data = get_test_result_history(get_schema(), tr_data) - history_table_data = [ - (r["test_date"], r["threshold_value"], r["result_measure"], r["result_status"]) - for _, r in history_data.iterrows() - ] + history_table_style = TableStyle( + ( + ("FONT", (1, 1), (2, -1), "Courier"), + ("ALIGN", (0, 1), (0, -1), "CENTER"), + ("ALIGN", (1, 1), (2, -1), "RIGHT"), + ("ALIGN", (3, 1), (3, -1), "CENTER"), + ), + parent=TABLE_STYLE_DATA, + ) + + history_iterator = iter(history_data.iterrows()) + historical_status = history_data["result_status"][0] + status_change_idx = 1 + while historical_status: + try: + idx, row = next(history_iterator) + except StopIteration: + row = {"result_status": None} + idx += 1 + + if row["result_status"] != historical_status: + history_table_style.add( + "TEXTCOLOR", + (3, status_change_idx), + (3, idx), + COLOR_TEST_STATUS.get(historical_status, COLOR_GRAY_BG) + ) + historical_status = row["result_status"] + status_change_idx = idx + 1 + + if idx > 1 and "test_date" in row and str(row["test_date"]) == test_timestamp: + history_table_style.add( + "BACKGROUND", (0, idx + 1), (-1, idx + 1), COLOR_GRAY_BG + ) - yield Table(history_table_data) + history_table_data = ( + ("Test Date", "Threshold Value", "Measure Value", "Status"), + *[ + (r["test_date"], r["threshold_value"], r["result_measure"], r["result_status"]) + for _, r in 
history_data.iterrows() + ], + ) + + history_table = Table(history_table_data, style=history_table_style, repeatRows=1, hAlign="LEFT") + + yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT) + yield Paragraph("Result History", PARA_STYLE_H1) + yield BalancedColumns(history_table) if len(history_table_data) > 10 else history_table + yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT) yield Paragraph("Sample Data", PARA_STYLE_H1) + col_padding = 16 if tr_data["test_type"] == "CUSTOM": bad_data_status, bad_data_msg, lookup_query, sample_data = do_source_data_lookup_custom(get_schema(), tr_data) else: bad_data_status, bad_data_msg, lookup_query, sample_data = do_source_data_lookup(get_schema(), tr_data) - if bad_data_status in {"ND", "NA"}: + + if bad_data_status in ("ND", "NA"): yield Paragraph(bad_data_msg, style=PARA_STYLE_INFO) - elif bad_data_status == "ERR": - yield Paragraph(bad_data_msg, style=PARA_STYLE_ERROR) - elif sample_data is None: - yield Paragraph("An unknown error was encountered.", style=PARA_STYLE_ERROR) + elif bad_data_status == "ERR" or sample_data is None: + yield Paragraph("It was not possible to fetch the sample data this time.", style=PARA_STYLE_INFO) else: - if bad_data_msg: - yield Paragraph(bad_data_msg, style=PARA_STYLE_DEFAULT) + table_data = sample_data.fillna(Paragraph("NULL", style=PARA_STYLE_CELL_NULL)) + col_len_data = pandas.DataFrame(columns=["min_chars", "max_chars", "min_width", "max_width"], index=iter(sample_data)) + + for col_idx in sample_data: + col = sample_data[col_idx] + para_style = PARA_STYLE_CELL_NUMERIC if is_numeric_dtype(col) else PARA_STYLE_CELL + if not is_string_dtype(sample_data[col_idx]): + col = sample_data[col_idx].astype(str) + + max_width = col.map(lambda cell: stringWidth(cell, para_style.fontName, para_style.fontSize)).max() + min_chars = col.map( + lambda cell: max([len(word) for word in cell.split(" ")]) + ).max() + max_chars = col.str.len().max() + col_padding = 16 + col_len_data.loc[col_idx] = ( + min_chars, + max_chars, + min_chars * max_width / max_chars + col_padding, + max_width + col_padding, + ) + table_data[col_idx] = col.map( + lambda cell: Paragraph(cell, style=para_style) if cell else Paragraph("NULL", PARA_STYLE_CELL_NUMERIC) + ) + + available_width = document.width - sample_data.fillna("[NULL]", inplace=True) + while True: + if col_len_data["min_width"].sum() <= available_width: + break + largest_col = col_len_data["min_width"].idxmax() + table_data = table_data.drop(columns=largest_col) + col_len_data = col_len_data.drop(index=largest_col) + bad_data_msg = "Some too wide columns are omitted. Visit the website to check the full content." 
- yield Table( + expandable_width = available_width - col_len_data["min_width"].sum() + col_len_data["expand_appetite"] = col_len_data["max_width"] - col_len_data["min_width"] + col_len_data["width"] = col_len_data["min_width"] + col_len_data["expand_appetite"] * max(1, col_len_data["expand_appetite"].sum() / expandable_width) + + sample_data_table = Table( ( - [col.replace("_", " ").title() for col in sample_data.columns], - *(data for _, data in sample_data.iterrows()), - ) + [col.replace("_", " ").title() for col in table_data.columns], + *(data.tolist() for _, data in table_data.iterrows()), + ), + style=TABLE_STYLE_DATA, + hAlign="LEFT", + colWidths=col_len_data["width"].tolist(), + repeatRows=1, ) + layout_columns = int(available_width / (col_len_data["width"].sum() + col_padding)) + if layout_columns > 1 and len(table_data) > 10: + yield BalancedColumns(sample_data_table, layout_columns) + else: + yield sample_data_table + if bad_data_msg: + yield Paragraph(bad_data_msg, style=PARA_STYLE_FOOTNOTE) - yield Paragraph("SQL Query", PARA_STYLE_H1) + if lookup_query: + lookup_query_para = Paragraph(lookup_query, PARA_STYLE_MONO) + else: + lookup_query_para = Paragraph("No sample data lookup query registered for this test.") - yield Paragraph(lookup_query, PARA_STYLE_MONO) + yield KeepTogether([ + Paragraph("SQL Query", PARA_STYLE_H1), + lookup_query_para + ]) -def create_report(filename, test_result_id): - doc = SimpleDocTemplate(filename) - doc.build(flowables=list(get_report_content(test_result_id))) +def create_report(filename, tr_data): + doc = SimpleDocTemplate(filename, leftMargin=MARGIN, rightMargin=MARGIN, topMargin=MARGIN, bottomMargin=MARGIN) + doc.build(flowables=list(get_report_content(doc, tr_data))) diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 6b86ea6..1acee8f 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -215,7 +215,6 @@ def get_test_results_uncached(str_schema, str_run_id, str_sel_test_status, test_ r.schema_name, r.column_names, r.test_time::DATE as test_date, r.test_type, tt.id as test_type_id, tt.test_name_short, tt.test_name_long, r.test_description, tt.measure_uom, tt.measure_uom_description, c.test_operator, r.threshold_value::NUMERIC(16, 5), r.result_measure::NUMERIC(16, 5), r.result_status, - tt.threshold_description, tt.usage_notes, -- These are used in the PDF report CASE WHEN r.result_code <> 1 THEN r.disposition ELSE 'Passed' @@ -243,7 +242,10 @@ def get_test_results_uncached(str_schema, str_run_id, str_sel_test_status, test_ WHEN r.auto_gen = TRUE THEN d.id ELSE r.test_definition_id END::VARCHAR as test_definition_id_current, - r.auto_gen + r.auto_gen, + + tt.threshold_description, tt.usage_notes, r.test_time -- These are used in the PDF report + FROM run_results r INNER JOIN {str_schema}.test_types tt ON (r.test_type = tt.test_type) From f8e26f33ecd933a0eb4699ba9828af121f0a603c Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 3 Oct 2024 10:50:38 -0400 Subject: [PATCH 11/91] refactor(ui): improvements to vanjs components to support page components --- testgen/ui/components/frontend/css/shared.css | 377 +++++++++++++++++- .../frontend/js/components/breadcrumbs.js | 14 +- .../frontend/js/components/button.js | 17 +- .../frontend/js/components/expander_toggle.js | 8 +- .../components/frontend/js/components/link.js | 25 +- .../frontend/js/components/paginator.js | 32 +- .../frontend/js/components/select.js | 7 +- .../js/components/sorting_selector.js | 11 +- 
.../frontend/js/components/summary_bar.js | 15 +- testgen/ui/components/frontend/js/utils.js | 21 +- testgen/ui/components/widgets/breadcrumbs.py | 2 +- 11 files changed, 456 insertions(+), 73 deletions(-) diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index fcf0fa1..3284332 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -24,7 +24,8 @@ body { --primary-text-color: #000000de; --secondary-text-color: #0000008a; --disabled-text-color: #00000042; - --caption-text-color: rgba(49, 51, 63, 0.6); /* Match Streamlit's caption color */ + --caption-text-color: rgba(49, 51, 63, 0.6); + /* Match Streamlit's caption color */ --sidebar-background-color: white; --sidebar-item-hover-color: #f5f5f5; @@ -47,6 +48,8 @@ body { --button-stroked-text-color: var(--primary-color); --button-stroked-background: transparent; --button-stroked-border: 1px solid rgba(0, 0, 0, .12); + + --dk-card-background: #fff; } @media (prefers-color-scheme: dark) { @@ -54,7 +57,8 @@ body { --primary-text-color: rgba(255, 255, 255); --secondary-text-color: rgba(255, 255, 255, .7); --disabled-text-color: rgba(255, 255, 255, .5); - --caption-text-color: rgba(250, 250, 250, .6); /* Match Streamlit's caption color */ + --caption-text-color: rgba(250, 250, 250, .6); + /* Match Streamlit's caption color */ --sidebar-background-color: #14181f; --sidebar-item-hover-color: #10141b; @@ -67,5 +71,374 @@ body { --button-flat-background: rgba(255, 255, 255, .54); --button-stroked-border: 1px solid rgba(255, 255, 255, .12); + + --dk-card-background: #14181f; } } + +/* Table styles */ +.table { + background-color: var(--dk-card-background); + border: var(--button-stroked-border); + border-radius: 8px; + padding: 16px; +} + +.table-row { + padding: 12px 0; +} + +.table-row:not(:last-child) { + border-bottom: var(--button-stroked-border); +} + +.table-row:last-child { + padding-bottom: 0; +} + +.table-header { + border-bottom: var(--button-stroked-border); + padding: 0 0 8px 0; + font-size: 12px; + color: var(--caption-text-color); + text-transform: uppercase; +} +/* */ + +/* Text utilities */ +.text-primary { + color: var(--primary-text-color); +} + +.text-secondary { + color: var(--secondary-text-color); +} + +.text-caption { + font-size: 12px; + color: var(--caption-text-color); +} +/* */ + +/* Flex utilities */ +.flex-row { + display: flex; + flex-direction: row; + flex-grow: 1; + width: 100%; + align-items: center; +} + +.flex-column { + display: flex; + flex-direction: column; + flex-grow: 1; +} + +.fx-flex { + flex: 1 1 0%; +} + +.fx-flex-wrap { + flex-wrap: wrap; +} + +.fx-align-flex-center { + align-items: center; +} + +.fx-align-flex-start { + align-items: flex-start; +} + +.fx-align-flex-end { + align-items: flex-end; +} + +.fx-align-baseline { + align-items: baseline; +} + +.fx-justify-flex-end { + justify-items: flex-end; +} + +.fx-justify-content-flex-end { + justify-content: flex-end; +} + +.fx-justify-flex-start { + justify-content: flex-start; +} + +.fx-justify-center { + justify-content: center; +} + +.fx-justify-space-between { + justify-content: space-between; +} + +.fx-flex-align-content { + align-content: flex-start; +} + +/* */ + +/* Whitespace utilities */ +.mt-0 { + margin-top: 2px; +} + +.mt-1 { + margin-top: 4px; +} + +.mt-2 { + margin-top: 8px; +} + +.mt-3 { + margin-top: 12px; +} + +.mt-4 { + margin-top: 16px; +} + +.mt-5 { + margin-top: 24px; +} + +.mt-6 { + margin-top: 32px; +} + +.mt-7 { + 
margin-top: 40px; +} + +.mr-0 { + margin-right: 2px; +} + +.mr-1 { + margin-right: 4px; +} + +.mr-2 { + margin-right: 8px; +} + +.mr-3 { + margin-right: 12px; +} + +.mr-4 { + margin-right: 16px; +} + +.mr-5 { + margin-right: 24px; +} + +.mr-6 { + margin-right: 32px; +} + +.mr-7 { + margin-right: 40px; +} + +.mb-0 { + margin-bottom: 2px; +} + +.mb-1 { + margin-bottom: 4px; +} + +.mb-2 { + margin-bottom: 8px; +} + +.mb-3 { + margin-bottom: 12px; +} + +.mb-4 { + margin-bottom: 16px; +} + +.mb-5 { + margin-bottom: 24px; +} + +.mb-6 { + margin-bottom: 32px; +} + +.mb-7 { + margin-bottom: 40px; +} + +.ml-0 { + margin-left: 2px; +} + +.ml-1 { + margin-left: 4px; +} + +.ml-2 { + margin-left: 8px; +} + +.ml-3 { + margin-left: 12px; +} + +.ml-4 { + margin-left: 16px; +} + +.ml-5 { + margin-left: 24px; +} + +.ml-6 { + margin-left: 32px; +} + +.ml-7 { + margin-left: 40px; +} + +.pt-0 { + padding-top: 2px; +} + +.pt-1 { + padding-top: 4px; +} + +.pt-2 { + padding-top: 8px; +} + +.pt-3 { + padding-top: 12px; +} + +.pt-4 { + padding-top: 16px; +} + +.pt-5 { + padding-top: 24px; +} + +.pt-6 { + padding-top: 32px; +} + +.pt-7 { + padding-top: 40px; +} + +.pr-0 { + padding-right: 2px; +} + +.pr-1 { + padding-right: 4px; +} + +.pr-2 { + padding-right: 8px; +} + +.pr-3 { + padding-right: 12px; +} + +.pr-4 { + padding-right: 16px; +} + +.pr-5 { + padding-right: 24px; +} + +.pr-6 { + padding-right: 32px; +} + +.pr-7 { + padding-right: 40px; +} + +.pb-0 { + padding-bottom: 2px; +} + +.pb-1 { + padding-bottom: 4px; +} + +.pb-2 { + padding-bottom: 8px; +} + +.pb-3 { + padding-bottom: 12px; +} + +.pb-4 { + padding-bottom: 16px; +} + +.pb-5 { + padding-bottom: 24px; +} + +.pb-6 { + padding-bottom: 32px; +} + +.pb-7 { + padding-bottom: 40px; +} + +.pl-0 { + padding-left: 2px; +} + +.pl-1 { + padding-left: 4px; +} + +.pl-2 { + padding-left: 8px; +} + +.pl-3 { + padding-left: 12px; +} + +.pl-4 { + padding-left: 16px; +} + +.pl-5 { + padding-left: 24px; +} + +.pl-6 { + padding-left: 32px; +} + +.pl-7 { + padding-left: 40px; +} +/* */ \ No newline at end of file diff --git a/testgen/ui/components/frontend/js/components/breadcrumbs.js b/testgen/ui/components/frontend/js/components/breadcrumbs.js index d6976c8..bba5320 100644 --- a/testgen/ui/components/frontend/js/components/breadcrumbs.js +++ b/testgen/ui/components/frontend/js/components/breadcrumbs.js @@ -11,15 +11,15 @@ */ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; +import { loadStylesheet } from '../utils.js'; const { a, div, span } = van.tags; const Breadcrumbs = (/** @type Properties */ props) => { - Streamlit.setFrameHeight(24); + loadStylesheet('breadcrumbs', stylesheet); - if (!window.testgen.loadedStylesheets.breadcrumbs) { - document.adoptedStyleSheets.push(stylesheet); - window.testgen.loadedStylesheets.breadcrumbs = true; + if (!window.testgen.isPage) { + Streamlit.setFrameHeight(24); } return div( @@ -31,7 +31,7 @@ const Breadcrumbs = (/** @type Properties */ props) => { { class: 'tg-breadcrumbs' }, breadcrumbs.reduce((items, b, idx) => { const isLastItem = idx === breadcrumbs.length - 1; - items.push(a({ class: `tg-breadcrumbs--${ isLastItem ? 'current' : 'active'}`, href: `#/${b.path}`, onclick: () => navigate(b.path, b.params) }, b.label)) + items.push(a({ class: `tg-breadcrumbs--${ isLastItem ? 
'current' : 'active'}`, href: `#/${b.path}`, onclick: () => emiEvent(b.path, b.params) }, b.label)) if (!isLastItem) { items.push(span({class: 'tg-breadcrumbs--arrow'}, '>')); } @@ -42,8 +42,8 @@ const Breadcrumbs = (/** @type Properties */ props) => { ) }; -function navigate(/** @type string */ path, /** @type object */ params) { - Streamlit.sendData({ path, params }); +function emiEvent(/** @type string */ href, /** @type object */ params) { + Streamlit.sendData({ event: 'LinkClicked', href, params }); return false; } diff --git a/testgen/ui/components/frontend/js/components/button.js b/testgen/ui/components/frontend/js/components/button.js index a19d960..90bbfea 100644 --- a/testgen/ui/components/frontend/js/components/button.js +++ b/testgen/ui/components/frontend/js/components/button.js @@ -10,7 +10,7 @@ * @property {(bool)} disabled * @property {string?} style */ -import { enforceElementWidth } from '../utils.js'; +import { enforceElementWidth, loadStylesheet } from '../utils.js'; import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; @@ -23,11 +23,15 @@ const BUTTON_TYPE = { }; const Button = (/** @type Properties */ props) => { - Streamlit.setFrameHeight(40); + loadStylesheet('button', stylesheet); const isIconOnly = props.type === BUTTON_TYPE.ICON || (props.icon?.val && !props.label?.val); - if (isIconOnly) { // Force a 40px width for the parent iframe & handle window resizing - enforceElementWidth(window.frameElement, 40); + + if (!window.testgen.isPage) { + Streamlit.setFrameHeight(40); + if (isIconOnly) { // Force a 40px width for the parent iframe & handle window resizing + enforceElementWidth(window.frameElement, 40); + } } if (props.tooltip) { @@ -35,11 +39,6 @@ const Button = (/** @type Properties */ props) => { window.frameElement.parentElement.setAttribute('data-tooltip-position', props.tooltipPosition.val); } - if (!window.testgen.loadedStylesheets.button) { - document.adoptedStyleSheets.push(stylesheet); - window.testgen.loadedStylesheets.button = true; - } - const onClickHandler = props.onclick || post; return button( { diff --git a/testgen/ui/components/frontend/js/components/expander_toggle.js b/testgen/ui/components/frontend/js/components/expander_toggle.js index 0a5220d..fe68891 100644 --- a/testgen/ui/components/frontend/js/components/expander_toggle.js +++ b/testgen/ui/components/frontend/js/components/expander_toggle.js @@ -7,15 +7,15 @@ */ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; +import { loadStylesheet } from '../utils.js'; const { div, span, i } = van.tags; const ExpanderToggle = (/** @type Properties */ props) => { - Streamlit.setFrameHeight(24); + loadStylesheet('expanderToggle', stylesheet); - if (!window.testgen.loadedStylesheets.expanderToggle) { - document.adoptedStyleSheets.push(stylesheet); - window.testgen.loadedStylesheets.expanderToggle = true; + if (!window.testgen.isPage) { + Streamlit.setFrameHeight(24); } const expandedState = van.state(!!props.default.val); diff --git a/testgen/ui/components/frontend/js/components/link.js b/testgen/ui/components/frontend/js/components/link.js index 86e35cb..6600ed6 100644 --- a/testgen/ui/components/frontend/js/components/link.js +++ b/testgen/ui/components/frontend/js/components/link.js @@ -13,28 +13,27 @@ * @property {number?} width * @property {string?} style */ -import { enforceElementWidth } from '../utils.js'; +import { enforceElementWidth, loadStylesheet } from '../utils.js'; import van from '../van.min.js'; import { Streamlit } from 
'../streamlit.js'; const { a, div, i, span } = van.tags; const Link = (/** @type Properties */ props) => { - Streamlit.setFrameHeight(props.height?.val || 24); - if (props.width?.val) { - enforceElementWidth(window.frameElement, props.width.val); - } + loadStylesheet('link', stylesheet); - if (!window.testgen.loadedStylesheets.link) { - document.adoptedStyleSheets.push(stylesheet); - window.testgen.loadedStylesheets.link = true; + if (!window.testgen.isPage) { + Streamlit.setFrameHeight(props.height?.val || 24); + if (props.width?.val) { + enforceElementWidth(window.frameElement, props.width.val); + } } return a( { - class: `tg-link ${props.underline.val ? 'tg-link--underline' : ''}`, + class: `tg-link ${props.underline?.val ? 'tg-link--underline' : ''}`, style: props.style, - onclick: () => navigate(props.href.val, props.params.val), + onclick: () => emitEvent(props.href.val, props.params.val), }, div( {class: 'tg-link--wrapper'}, @@ -51,13 +50,13 @@ const LinkIcon = ( /** @type string */position, ) => { return i( - {class: `material-symbols-rounded tg-link--icon tg-link--icon-${position}`, style: `font-size: ${size.val}px;`}, + {class: `material-symbols-rounded tg-link--icon tg-link--icon-${position}`, style: `font-size: ${size?.val || 20}px;`}, icon, ); }; -function navigate(href, params) { - Streamlit.sendData({ href, params }); +function emitEvent(href, params) { + Streamlit.sendData({ event: 'LinkClicked', href, params }); } const stylesheet = new CSSStyleSheet(); diff --git a/testgen/ui/components/frontend/js/components/paginator.js b/testgen/ui/components/frontend/js/components/paginator.js index 7c839a2..0b50677 100644 --- a/testgen/ui/components/frontend/js/components/paginator.js +++ b/testgen/ui/components/frontend/js/components/paginator.js @@ -7,20 +7,18 @@ */ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; +import { loadStylesheet } from '../utils.js'; const { div, span, i, button } = van.tags; const Paginator = (/** @type Properties */ props) => { - const count = props.count.val; - const pageSize = props.pageSize.val; + loadStylesheet('paginator', stylesheet); - Streamlit.setFrameHeight(32); - - if (!window.testgen.loadedStylesheets.expanderToggle) { - document.adoptedStyleSheets.push(stylesheet); - window.testgen.loadedStylesheets.expanderToggle = true; + if (!window.testgen.isPage) { + Streamlit.setFrameHeight(32); } + const { count, pageSize } = props; const pageIndexState = van.state(props.pageIndex.val || 0); return div( @@ -29,7 +27,7 @@ const Paginator = (/** @type Properties */ props) => { { class: 'tg-paginator--label' }, () => { const pageIndex = pageIndexState.val; - return `${pageSize * pageIndex + 1} - ${Math.min(count, pageSize * (pageIndex + 1))} of ${count}` + return `${pageSize.val * pageIndex + 1} - ${Math.min(count.val, pageSize.val * (pageIndex + 1))} of ${count.val}`; }, ), button( @@ -37,7 +35,7 @@ const Paginator = (/** @type Properties */ props) => { class: 'tg-paginator--button', onclick: () => { pageIndexState.val = 0; - Streamlit.sendData(pageIndexState.val); + emitEvent(pageIndexState.val); }, disabled: () => pageIndexState.val === 0, }, @@ -48,7 +46,7 @@ const Paginator = (/** @type Properties */ props) => { class: 'tg-paginator--button', onclick: () => { pageIndexState.val--; - Streamlit.sendData(pageIndexState.val); + emitEvent(pageIndexState.val); }, disabled: () => pageIndexState.val === 0, }, @@ -59,9 +57,9 @@ const Paginator = (/** @type Properties */ props) => { class: 'tg-paginator--button', onclick: () => { 
pageIndexState.val++; - Streamlit.sendData(pageIndexState.val); + emitEvent(pageIndexState.val); }, - disabled: () => pageIndexState.val === Math.ceil(count / pageSize) - 1, + disabled: () => pageIndexState.val === Math.ceil(count.val / pageSize.val) - 1, }, i({class: 'material-symbols-rounded'}, 'chevron_right') ), @@ -69,16 +67,20 @@ const Paginator = (/** @type Properties */ props) => { { class: 'tg-paginator--button', onclick: () => { - pageIndexState.val = Math.ceil(count / pageSize) - 1; - Streamlit.sendData(pageIndexState.val); + pageIndexState.val = Math.ceil(count.val / pageSize.val) - 1; + emitEvent(pageIndexState.val); }, - disabled: () => pageIndexState.val === Math.ceil(count / pageSize) - 1, + disabled: () => pageIndexState.val === Math.ceil(count.val / pageSize.val) - 1, }, i({class: 'material-symbols-rounded'}, 'last_page') ), ); }; +function emitEvent(/** @type number */pageIndex) { + Streamlit.sendData({ event: 'PageChanged', pageIndex }) +} + const stylesheet = new CSSStyleSheet(); stylesheet.replace(` .tg-paginator { diff --git a/testgen/ui/components/frontend/js/components/select.js b/testgen/ui/components/frontend/js/components/select.js index cc8e493..6cd7c48 100644 --- a/testgen/ui/components/frontend/js/components/select.js +++ b/testgen/ui/components/frontend/js/components/select.js @@ -13,17 +13,14 @@ */ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; +import { loadStylesheet } from '../utils.js'; const { div, label, option, select } = van.tags; const Select = (/** @type {Properties} */ props) => { + loadStylesheet('select', stylesheet); Streamlit.setFrameHeight(); - if (!window.testgen.loadedStylesheets.select) { - document.adoptedStyleSheets.push(stylesheet); - window.testgen.loadedStylesheets.select = true; - } - const domId = Math.random().toString(36).substring(2); const changeHandler = props.onChange || post; return div( diff --git a/testgen/ui/components/frontend/js/components/sorting_selector.js b/testgen/ui/components/frontend/js/components/sorting_selector.js index 926a173..60b9afa 100644 --- a/testgen/ui/components/frontend/js/components/sorting_selector.js +++ b/testgen/ui/components/frontend/js/components/sorting_selector.js @@ -1,5 +1,6 @@ import {Streamlit} from "../streamlit.js"; import van from '../van.min.js'; +import { loadStylesheet } from '../utils.js'; /** * @typedef ColDef @@ -16,20 +17,18 @@ import van from '../van.min.js'; const { button, div, i, span } = van.tags; const SortingSelector = (/** @type {Properties} */ props) => { + loadStylesheet('sortingSelector', stylesheet); let defaultDirection = "ASC"; - if (!window.testgen.loadedStylesheets.sortingSelector) { - document.adoptedStyleSheets.push(stylesheet); - window.testgen.loadedStylesheets.sortSelector = true; - } - const columns = props.columns.val; const prevComponentState = props.state.val || []; const columnLabel = columns.reduce((acc, [colLabel, colId]) => ({ ...acc, [colId]: colLabel}), {}); - Streamlit.setFrameHeight(100 + 30 * columns.length); + if (!window.testgen.isPage) { + Streamlit.setFrameHeight(100 + 30 * columns.length); + } const componentState = columns.reduce( (state, [colLabel, colId]) => ( diff --git a/testgen/ui/components/frontend/js/components/summary_bar.js b/testgen/ui/components/frontend/js/components/summary_bar.js index ec67e01..152b589 100644 --- a/testgen/ui/components/frontend/js/components/summary_bar.js +++ b/testgen/ui/components/frontend/js/components/summary_bar.js @@ -13,7 +13,7 @@ * @property {number} width */ import 
van from '../van.min.js'; -import { Streamlit } from '../streamlit.js'; +import { loadStylesheet } from '../utils.js'; const { div, span } = van.tags; const colorMap = { @@ -28,19 +28,14 @@ const colorMap = { } const SummaryBar = (/** @type Properties */ props) => { + loadStylesheet('summaryBar', stylesheet); + const height = props.height.val || 24; const width = props.width.val; const summaryItems = props.items.val; - const label = props.label.val; + const label = props.label?.val; const total = summaryItems.reduce((sum, item) => sum + item.value, 0); - Streamlit.setFrameHeight(height + 24 + (label ? 24 : 0)); - - if (!window.testgen.loadedStylesheets.summaryBar) { - document.adoptedStyleSheets.push(stylesheet); - window.testgen.loadedStylesheets.summaryBar = true; - } - return div( { class: 'tg-summary-bar-wrapper' }, () => { @@ -62,7 +57,7 @@ const SummaryBar = (/** @type Properties */ props) => { () => { return total ? div( { class: `tg-summary-bar--caption` }, - summaryItems.map(item => `${item.label}: ${item.value}`).join(', '), + summaryItems.map(item => `${item.label}: ${item.value || 0}`).join(', '), ) : null; }, ); diff --git a/testgen/ui/components/frontend/js/utils.js b/testgen/ui/components/frontend/js/utils.js index 7757caa..0f0103b 100644 --- a/testgen/ui/components/frontend/js/utils.js +++ b/testgen/ui/components/frontend/js/utils.js @@ -1,3 +1,5 @@ +import van from './van.min.js'; + function enforceElementWidth( /** @type Element */element, /** @type number */width, @@ -9,4 +11,21 @@ function enforceElementWidth( observer.observe(element); } -export { enforceElementWidth }; +function loadStylesheet( + /** @type string */key, + /** @type CSSStyleSheet */stylesheet, +) { + if (!window.testgen.loadedStylesheets[key]) { + document.adoptedStyleSheets.push(stylesheet); + window.testgen.loadedStylesheets[key] = true; + } +} + +function wrapProps(/** @type object */props) { + for (const [key, value] of Object.entries(props)) { + props[key] = van.state(value); + } + return props; +} + +export { enforceElementWidth, loadStylesheet, wrapProps }; diff --git a/testgen/ui/components/widgets/breadcrumbs.py b/testgen/ui/components/widgets/breadcrumbs.py index bb258d1..ecfc88a 100644 --- a/testgen/ui/components/widgets/breadcrumbs.py +++ b/testgen/ui/components/widgets/breadcrumbs.py @@ -23,7 +23,7 @@ def breadcrumbs( props={"breadcrumbs": breadcrumbs}, ) if data: - Router().navigate(to=data["path"], with_args=data["params"]) + Router().navigate(to=data["href"], with_args=data["params"]) class Breadcrumb(typing.TypedDict): path: str | None From dc968b5827d3dd738e97c06bdb84368314b4ea33 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 3 Oct 2024 10:51:38 -0400 Subject: [PATCH 12/91] refactor(ui): use page components in test runs and profiling runs --- .../components/frontend/js/display_utils.js | 29 ++++ testgen/ui/components/frontend/js/main.js | 4 + .../frontend/js/pages/profiling_runs.js | 160 ++++++++++++++++++ .../components/frontend/js/pages/test_runs.js | 136 +++++++++++++++ .../ui/components/frontend/js/van-tooltip.js | 52 ++++++ testgen/ui/views/profiling_summary.py | 127 +++----------- testgen/ui/views/test_runs.py | 105 ++---------- 7 files changed, 417 insertions(+), 196 deletions(-) create mode 100644 testgen/ui/components/frontend/js/display_utils.js create mode 100644 testgen/ui/components/frontend/js/pages/profiling_runs.js create mode 100644 testgen/ui/components/frontend/js/pages/test_runs.js create mode 100644 testgen/ui/components/frontend/js/van-tooltip.js diff 
--git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js new file mode 100644 index 0000000..512cc0f --- /dev/null +++ b/testgen/ui/components/frontend/js/display_utils.js @@ -0,0 +1,29 @@ +function formatTimestamp(/** @type number */ timestamp) { + if (!timestamp) { + return '--'; + } + + const date = new Date(timestamp); + const months = [ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' ]; + const hours = date.getHours(); + const minutes = date.getMinutes(); + return `${months[date.getMonth()]} ${date.getDate()}, ${hours % 12}:${String(minutes).padStart(2, '0')} ${hours / 12 > 1 ? 'PM' : 'AM'}`; +} + +function formatDuration(/** @type string */ duration) { + if (!duration) { + return '--'; + } + + const { hour, minute, second } = duration.split(':'); + let formatted = [ + { value: Number(hour), unit: 'h' }, + { value: Number(minute), unit: 'm' }, + { value: Number(second), unit: 's' }, + ].map(({ value, unit }) => value ? `${value}${unit}` : '') + .join(' '); + + return formatted.trim() || '< 1s'; +} + +export { formatTimestamp, formatDuration }; diff --git a/testgen/ui/components/frontend/js/main.js b/testgen/ui/components/frontend/js/main.js index ece2e49..9c42de7 100644 --- a/testgen/ui/components/frontend/js/main.js +++ b/testgen/ui/components/frontend/js/main.js @@ -14,6 +14,8 @@ import { Link } from './components/link.js'; import { Paginator } from './components/paginator.js'; import { Select } from './components/select.js' import { SortingSelector } from './components/sorting_selector.js'; +import { TestRuns } from './pages/test_runs.js'; +import { ProfilingRuns } from './pages/profiling_runs.js'; let currentWindowVan = van; let topWindowVan = window.top.van; @@ -28,6 +30,8 @@ const TestGenComponent = (/** @type {string} */ id, /** @type {object} */ props) select: Select, sorting_selector: SortingSelector, sidebar: window.top.testgen.components.Sidebar, + test_runs: TestRuns, + profiling_runs: ProfilingRuns, }; if (Object.keys(componentById).includes(id)) { diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js new file mode 100644 index 0000000..639f581 --- /dev/null +++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js @@ -0,0 +1,160 @@ +/** + * @typedef Properties + * @type {object} + * @property {array} items + */ +import van from '../van.min.js'; +import { Tooltip } from '../van-tooltip.js'; +import { SummaryBar } from '../components/summary_bar.js'; +import { Link } from '../components/link.js'; +import { Button } from '../components/button.js'; +import { Streamlit } from '../streamlit.js'; +import { wrapProps } from '../utils.js'; +import { formatTimestamp, formatDuration } from '../display_utils.js'; + +const { div, span, i } = van.tags; + +const ProfilingRuns = (/** @type Properties */ props) => { + window.testgen.isPage = true; + + const profilingRunItems = van.derive(() => { + let items = []; + try { + items = JSON.parse(props.items?.val); + } catch { } + Streamlit.setFrameHeight(44 + 84.5 * items.length); + return items; + }); + const columns = ['20%', '20%', '20%', '40%']; + + return div( + () => div( + { class: 'table' }, + div( + { class: 'table-header flex-row' }, + span( + { style: `flex: ${columns[0]}` }, + 'Start Time | Table Group', + ), + span( + { style: `flex: ${columns[1]}` }, + 'Status | Duration', + ), + span( + { style: `flex: ${columns[2]}` }, + 'Schema', + ), + span( + { style: 
`flex: ${columns[3]}` }, + 'Hygiene Issues', + ), + ), + profilingRunItems.val.map(item => ProfilingRunItem(item, columns)), + ), + ); +} + +const ProfilingRunItem = (item, /** @type string[] */ columns) => { + return div( + { class: 'table-row flex-row' }, + div( + { style: `flex: ${columns[0]}` }, + div(formatTimestamp(item.start_time)), + div( + { class: 'text-caption mt-1' }, + item.table_groups_name, + ), + ), + div( + { class: 'flex-row', style: `flex: ${columns[1]}` }, + div( + ProfilingRunStatus(item), + div( + { class: 'text-caption mt-1' }, + formatDuration(item.duration), + ), + ), + item.status === 'Running' && item.process_id ? Button(wrapProps({ + type: 'stroked', + label: 'Cancel Run', + style: 'width: auto; height: 32px; color: var(--purple); margin-left: 16px;', + onclick: () => Streamlit.sendData({ + event: 'RunCanceled', + profiling_run: item, + _id: Math.random(), // Forces on_change component handler to be triggered on every click + }), + })) : null, + ), + div( + { style: `flex: ${columns[2]}` }, + div(item.schema_name), + div( + { + class: 'text-caption mt-1 mb-1', + style: item.status === 'Complete' && !item.column_ct ? 'color: var(--red);' : '', + }, + `${item.table_ct || 0} tables, ${item.column_ct || 0} columns`, + ), + item.column_ct ? Link(wrapProps({ + label: 'View results', + href: 'profiling-runs:results', + params: { 'run_id': item.profiling_run_id }, + underline: true, + right_icon: 'chevron_right', + })) : null, + ), + div( + { style: `flex: ${columns[3]}` }, + item.anomaly_ct ? SummaryBar(wrapProps({ + items: [ + { label: 'Definite', value: item.anomalies_definite_ct, color: 'red' }, + { label: 'Likely', value: item.anomalies_likely_ct, color: 'orange' }, + { label: 'Possible', value: item.anomalies_possible_ct, color: 'yellow' }, + { label: 'Dismissed', value: item.anomalies_dismissed_ct, color: 'grey' }, + ], + height: 10, + width: 300, + })) : '--', + item.anomaly_ct ? Link(wrapProps({ + label: `View ${item.anomaly_ct} issues`, + href: 'profiling-runs:hygiene', + params: { 'run_id': item.profiling_run_id }, + underline: true, + right_icon: 'chevron_right', + style: 'margin-top: 8px;', + })) : null, + ), + ); +} + +function ProfilingRunStatus(/** @type object */ item) { + const attributeMap = { + Running: { label: 'Running', color: 'blue' }, + Complete: { label: 'Completed', color: '' }, + Error: { label: 'Error', color: 'red' }, + Cancelled: { label: 'Canceled', color: 'purple' }, + }; + const attributes = attributeMap[item.status] || { label: 'Unknown', color: 'grey' }; + return span( + { + class: 'flex-row', + style: `color: var(--${attributes.color});`, + }, + attributes.label, + () => { + const tooltipError = van.state(false); + return item.status === 'Error' && item.log_message ? 
i( + { + class: 'material-symbols-rounded text-secondary ml-1 profiling-runs--info', + style: 'position: relative; font-size: 16px;', + onmouseenter: () => tooltipError.val = true, + onmouseleave: () => tooltipError.val = false, + }, + 'info', + Tooltip({ text: item.log_message, show: tooltipError }), + ) : null; + }, + ); +} + +export { ProfilingRuns }; diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js new file mode 100644 index 0000000..3e91354 --- /dev/null +++ b/testgen/ui/components/frontend/js/pages/test_runs.js @@ -0,0 +1,136 @@ +/** + * @typedef Properties + * @type {object} + * @property {array} items + */ +import van from '../van.min.js'; +import { Tooltip } from '../van-tooltip.js'; +import { SummaryBar } from '../components/summary_bar.js'; +import { Link } from '../components/link.js'; +import { Button } from '../components/button.js'; +import { Streamlit } from '../streamlit.js'; +import { wrapProps } from '../utils.js'; +import { formatTimestamp, formatDuration } from '../display_utils.js'; + +const { div, span, i } = van.tags; + +const TestRuns = (/** @type Properties */ props) => { + window.testgen.isPage = true; + + const testRunItems = van.derive(() => { + let items = []; + try { + items = JSON.parse(props.items?.val); + } catch { } + Streamlit.setFrameHeight(44 + 60.5 * items.length); + return items; + }); + const columns = ['30%', '20%', '50%']; + + return div( + () => div( + { class: 'table' }, + div( + { class: 'table-header flex-row' }, + span( + { style: `flex: ${columns[0]}` }, + 'Start Time | Table Group | Test Suite', + ), + span( + { style: `flex: ${columns[1]}` }, + 'Status | Duration', + ), + span( + { style: `flex: ${columns[2]}` }, + 'Results Summary', + ), + ), + testRunItems.val.map(item => TestRunItem(item, columns)), + ), + ); +} + +const TestRunItem = (item, /** @type string[] */ columns) => { + return div( + { class: 'table-row flex-row' }, + div( + { style: `flex: ${columns[0]}` }, + Link(wrapProps({ + label: formatTimestamp(item.test_starttime), + href: 'test-runs:results', + params: { 'run_id': item.test_run_id }, + underline: true, + })), + div( + { class: 'text-caption mt-1' }, + `${item.table_groups_name} > ${item.test_suite}`, + ), + ), + div( + { class: 'flex-row', style: `flex: ${columns[1]}` }, + div( + TestRunStatus(item), + div( + { class: 'text-caption mt-1' }, + formatDuration(item.duration), + ), + ), + item.status === 'Running' && item.process_id ? Button(wrapProps({ + type: 'stroked', + label: 'Cancel Run', + style: 'width: auto; height: 32px; color: var(--purple); margin-left: 16px;', + onclick: () => Streamlit.sendData({ + event: 'RunCanceled', + test_run: item, + _id: Math.random(), // Forces on_change component handler to be triggered on every click + }), + })) : null, + ), + div( + { style: `flex: ${columns[2]}` }, + item.test_ct ? 
SummaryBar(wrapProps({ + items: [ + { label: 'Passed', value: item.passed_ct, color: 'green' }, + { label: 'Warning', value: item.warning_ct, color: 'yellow' }, + { label: 'Failed', value: item.failed_ct, color: 'red' }, + { label: 'Error', value: item.error_ct, color: 'brown' }, + { label: 'Dismissed', value: item.dismissed_ct, color: 'grey' }, + ], + height: 10, + width: 300, + })) : '--', + ), + ); +} + +function TestRunStatus(/** @type object */ item) { + const attributeMap = { + Running: { label: 'Running', color: 'blue' }, + Complete: { label: 'Completed', color: '' }, + Error: { label: 'Error', color: 'red' }, + Cancelled: { label: 'Canceled', color: 'purple' }, + }; + const attributes = attributeMap[item.status] || { label: 'Unknown', color: 'grey' }; + return span( + { + class: 'flex-row', + style: `color: var(--${attributes.color});`, + }, + attributes.label, + () => { + const tooltipError = van.state(false); + return item.status === 'Error' && item.log_message ? i( + { + class: 'material-symbols-rounded text-secondary ml-1', + style: 'position: relative; font-size: 16px;', + onmouseenter: () => tooltipError.val = true, + onmouseleave: () => tooltipError.val = false, + }, + 'info', + Tooltip({ text: item.log_message, show: tooltipError }), + ) : null; + }, + ); +} + +export { TestRuns }; diff --git a/testgen/ui/components/frontend/js/van-tooltip.js b/testgen/ui/components/frontend/js/van-tooltip.js new file mode 100644 index 0000000..565715b --- /dev/null +++ b/testgen/ui/components/frontend/js/van-tooltip.js @@ -0,0 +1,52 @@ +// Code modified from vanjs-ui +// https://www.npmjs.com/package/vanjs-ui +// https://cdn.jsdelivr.net/npm/vanjs-ui@0.10.0/dist/van-ui.nomodule.js + +import van from './van.min.js'; +const { div, span } = van.tags; + +const toStyleStr = (style) => Object.entries(style).map(([k, v]) => `${k}: ${v};`).join(""); + +const Tooltip = ({ text, show, backgroundColor = '#333D', fontColor = 'white', fadeInSec = 0.3, tooltipClass = '', tooltipStyleOverrides = {}, triangleClass = '', triangleStyleOverrides = {}, }) => { + const tooltipStylesStr = toStyleStr({ + width: 'max-content', + 'min-width': '100px', + 'max-width': '400px', + visibility: 'hidden', + 'background-color': backgroundColor, + color: fontColor, + 'text-align': 'center', + padding: '5px', + 'border-radius': '5px', + position: 'absolute', + 'z-index': 1, + bottom: '125%', + left: '50%', + transform: 'translateX(-50%)', + opacity: 0, + transition: `opacity ${fadeInSec}s`, + 'font-size': '14px', + 'font-family': `'Roboto', 'Helvetica Neue', sans-serif`, + 'text-wrap': 'wrap', + ...tooltipStyleOverrides, + }); + const triangleStylesStr = toStyleStr({ + width: 0, + height: 0, + 'margin-left': '-5px', + 'border-left': '5px solid transparent', + 'border-right': '5px solid transparent', + 'border-top': '5px solid #333', + position: 'absolute', + bottom: '-5px', + left: '50%', + ...triangleStyleOverrides, + }); + const dom = span({ class: tooltipClass, style: tooltipStylesStr }, text, div({ class: triangleClass, style: triangleStylesStr })); + van.derive(() => show.val ? 
+ (dom.style.opacity = '1', dom.style.visibility = 'visible') : + (dom.style.opacity = '0', dom.style.visibility = 'hidden')); + return dom; +}; + +export { Tooltip }; diff --git a/testgen/ui/views/profiling_summary.py b/testgen/ui/views/profiling_summary.py index 0ce607d..f0c736e 100644 --- a/testgen/ui/views/profiling_summary.py +++ b/testgen/ui/views/profiling_summary.py @@ -9,18 +9,17 @@ import testgen.ui.services.form_service as fm import testgen.ui.services.query_service as dq from testgen.commands.run_profiling_bridge import update_profile_run_status -from testgen.common import date_service from testgen.ui.components import widgets as testgen +from testgen.ui.components.utils.component import component from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import project_queries from testgen.ui.services import authentication_service from testgen.ui.session import session from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog -from testgen.utils import to_int FORM_DATA_WIDTH = 400 -PAGE_SIZE = 10 +PAGE_SIZE = 50 PAGE_ICON = "data_thresholding" @@ -66,33 +65,31 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N fm.render_refresh_button(actions_column) testgen.whitespace(0.5) - list_container = st.container(border=True) + list_container = st.container() profiling_runs_df = get_db_profiling_runs(project_code, table_group_id) run_count = len(profiling_runs_df) page_index = testgen.paginator(count=run_count, page_size=PAGE_SIZE) + paginated_df = profiling_runs_df[PAGE_SIZE * page_index : PAGE_SIZE * (page_index + 1)] with list_container: - testgen.css_class("bg-white") - column_spec = [.2, .2, .2, .4] - - run_column, status_column, schema_column, issues_column = st.columns(column_spec, vertical_alignment="top") - header_styles = "font-size: 12px; text-transform: uppercase; margin-bottom: 8px;" - testgen.caption("Start Time | Table Group", header_styles, run_column) - testgen.caption("Status | Duration", header_styles, status_column) - testgen.caption("Schema", header_styles, schema_column) - testgen.caption("Hygiene Issues", header_styles, issues_column) - testgen.divider(-8) - - paginated_df = profiling_runs_df[PAGE_SIZE * page_index : PAGE_SIZE * (page_index + 1)] - for index, profiling_run in paginated_df.iterrows(): - with st.container(): - render_profiling_run_row(profiling_run, column_spec) - - if (index + 1) % PAGE_SIZE and index != run_count - 1: - testgen.divider(-4, 4) - + component_key = "testgen:profiling_runs" + def handle_event(): + if event_data := st.session_state.get(component_key): + event = event_data["event"] + if event == "LinkClicked": + self.router.navigate(to=event_data["href"], with_args=event_data.get("params")) + elif event == "RunCanceled": + on_cancel_run(event_data["profiling_run"]) + + component( + id_="profiling_runs", + key=component_key, + props={"items": paginated_df.to_json(orient="records")}, + on_change=handle_event, + ) + def render_empty_state(project_code: str) -> bool: project_summary_df = project_queries.get_summary_by_code(project_code) @@ -130,90 +127,6 @@ def render_empty_state(project_code: str) -> bool: return True -def render_profiling_run_row(profiling_run: pd.Series, column_spec: list[int]) -> None: - profiling_run_id = profiling_run["profiling_run_id"] - status = profiling_run["status"] - - run_column, status_column, schema_column, issues_column = st.columns(column_spec, vertical_alignment="top") - - with run_column: - 
start_time = date_service.get_timezoned_timestamp(st.session_state, profiling_run["start_time"]) if pd.notnull(profiling_run["start_time"]) else "--" - testgen.no_flex_gap() - testgen.text(start_time) - testgen.caption(profiling_run["table_groups_name"]) - - with status_column: - testgen.flex_row_start() - - status_display_map = { - "Running": { "label": "Running", "color": "blue" }, - "Complete": { "label": "Completed", "color": "" }, - "Error": { "label": "Error", "color": "red" }, - "Cancelled": { "label": "Canceled", "color": "purple" }, - } - status_attrs = status_display_map.get(status, { "label": "Unknown", "color": "grey" }) - - st.html(f""" -

-                <p style="color: var(--{status_attrs["color"]});">{status_attrs["label"]}</p>
-                <p>{date_service.get_formatted_duration(profiling_run["duration"])}</p>
- """) - - if status == "Error" and (log_message := profiling_run["log_message"]): - st.markdown("", help=log_message) - - if status == "Running" and pd.notnull(profiling_run["process_id"]): - testgen.button( - type_="stroked", - label="Cancel Run", - style="width: auto; height: 32px; color: var(--purple); margin-left: 16px;", - on_click=partial(on_cancel_run, profiling_run), - key=f"profiling_run:keys:cancel-run:{profiling_run_id}", - ) - - with schema_column: - column_count = to_int(profiling_run["column_ct"]) - testgen.no_flex_gap() - testgen.text(profiling_run["schema_name"]) - testgen.caption( - f"{to_int(profiling_run['table_ct'])} tables, {column_count} columns", - f"margin-bottom: 3px;{' color: var(--red);' if status == 'Complete' and not column_count else ''}", - ) - - if column_count: - testgen.link( - label="View results", - href="profiling-runs:results", - params={ "run_id": str(profiling_run_id) }, - right_icon="chevron_right", - height=18, - key=f"profiling_run:keys:go-to-runs:{profiling_run_id}", - ) - - with issues_column: - if anomaly_count := to_int(profiling_run["anomaly_ct"]): - testgen.no_flex_gap() - testgen.summary_bar( - items=[ - { "label": "Definite", "value": to_int(profiling_run["anomalies_definite_ct"]), "color": "red" }, - { "label": "Likely", "value": to_int(profiling_run["anomalies_likely_ct"]), "color": "orange" }, - { "label": "Possible", "value": to_int(profiling_run["anomalies_possible_ct"]), "color": "yellow" }, - { "label": "Dismissed", "value": to_int(profiling_run["anomalies_dismissed_ct"]), "color": "grey" }, - ], - height=10, - width=280, - ) - testgen.link( - label=f"View {anomaly_count} issues", - href="profiling-runs:hygiene", - params={ "run_id": str(profiling_run_id) }, - right_icon="chevron_right", - height=18, - key=f"profiling_run:keys:go-to-hygiene:{profiling_run_id}", - ) - else: - st.markdown("--") - - def on_cancel_run(profiling_run: pd.Series) -> None: process_status, process_message = process_service.kill_test_run(profiling_run["process_id"]) if process_status: diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 21c4c24..dfe8a90 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -9,17 +9,16 @@ import testgen.ui.services.form_service as fm import testgen.ui.services.query_service as dq import testgen.ui.services.test_run_service as test_run_service -from testgen.common import date_service from testgen.ui.components import widgets as testgen +from testgen.ui.components.utils.component import component from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import project_queries from testgen.ui.services import authentication_service from testgen.ui.session import session from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog -from testgen.utils import to_int -PAGE_SIZE = 10 +PAGE_SIZE = 50 PAGE_ICON = "labs" @@ -78,31 +77,24 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N fm.render_refresh_button(actions_column) testgen.whitespace(0.5) - list_container = st.container(border=True) + list_container = st.container() test_runs_df = get_db_test_runs(project_code, table_group_id, test_suite_id) - - run_count = len(test_runs_df) - page_index = testgen.paginator(count=run_count, page_size=PAGE_SIZE) + page_index = testgen.paginator(count=len(test_runs_df), page_size=PAGE_SIZE) + paginated_df = test_runs_df[PAGE_SIZE * page_index : PAGE_SIZE * (page_index + 1)] with 
list_container: - testgen.css_class("bg-white") - column_spec = [.3, .2, .5] - - run_column, status_column, results_column = st.columns(column_spec, vertical_alignment="top") - header_styles = "font-size: 12px; text-transform: uppercase; margin-bottom: 8px;" - testgen.caption("Start Time | Table Group | Test Suite", header_styles, run_column) - testgen.caption("Status | Duration", header_styles, status_column) - testgen.caption("Results Summary", header_styles, results_column) - testgen.divider(-8) - - paginated_df = test_runs_df[PAGE_SIZE * page_index : PAGE_SIZE * (page_index + 1)] - for index, test_run in paginated_df.iterrows(): - with st.container(): - render_test_run_row(test_run, column_spec) - - if (index + 1) % PAGE_SIZE and index != run_count - 1: - testgen.divider(-4, 4) + event_data = component( + id_="test_runs", + key="test_runs", + props={"items": paginated_df.to_json(orient="records")}, + ) + if event_data: + event = event_data["event"] + if event == "LinkClicked": + self.router.navigate(to=event_data["href"], with_args=event_data.get("params")) + elif event == "RunCanceled": + on_cancel_run(event_data["test_run"]) def render_empty_state(project_code: str) -> bool: @@ -148,71 +140,6 @@ def render_empty_state(project_code: str) -> bool: ) return True -def render_test_run_row(test_run: pd.Series, column_spec: list[int]) -> None: - test_run_id = test_run["test_run_id"] - status = test_run["status"] - - run_column, status_column, results_column = st.columns(column_spec, vertical_alignment="top") - - with run_column: - start_time = date_service.get_timezoned_timestamp(st.session_state, test_run["test_starttime"]) if pd.notnull(test_run["test_starttime"]) else "--" - testgen.no_flex_gap() - testgen.link( - label=start_time, - href="test-runs:results", - params={ "run_id": str(test_run_id) }, - height=18, - key=f"test_run:keys:go-to-run:{test_run_id}", - ) - testgen.caption( - f"{test_run['table_groups_name']} > {test_run['test_suite']}", - "margin-top: -9px;" - ) - - with status_column: - testgen.flex_row_start() - - status_display_map = { - "Running": { "label": "Running", "color": "blue" }, - "Complete": { "label": "Completed", "color": "" }, - "Error": { "label": "Error", "color": "red" }, - "Cancelled": { "label": "Canceled", "color": "purple" }, - } - status_attrs = status_display_map.get(status, { "label": "Unknown", "color": "grey" }) - - st.html(f""" -

{status_attrs["label"]}

-

{date_service.get_formatted_duration(test_run["duration"])}

- """) - - if status == "Error" and (log_message := test_run["log_message"]): - st.markdown("", help=log_message) - - if status == "Running" and pd.notnull(test_run["process_id"]): - testgen.button( - type_="stroked", - label="Cancel Run", - style="width: auto; height: 32px; color: var(--purple); margin-left: 16px;", - on_click=partial(on_cancel_run, test_run), - key=f"test_run:keys:cancel-run:{test_run_id}", - ) - - with results_column: - if to_int(test_run["test_ct"]): - testgen.summary_bar( - items=[ - { "label": "Passed", "value": to_int(test_run["passed_ct"]), "color": "green" }, - { "label": "Warning", "value": to_int(test_run["warning_ct"]), "color": "yellow" }, - { "label": "Failed", "value": to_int(test_run["failed_ct"]), "color": "red" }, - { "label": "Error", "value": to_int(test_run["error_ct"]), "color": "brown" }, - { "label": "Dismissed", "value": to_int(test_run["dismissed_ct"]), "color": "grey" }, - ], - height=10, - width=300, - ) - else: - st.markdown("--") - def on_cancel_run(test_run: pd.Series) -> None: process_status, process_message = process_service.kill_test_run(test_run["process_id"]) From 0d749e996b037e1234136a3bac98483186b84891 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 4 Oct 2024 00:41:34 -0400 Subject: [PATCH 13/91] refactor(ui): improve and abstract event handling from page components --- testgen/ui/bootstrap.py | 2 +- .../frontend/js/components/breadcrumbs.js | 13 ++++--- .../frontend/js/components/button.js | 8 ++--- .../components/frontend/js/components/link.js | 8 ++--- .../frontend/js/components/paginator.js | 14 ++++---- .../frontend/js/pages/profiling_runs.js | 8 ++--- .../components/frontend/js/pages/test_runs.js | 8 ++--- testgen/ui/components/frontend/js/utils.js | 10 +++++- testgen/ui/components/widgets/__init__.py | 1 + testgen/ui/components/widgets/paginator.py | 5 +-- .../components/widgets/testgen_component.py | 36 +++++++++++++++++++ testgen/ui/session.py | 2 ++ ...profiling_summary.py => profiling_runs.py} | 20 +++-------- testgen/ui/views/test_runs.py | 16 +++------ 14 files changed, 83 insertions(+), 68 deletions(-) create mode 100644 testgen/ui/components/widgets/testgen_component.py rename testgen/ui/views/{profiling_summary.py => profiling_runs.py} (90%) diff --git a/testgen/ui/bootstrap.py b/testgen/ui/bootstrap.py index 05b943f..e3a99a6 100644 --- a/testgen/ui/bootstrap.py +++ b/testgen/ui/bootstrap.py @@ -15,7 +15,7 @@ from testgen.ui.views.overview import OverviewPage from testgen.ui.views.profiling_anomalies import ProfilingAnomaliesPage from testgen.ui.views.profiling_results import ProfilingResultsPage -from testgen.ui.views.profiling_summary import DataProfilingPage +from testgen.ui.views.profiling_runs import DataProfilingPage from testgen.ui.views.project_settings import ProjectSettingsPage from testgen.ui.views.table_groups import TableGroupsPage from testgen.ui.views.test_definitions import TestDefinitionsPage diff --git a/testgen/ui/components/frontend/js/components/breadcrumbs.js b/testgen/ui/components/frontend/js/components/breadcrumbs.js index bba5320..949499c 100644 --- a/testgen/ui/components/frontend/js/components/breadcrumbs.js +++ b/testgen/ui/components/frontend/js/components/breadcrumbs.js @@ -11,7 +11,7 @@ */ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; -import { loadStylesheet } from '../utils.js'; +import { emitEvent, loadStylesheet } from '../utils.js'; const { a, div, span } = van.tags; @@ -31,7 +31,11 @@ const Breadcrumbs = (/** @type Properties */ props) => { 
{ class: 'tg-breadcrumbs' }, breadcrumbs.reduce((items, b, idx) => { const isLastItem = idx === breadcrumbs.length - 1; - items.push(a({ class: `tg-breadcrumbs--${ isLastItem ? 'current' : 'active'}`, href: `#/${b.path}`, onclick: () => emiEvent(b.path, b.params) }, b.label)) + items.push(a({ + class: `tg-breadcrumbs--${ isLastItem ? 'current' : 'active'}`, + onclick: () => emitEvent('LinkClicked', { href: b.path, params: b.params }) }, + b.label, + )); if (!isLastItem) { items.push(span({class: 'tg-breadcrumbs--arrow'}, '>')); } @@ -42,11 +46,6 @@ const Breadcrumbs = (/** @type Properties */ props) => { ) }; -function emiEvent(/** @type string */ href, /** @type object */ params) { - Streamlit.sendData({ event: 'LinkClicked', href, params }); - return false; -} - const stylesheet = new CSSStyleSheet(); stylesheet.replace(` .tg-breadcrumbs-wrapper { diff --git a/testgen/ui/components/frontend/js/components/button.js b/testgen/ui/components/frontend/js/components/button.js index 90bbfea..e3670e3 100644 --- a/testgen/ui/components/frontend/js/components/button.js +++ b/testgen/ui/components/frontend/js/components/button.js @@ -10,7 +10,7 @@ * @property {(bool)} disabled * @property {string?} style */ -import { enforceElementWidth, loadStylesheet } from '../utils.js'; +import { emitEvent, enforceElementWidth, loadStylesheet } from '../utils.js'; import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; @@ -39,7 +39,7 @@ const Button = (/** @type Properties */ props) => { window.frameElement.parentElement.setAttribute('data-tooltip-position', props.tooltipPosition.val); } - const onClickHandler = props.onclick || post; + const onClickHandler = props.onclick || (() => emitEvent('ButtonClicked')); return button( { class: `tg-button tg-${props.type.val}-button ${props.type.val !== 'icon' && isIconOnly ? 'tg-icon-button' : ''}`, @@ -53,10 +53,6 @@ const Button = (/** @type Properties */ props) => { ); }; -function post() { - Streamlit.sendData({ value: Math.random() }); -} - const stylesheet = new CSSStyleSheet(); stylesheet.replace(` button.tg-button { diff --git a/testgen/ui/components/frontend/js/components/link.js b/testgen/ui/components/frontend/js/components/link.js index 6600ed6..8a0b09b 100644 --- a/testgen/ui/components/frontend/js/components/link.js +++ b/testgen/ui/components/frontend/js/components/link.js @@ -13,7 +13,7 @@ * @property {number?} width * @property {string?} style */ -import { enforceElementWidth, loadStylesheet } from '../utils.js'; +import { emitEvent, enforceElementWidth, loadStylesheet } from '../utils.js'; import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; @@ -33,7 +33,7 @@ const Link = (/** @type Properties */ props) => { { class: `tg-link ${props.underline?.val ? 
'tg-link--underline' : ''}`, style: props.style, - onclick: () => emitEvent(props.href.val, props.params.val), + onclick: () => emitEvent('LinkClicked', { href: props.href.val, params: props.params.val }), }, div( {class: 'tg-link--wrapper'}, @@ -55,10 +55,6 @@ const LinkIcon = ( ); }; -function emitEvent(href, params) { - Streamlit.sendData({ event: 'LinkClicked', href, params }); -} - const stylesheet = new CSSStyleSheet(); stylesheet.replace(` .tg-link { diff --git a/testgen/ui/components/frontend/js/components/paginator.js b/testgen/ui/components/frontend/js/components/paginator.js index 0b50677..2c3a497 100644 --- a/testgen/ui/components/frontend/js/components/paginator.js +++ b/testgen/ui/components/frontend/js/components/paginator.js @@ -7,7 +7,7 @@ */ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; -import { loadStylesheet } from '../utils.js'; +import { emitEvent, loadStylesheet } from '../utils.js'; const { div, span, i, button } = van.tags; @@ -35,7 +35,7 @@ const Paginator = (/** @type Properties */ props) => { class: 'tg-paginator--button', onclick: () => { pageIndexState.val = 0; - emitEvent(pageIndexState.val); + changePage(pageIndexState.val); }, disabled: () => pageIndexState.val === 0, }, @@ -46,7 +46,7 @@ const Paginator = (/** @type Properties */ props) => { class: 'tg-paginator--button', onclick: () => { pageIndexState.val--; - emitEvent(pageIndexState.val); + changePage(pageIndexState.val); }, disabled: () => pageIndexState.val === 0, }, @@ -57,7 +57,7 @@ const Paginator = (/** @type Properties */ props) => { class: 'tg-paginator--button', onclick: () => { pageIndexState.val++; - emitEvent(pageIndexState.val); + changePage(pageIndexState.val); }, disabled: () => pageIndexState.val === Math.ceil(count.val / pageSize.val) - 1, }, @@ -68,7 +68,7 @@ const Paginator = (/** @type Properties */ props) => { class: 'tg-paginator--button', onclick: () => { pageIndexState.val = Math.ceil(count.val / pageSize.val) - 1; - emitEvent(pageIndexState.val); + changePage(pageIndexState.val); }, disabled: () => pageIndexState.val === Math.ceil(count.val / pageSize.val) - 1, }, @@ -77,8 +77,8 @@ const Paginator = (/** @type Properties */ props) => { ); }; -function emitEvent(/** @type number */pageIndex) { - Streamlit.sendData({ event: 'PageChanged', pageIndex }) +function changePage(/** @type number */page_index) { + emitEvent('PageChanged', { page_index }) } const stylesheet = new CSSStyleSheet(); diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js index 639f581..7532145 100644 --- a/testgen/ui/components/frontend/js/pages/profiling_runs.js +++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js @@ -9,7 +9,7 @@ import { SummaryBar } from '../components/summary_bar.js'; import { Link } from '../components/link.js'; import { Button } from '../components/button.js'; import { Streamlit } from '../streamlit.js'; -import { wrapProps } from '../utils.js'; +import { emitEvent, wrapProps } from '../utils.js'; import { formatTimestamp, formatDuration } from '../display_utils.js'; const { div, span, i } = van.tags; @@ -78,11 +78,7 @@ const ProfilingRunItem = (item, /** @type string[] */ columns) => { type: 'stroked', label: 'Cancel Run', style: 'width: auto; height: 32px; color: var(--purple); margin-left: 16px;', - onclick: () => Streamlit.sendData({ - event: 'RunCanceled', - profiling_run: item, - _id: Math.random(), // Forces on_change component handler to be triggered on every 
click - }), + onclick: () => emitEvent('RunCanceled', { payload: item }), })) : null, ), div( diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js index 3e91354..0661972 100644 --- a/testgen/ui/components/frontend/js/pages/test_runs.js +++ b/testgen/ui/components/frontend/js/pages/test_runs.js @@ -9,7 +9,7 @@ import { SummaryBar } from '../components/summary_bar.js'; import { Link } from '../components/link.js'; import { Button } from '../components/button.js'; import { Streamlit } from '../streamlit.js'; -import { wrapProps } from '../utils.js'; +import { emitEvent, wrapProps } from '../utils.js'; import { formatTimestamp, formatDuration } from '../display_utils.js'; const { div, span, i } = van.tags; @@ -79,11 +79,7 @@ const TestRunItem = (item, /** @type string[] */ columns) => { type: 'stroked', label: 'Cancel Run', style: 'width: auto; height: 32px; color: var(--purple); margin-left: 16px;', - onclick: () => Streamlit.sendData({ - event: 'RunCanceled', - test_run: item, - _id: Math.random(), // Forces on_change component handler to be triggered on every click - }), + onclick: () => emitEvent('RunCanceled', { payload: item }), })) : null, ), div( diff --git a/testgen/ui/components/frontend/js/utils.js b/testgen/ui/components/frontend/js/utils.js index 0f0103b..3a37f12 100644 --- a/testgen/ui/components/frontend/js/utils.js +++ b/testgen/ui/components/frontend/js/utils.js @@ -1,4 +1,5 @@ import van from './van.min.js'; +import { Streamlit } from './streamlit.js'; function enforceElementWidth( /** @type Element */element, @@ -28,4 +29,11 @@ function wrapProps(/** @type object */props) { return props; } -export { enforceElementWidth, loadStylesheet, wrapProps }; +function emitEvent( + /** @type string */event, + /** @type object */data = {}, +) { + Streamlit.sendData({ event, ...data, _id: Math.random() }) // Identify the event so its handler is called once +} + +export { emitEvent, enforceElementWidth, loadStylesheet, wrapProps }; diff --git a/testgen/ui/components/widgets/__init__.py b/testgen/ui/components/widgets/__init__.py index 72930bb..b9f3502 100644 --- a/testgen/ui/components/widgets/__init__.py +++ b/testgen/ui/components/widgets/__init__.py @@ -22,3 +22,4 @@ from testgen.ui.components.widgets.sidebar import sidebar from testgen.ui.components.widgets.sorting_selector import sorting_selector from testgen.ui.components.widgets.summary_bar import summary_bar +from testgen.ui.components.widgets.testgen_component import TestGenComponentId, testgen_component diff --git a/testgen/ui/components/widgets/paginator.py b/testgen/ui/components/widgets/paginator.py index 8c1e4c7..c98a335 100644 --- a/testgen/ui/components/widgets/paginator.py +++ b/testgen/ui/components/widgets/paginator.py @@ -17,9 +17,10 @@ def paginator( :param key: unique key to give the component a persisting state """ - return component( + event_data = component( id_="paginator", key=key, - default=page_index, + default={ page_index: page_index }, props={"count": count, "pageSize": page_size, "pageIndex": page_index}, ) + return event_data.get("page_index", 0) diff --git a/testgen/ui/components/widgets/testgen_component.py b/testgen/ui/components/widgets/testgen_component.py new file mode 100644 index 0000000..8ac4554 --- /dev/null +++ b/testgen/ui/components/widgets/testgen_component.py @@ -0,0 +1,36 @@ +from enum import Enum + +from testgen.ui.components.utils.component import component +from testgen.ui.navigation.router import Router +from 
testgen.ui.session import session + + +class TestGenComponentId(Enum): + ProfililngRuns = "profiling_runs" + TestRuns = "test_runs" + +def testgen_component( + component_id: TestGenComponentId, + props: dict, + event_handlers: dict | None, +) -> dict | None: + + event_data = component( + id_=component_id.value, + key=f"testgen:{component_id.value}", + props=props, + ) + if event_data and (event := event_data.get("event")): + if event == "LinkClicked": + Router().navigate(to=event_data["href"], with_args=event_data.get("params")) + + elif event_handlers and (handler := event_handlers.get(event)): + # Prevent handling the same event multiple times + event_id = f"{component_id.value}:{event_data.get('_id', '')}" + if event_id != session.testgen_event_id: + session.testgen_event_id = event_id + # These events are not handled through the component's on_change callback + # because they may call st.rerun(), causing the "Calling st.rerun() within a callback is a noop" error + handler(event_data.get("payload")) + + return event_data diff --git a/testgen/ui/session.py b/testgen/ui/session.py index b10e251..5c7459a 100644 --- a/testgen/ui/session.py +++ b/testgen/ui/session.py @@ -29,6 +29,8 @@ class TestgenSession(Singleton): add_project: bool latest_version: str | None + testgen_event_id: str | None + def __init__(self, state: SessionStateProxy) -> None: super().__setattr__("_state", state) diff --git a/testgen/ui/views/profiling_summary.py b/testgen/ui/views/profiling_runs.py similarity index 90% rename from testgen/ui/views/profiling_summary.py rename to testgen/ui/views/profiling_runs.py index f0c736e..5921244 100644 --- a/testgen/ui/views/profiling_summary.py +++ b/testgen/ui/views/profiling_runs.py @@ -10,7 +10,7 @@ import testgen.ui.services.query_service as dq from testgen.commands.run_profiling_bridge import update_profile_run_status from testgen.ui.components import widgets as testgen -from testgen.ui.components.utils.component import component +from testgen.ui.components.widgets import TestGenComponentId, testgen_component from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import project_queries @@ -74,20 +74,10 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N paginated_df = profiling_runs_df[PAGE_SIZE * page_index : PAGE_SIZE * (page_index + 1)] with list_container: - component_key = "testgen:profiling_runs" - def handle_event(): - if event_data := st.session_state.get(component_key): - event = event_data["event"] - if event == "LinkClicked": - self.router.navigate(to=event_data["href"], with_args=event_data.get("params")) - elif event == "RunCanceled": - on_cancel_run(event_data["profiling_run"]) - - component( - id_="profiling_runs", - key=component_key, - props={"items": paginated_df.to_json(orient="records")}, - on_change=handle_event, + testgen_component( + TestGenComponentId.ProfililngRuns, + props={ "items": paginated_df.to_json(orient="records") }, + event_handlers={ "RunCanceled": on_cancel_run } ) diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index dfe8a90..11fdf3b 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -10,7 +10,7 @@ import testgen.ui.services.query_service as dq import testgen.ui.services.test_run_service as test_run_service from testgen.ui.components import widgets as testgen -from testgen.ui.components.utils.component import component +from testgen.ui.components.widgets import TestGenComponentId, 
testgen_component from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import project_queries @@ -84,17 +84,11 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N paginated_df = test_runs_df[PAGE_SIZE * page_index : PAGE_SIZE * (page_index + 1)] with list_container: - event_data = component( - id_="test_runs", - key="test_runs", - props={"items": paginated_df.to_json(orient="records")}, + testgen_component( + TestGenComponentId.TestRuns, + props={ "items": paginated_df.to_json(orient="records") }, + event_handlers={ "RunCanceled": on_cancel_run } ) - if event_data: - event = event_data["event"] - if event == "LinkClicked": - self.router.navigate(to=event_data["href"], with_args=event_data.get("params")) - elif event == "RunCanceled": - on_cancel_run(event_data["test_run"]) def render_empty_state(project_code: str) -> bool: From 79c708f63c94c951eb51a14b59da514d2957fa3b Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 4 Oct 2024 00:42:16 -0400 Subject: [PATCH 14/91] fix(ui): handle height of page components responsively --- .../frontend/js/pages/profiling_runs.js | 47 ++++++++++--------- .../components/frontend/js/pages/test_runs.js | 39 ++++++++------- testgen/ui/components/frontend/js/utils.js | 12 ++++- 3 files changed, 57 insertions(+), 41 deletions(-) diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js index 7532145..531768d 100644 --- a/testgen/ui/components/frontend/js/pages/profiling_runs.js +++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js @@ -9,7 +9,7 @@ import { SummaryBar } from '../components/summary_bar.js'; import { Link } from '../components/link.js'; import { Button } from '../components/button.js'; import { Streamlit } from '../streamlit.js'; -import { emitEvent, wrapProps } from '../utils.js'; +import { emitEvent, resizeFrameHeightToElement, wrapProps } from '../utils.js'; import { formatTimestamp, formatDuration } from '../display_utils.js'; const { div, span, i } = van.tags; @@ -22,33 +22,36 @@ const ProfilingRuns = (/** @type Properties */ props) => { try { items = JSON.parse(props.items?.val); } catch { } - Streamlit.setFrameHeight(44 + 84.5 * items.length); + Streamlit.setFrameHeight(100 * items.length); return items; }); const columns = ['20%', '20%', '20%', '40%']; + const tableId = 'profiling-runs-table'; + resizeFrameHeightToElement(tableId); + return div( - () => div( - { class: 'table' }, - div( - { class: 'table-header flex-row' }, - span( - { style: `flex: ${columns[0]}` }, - 'Start Time | Table Group', - ), - span( - { style: `flex: ${columns[1]}` }, - 'Status | Duration', - ), - span( - { style: `flex: ${columns[2]}` }, - 'Schema', - ), - span( - { style: `flex: ${columns[3]}` }, - 'Hygiene Issues', - ), + { class: 'table', id: tableId }, + div( + { class: 'table-header flex-row' }, + span( + { style: `flex: ${columns[0]}` }, + 'Start Time | Table Group', + ), + span( + { style: `flex: ${columns[1]}` }, + 'Status | Duration', + ), + span( + { style: `flex: ${columns[2]}` }, + 'Schema', ), + span( + { style: `flex: ${columns[3]}` }, + 'Hygiene Issues', + ), + ), + () => div( profilingRunItems.val.map(item => ProfilingRunItem(item, columns)), ), ); diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js index 0661972..596e8a7 100644 --- a/testgen/ui/components/frontend/js/pages/test_runs.js +++ 
b/testgen/ui/components/frontend/js/pages/test_runs.js @@ -9,7 +9,7 @@ import { SummaryBar } from '../components/summary_bar.js'; import { Link } from '../components/link.js'; import { Button } from '../components/button.js'; import { Streamlit } from '../streamlit.js'; -import { emitEvent, wrapProps } from '../utils.js'; +import { emitEvent, resizeFrameHeightToElement, wrapProps } from '../utils.js'; import { formatTimestamp, formatDuration } from '../display_utils.js'; const { div, span, i } = van.tags; @@ -22,29 +22,32 @@ const TestRuns = (/** @type Properties */ props) => { try { items = JSON.parse(props.items?.val); } catch { } - Streamlit.setFrameHeight(44 + 60.5 * items.length); + Streamlit.setFrameHeight(100 * items.length); return items; }); const columns = ['30%', '20%', '50%']; + const tableId = 'test-runs-table'; + resizeFrameHeightToElement(tableId); + return div( - () => div( - { class: 'table' }, - div( - { class: 'table-header flex-row' }, - span( - { style: `flex: ${columns[0]}` }, - 'Start Time | Table Group | Test Suite', - ), - span( - { style: `flex: ${columns[1]}` }, - 'Status | Duration', - ), - span( - { style: `flex: ${columns[2]}` }, - 'Results Summary', - ), + { class: 'table', id: tableId }, + div( + { class: 'table-header flex-row' }, + span( + { style: `flex: ${columns[0]}` }, + 'Start Time | Table Group | Test Suite', + ), + span( + { style: `flex: ${columns[1]}` }, + 'Status | Duration', + ), + span( + { style: `flex: ${columns[2]}` }, + 'Results Summary', ), + ), + () => div( testRunItems.val.map(item => TestRunItem(item, columns)), ), ); diff --git a/testgen/ui/components/frontend/js/utils.js b/testgen/ui/components/frontend/js/utils.js index 3a37f12..9b3bcb9 100644 --- a/testgen/ui/components/frontend/js/utils.js +++ b/testgen/ui/components/frontend/js/utils.js @@ -12,6 +12,16 @@ function enforceElementWidth( observer.observe(element); } +function resizeFrameHeightToElement(/** @type string */elementId) { + const observer = new ResizeObserver(() => { + const height = document.getElementById(elementId).offsetHeight; + if (height) { + Streamlit.setFrameHeight(height); + } + }); + observer.observe(window.frameElement); +} + function loadStylesheet( /** @type string */key, /** @type CSSStyleSheet */stylesheet, @@ -36,4 +46,4 @@ function emitEvent( Streamlit.sendData({ event, ...data, _id: Math.random() }) // Identify the event so its handler is called once } -export { emitEvent, enforceElementWidth, loadStylesheet, wrapProps }; +export { emitEvent, enforceElementWidth, loadStylesheet, resizeFrameHeightToElement, wrapProps }; From f082c0ae2905dc8554fe3a463871a1250efbecf1 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 4 Oct 2024 13:56:59 -0400 Subject: [PATCH 15/91] style: improve typing --- testgen/ui/components/widgets/__init__.py | 2 +- testgen/ui/components/widgets/testgen_component.py | 14 +++++--------- testgen/ui/views/profiling_runs.py | 4 ++-- testgen/ui/views/test_runs.py | 4 ++-- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/testgen/ui/components/widgets/__init__.py b/testgen/ui/components/widgets/__init__.py index b9f3502..c847d35 100644 --- a/testgen/ui/components/widgets/__init__.py +++ b/testgen/ui/components/widgets/__init__.py @@ -22,4 +22,4 @@ from testgen.ui.components.widgets.sidebar import sidebar from testgen.ui.components.widgets.sorting_selector import sorting_selector from testgen.ui.components.widgets.summary_bar import summary_bar -from testgen.ui.components.widgets.testgen_component import 
TestGenComponentId, testgen_component +from testgen.ui.components.widgets.testgen_component import testgen_component diff --git a/testgen/ui/components/widgets/testgen_component.py b/testgen/ui/components/widgets/testgen_component.py index 8ac4554..447686e 100644 --- a/testgen/ui/components/widgets/testgen_component.py +++ b/testgen/ui/components/widgets/testgen_component.py @@ -1,23 +1,19 @@ -from enum import Enum +import typing from testgen.ui.components.utils.component import component from testgen.ui.navigation.router import Router from testgen.ui.session import session -class TestGenComponentId(Enum): - ProfililngRuns = "profiling_runs" - TestRuns = "test_runs" - def testgen_component( - component_id: TestGenComponentId, + component_id: typing.Literal["profiling_runs", "test_runs"], props: dict, event_handlers: dict | None, ) -> dict | None: event_data = component( - id_=component_id.value, - key=f"testgen:{component_id.value}", + id_=component_id, + key=f"testgen:{component_id}", props=props, ) if event_data and (event := event_data.get("event")): @@ -26,7 +22,7 @@ def testgen_component( elif event_handlers and (handler := event_handlers.get(event)): # Prevent handling the same event multiple times - event_id = f"{component_id.value}:{event_data.get('_id', '')}" + event_id = f"{component_id}:{event_data.get('_id', '')}" if event_id != session.testgen_event_id: session.testgen_event_id = event_id # These events are not handled through the component's on_change callback diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py index 5921244..c184700 100644 --- a/testgen/ui/views/profiling_runs.py +++ b/testgen/ui/views/profiling_runs.py @@ -10,7 +10,7 @@ import testgen.ui.services.query_service as dq from testgen.commands.run_profiling_bridge import update_profile_run_status from testgen.ui.components import widgets as testgen -from testgen.ui.components.widgets import TestGenComponentId, testgen_component +from testgen.ui.components.widgets import testgen_component from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import project_queries @@ -75,7 +75,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with list_container: testgen_component( - TestGenComponentId.ProfililngRuns, + "profiling_runs", props={ "items": paginated_df.to_json(orient="records") }, event_handlers={ "RunCanceled": on_cancel_run } ) diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index 11fdf3b..729ab8b 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -10,7 +10,7 @@ import testgen.ui.services.query_service as dq import testgen.ui.services.test_run_service as test_run_service from testgen.ui.components import widgets as testgen -from testgen.ui.components.widgets import TestGenComponentId, testgen_component +from testgen.ui.components.widgets import testgen_component from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page from testgen.ui.queries import project_queries @@ -85,7 +85,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N with list_container: testgen_component( - TestGenComponentId.TestRuns, + "test_runs", props={ "items": paginated_df.to_json(orient="records") }, event_handlers={ "RunCanceled": on_cancel_run } ) From 77f107e7a959d8eb056df278f94f326d5e7254ec Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Sat, 5 Oct 2024 13:32:12 -0400 Subject: [PATCH 
16/91] feat(pdf): Refactoring the code and fixing the header

---
 testgen/ui/pdf/dataframe_table.py    | 281 ++++++++++++++++++++
 testgen/ui/pdf/style.py              |  88 +++++++
 testgen/ui/pdf/test_result_report.py | 367 ++++++++-------------------
 3 files changed, 472 insertions(+), 264 deletions(-)
 create mode 100644 testgen/ui/pdf/dataframe_table.py
 create mode 100644 testgen/ui/pdf/style.py

diff --git a/testgen/ui/pdf/dataframe_table.py b/testgen/ui/pdf/dataframe_table.py
new file mode 100644
index 0000000..b24a5ae
--- /dev/null
+++ b/testgen/ui/pdf/dataframe_table.py
@@ -0,0 +1,281 @@
+from collections.abc import Iterable
+from math import nan
+
+import pandas
+from numpy import NaN
+from pandas.core.dtypes.common import is_numeric_dtype
+from reportlab.lib import colors, enums
+from reportlab.lib.styles import ParagraphStyle
+from reportlab.pdfbase.pdfmetrics import stringWidth
+from reportlab.platypus import BalancedColumns, Flowable, Paragraph, Table, TableStyle
+
+from testgen.ui.pdf.style import COLOR_FADED_TEXT, COLOR_GRAY_BG, PARA_STYLE_CELL, TABLE_STYLE_DEFAULT
+
+PARA_STYLE_CELL_DATA = ParagraphStyle(
+    "table_cell_data",
+    PARA_STYLE_CELL,
+    leading=10,
+)
+
+PARA_STYLE_CELL_NUMERIC = ParagraphStyle(
+    "table_cell_numeric",
+    PARA_STYLE_CELL_DATA,
+    alignment=enums.TA_RIGHT,
+    fontName="Courier",
+)
+
+PARA_STYLE_CELL_NULL = ParagraphStyle(
+    "table_cell_null",
+    PARA_STYLE_CELL_NUMERIC,
+    alignment=enums.TA_CENTER,
+    textColor=COLOR_FADED_TEXT,
+)
+
+PARA_STYLE_CELL_HEADER = ParagraphStyle(
+    "table_cell_header",
+    PARA_STYLE_CELL_DATA,
+    alignment=enums.TA_CENTER,
+    fontName="Helvetica",
+    splitLongWords=0,
+)
+
+TABLE_STYLE_DATA = TableStyle(
+    (
+        ("VALIGN", (0, 0), (-1, 0), "MIDDLE"),
+        ("GRID", (0, 0), (-1, -1), 0.5, COLOR_GRAY_BG),
+        ("INNERGRID", (0, 0), (-1, 0), 1, colors.white),
+        ("BACKGROUND", (0, 0), (-1, 0), COLOR_GRAY_BG),
+        ("LEFTPADDING", (0, 0), (-1, 0), 4),
+        ("RIGHTPADDING", (0, 0), (-1, 0), 4),
+        ("TOPPADDING", (0, 0), (-1, 0), 6),
+        ("BOTTOMPADDING", (0, 0), (-1, 0), 6),
+
+    ),
+    parent=TABLE_STYLE_DEFAULT,
+)
+
+
+class VerticalHeaderCell(Flowable):
+    """
+    Wrap a Paragraph, rotating it 90 degrees.
+
+    Technically, it could rotate any element, but it was designed to rotate a Paragraph (which uses all the available
+    width by default, and grows vertically as needed) into a narrow space, such as a table column with a pre-determined
+    width, which is the case of our DataFrame table implementation.
+
+    It leverages a starting value for the height as an attempt to avoid unnecessary line breaks when there's room
+    available. It attempts to wrap the Paragraph using the header height as its width, but it checks if the Paragraph
+    height is exceeding the column width, making more room and re-wrapping the Paragraph when necessary.
+
+    It also centralizes the flowable, regardless of the cell style.
+    """
+
+    INITIAL_HEIGHT = 40
+    HEIGHT_INCR_STEP = 5
+
+    def __init__(self, flowable):
+        self.flowable = flowable
+        self.available_width = 0
+        self.flowable_width = 0
+        super().__init__()
+
+    def wrap(self, availWidth, _):
+        self.available_width = availWidth
+
+        available_height = self.INITIAL_HEIGHT
+        while True:
+            flowable_height, self.flowable_width = self.flowable.wrap(available_height, self.available_width)
+
+            if self.flowable_width > self.available_width:
+                available_height += self.HEIGHT_INCR_STEP
+            else:
+                break
+
+        return self.available_width, flowable_height
+
+    def drawOn(self, canvas, x, y, _sW=0):
+        canvas.saveState()
+        canvas.rotate(90)
+        # Besides translating x and y for the rotated canvas, we are horizontally centralizing the content by adding
+        # half of the "unused" width to the y position (which affects what we see as "x" in the rotated canvas)
+        ret = self.flowable.drawOn(
+            canvas,
+            y,
+            -(x + self.available_width + (self.available_width - self.flowable_width) / 2),
+            _sW,
+        )
+        canvas.restoreState()
+        return ret
+
+
+class DataFrameTableBuilder:
+    """
+    Build a Table based on the contents of a Pandas DataFrame.
+
+    It wraps the content of each cell into a Paragraph, to ease line breaks when necessary. Both Tables and Paragraphs
+    adjust their widths automatically, but they don't play well together, so this class calculates each column width
+    based on the DataFrame content. It can discard columns when they don't fit in the page width, dropping the widest
+    until it fits.
+
+    It also provides a utility method to wrap the table (and potentially any other content that should be rendered
+    within it) into a columned layout.
+    """
+
+    null_para = Paragraph("NULL", style=PARA_STYLE_CELL_NULL)
+
+    def __init__(self, dataframe, available_width, col_padding=16, header_exp_limit=0.45):
+        self._dataframe = dataframe
+        self.available_width = available_width
+        self.col_padding = col_padding
+        self.header_exp_limit = header_exp_limit
+        self.omitted_columns = []
+        self.col_len_data = pandas.DataFrame(columns=["width", "max_width"], index=iter(dataframe))
+        self.table_data = None
+
+    def build_table(self, **kwargs):
+        if "colWidths" in kwargs:
+            raise ValueError("Can not override the calculated column widths")
+
+        self.table_data = self._prepare_data()
+        self._drop_columns_that_dont_fit()
+        self.col_len_data["width"] += self._calc_content_cols_expansion()
+        header = self._setup_header()
+
+        kwargs["colWidths"] = self.col_len_data["width"].tolist()
+        kwargs.setdefault("style", TABLE_STYLE_DATA)
+        kwargs.setdefault("repeatRows", 1)
+
+        table_data = (
+            header,
+            *(data.tolist() for _, data in self.table_data.iterrows()),
+        )
+
+        return Table(table_data, **kwargs)
+
+    def split_in_columns(self, flowables, min_rows=5, col_padding=10):
+        # We don't want the columns to be glued together, so we add a padding for calculation
+        table_width = self._get_current_width() + col_padding
+
+        # Adding one `col_padding` to the available width to compensate for the fact that
+        # only n-1 col "paddings" will be rendered for a BC with n cols
+        layout_columns = int((self.available_width + col_padding) / table_width)
+
+        # Limiting the number of columns so each column has at least `min_rows` rows
+        layout_columns = min(layout_columns, int(len(self.table_data) / min_rows))
+
+        if layout_columns > 1:
+            columns = BalancedColumns(flowables, layout_columns, leftPadding=0, rightPadding=0, topPadding=0, bottomPadding=0)
+            # Converting the BC to a list to honor the `flowables` input type, for consistency
+            return 
[columns] if isinstance(flowables, Iterable) else columns + else: + return flowables + + def _setup_header(self): + header_cells = pandas.Series( + [Paragraph(label, style=PARA_STYLE_CELL_HEADER) for label in self.table_data.columns], + index=self.table_data.columns, + ) + expansible_width = self._get_expansible_width() + + min_max_widths = header_cells.map(self._calc_cell_width) + min_widths = min_max_widths.map(lambda t: t[0]) + min_exp_appetite = self._calc_expansion_appetite(min_widths) + + if min_exp_appetite.sum() <= expansible_width: + self.col_len_data["width"] += min_exp_appetite + + max_widths = min_max_widths.map(lambda t: t[1]) + max_exp_appetite = self._calc_expansion_appetite(max_widths) + if (max_exp_appetite.sum() + self._get_current_width()) / self.available_width <= self.header_exp_limit: + self.col_len_data["width"] += max_exp_appetite + else: + header_cells = header_cells.map(VerticalHeaderCell) + + return header_cells.tolist() + + def _get_expansible_width(self): + return self.available_width - self._get_current_width() + + def _get_current_width(self): + return self.col_len_data["width"].sum() + + def _calc_expansion_appetite(self, desired_widths): + """ + Given a series of "ideal" widths, return a series with how much each smaller column has to grow to match. + """ + return (desired_widths - self.col_len_data["width"]).apply(max, args=(0,)) + + def _calc_content_cols_expansion(self): + """ + Calculate how much each column has to grow to fit all the text without wrapping. + + The growth is limited by the available width and applied proportionally. + """ + expansion_appetite = self._calc_expansion_appetite(self.col_len_data["max_width"]) + expansible_width = self._get_expansible_width() + expand_factor = max(1, expansion_appetite.sum() / expansible_width) if expansible_width else 0 + return expansion_appetite * expand_factor + + def _drop_columns_that_dont_fit(self): + while True: + if self._get_expansible_width() >= 0: + break + largest_col = self.col_len_data["width"].idxmax() + self.table_data = self.table_data.drop(columns=largest_col) + self.col_len_data = self.col_len_data.drop(index=largest_col) + self.omitted_columns.append(largest_col) + + def _calc_cell_width(self, cell): + """ + Calculate the minimum and maximum widths required by a given cell (Paragraph). + + The min width considers wrapping only at the spaces, while the max width considers no wrapping. + """ + font_name = cell.style.fontName + font_size = cell.style.fontSize + space_width = stringWidth(" ", font_name, font_size) + words_width = [stringWidth(word, font_name, font_size) for word in cell.text.split(" ")] + min_width = max(words_width) + self.col_padding + max_width = sum(words_width) + self.col_padding + space_width * (len(words_width) - 1) + return min_width, max_width + + def _calc_col_width(self, col): + col_width = col.map(self._calc_cell_width) + min_width = col_width.max()[0] + max_width = col_width.map(lambda t: t[1]).max() + return min_width, max_width + + def _convert_col_values(self, col): + """ + Convert all values of a given column into Paragraphs. + + It applies different styles depending on the data type, and skips converting values that are already Paragraphs. 
+ """ + para_style = PARA_STYLE_CELL_NUMERIC if is_numeric_dtype(col.dtype) else PARA_STYLE_CELL + + def _convert_value(value): + if isinstance(value, Paragraph): + return value + elif value in (None, NaN, nan): + return self.null_para + else: + return Paragraph(str(value), para_style) + + return col.map(_convert_value) + + def _prepare_data(self): + """ + Create a new DataFrame with the converted values from the input DataFrame. + + It also calculates the initial column widths. + """ + table_data = pandas.DataFrame() + for col_idx in self._dataframe.columns: + col = self._dataframe[col_idx] + table_data[col_idx] = self._convert_col_values(col) + self.col_len_data.loc[col_idx] = self._calc_col_width(table_data[col_idx]) + + # Freeing up the reference to the original Dataframe, in case it's ready to be garbage collected + del self._dataframe + + return table_data diff --git a/testgen/ui/pdf/style.py b/testgen/ui/pdf/style.py new file mode 100644 index 0000000..197674e --- /dev/null +++ b/testgen/ui/pdf/style.py @@ -0,0 +1,88 @@ +from reportlab.lib import enums +from reportlab.lib.colors import HexColor +from reportlab.lib.styles import ParagraphStyle +from reportlab.platypus import TableStyle + +COLOR_GRAY_BG = HexColor(0xF2F2F2) +COLOR_GREEN_BG = HexColor(0xDCE4DA) +COLOR_YELLOW_BG = HexColor(0xA0C84E40, hasAlpha=True) +COLOR_GREEN_TEXT = HexColor(0x139549) +COLOR_FADED_TEXT = HexColor(0x404040) + +PARA_STYLE_DEFAULT = ParagraphStyle( + "default", + fontSize=8, + fontName="Helvetica", +) + +PARA_STYLE_TEXT = ParagraphStyle( + "text", + PARA_STYLE_DEFAULT, + fontName="Times-Roman", +) + +PARA_STYLE_INFO = ParagraphStyle( + "info", + PARA_STYLE_DEFAULT, + fontName="Helvetica", + backColor=COLOR_YELLOW_BG, + borderPadding=12, + leftIndent=12, + rightIndent=12, + spaceBefore=18, + spaceAfter=18, +) + +PARA_STYLE_MONO = ParagraphStyle( + "monospaced", + PARA_STYLE_DEFAULT, + fontName="Courier", + borderPadding=4, + backColor=COLOR_GRAY_BG, + leftIndent=4, + rightIndent=4, + spaceBefore=8, + spaceAfter=8, +) + +PARA_STYLE_FOOTNOTE = ParagraphStyle( + "footnote", + PARA_STYLE_DEFAULT, + fontSize=6, + fontName="Helvetica-Oblique", + textColor=COLOR_FADED_TEXT, +) + +PARA_STYLE_TITLE = ParagraphStyle( + "title", + PARA_STYLE_DEFAULT, + fontSize=18, + leading=30, + alignment=enums.TA_CENTER, + spaceBefore=12, + spaceAfter=4, + textColor=COLOR_GREEN_TEXT, +) + +PARA_STYLE_H1 = ParagraphStyle( + "heading_1", + PARA_STYLE_TITLE, + fontSize=12, + leading=16, + alignment=enums.TA_LEFT, +) + +TABLE_STYLE_DEFAULT = TableStyle( + ( + ("ALIGN", (0, 0), (-1, -1), "LEFT"), + ("VALIGN", (0, 0), (-1, -1), "TOP"), + ("FONT", (0, 0), (-1, -1), "Helvetica", 7), + ) +) + +PARA_STYLE_CELL = ParagraphStyle( + "table_cell", + fontSize=7, + fontName="Helvetica", + leading=10, +) diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py index e2d8e88..34bbc68 100644 --- a/testgen/ui/pdf/test_result_report.py +++ b/testgen/ui/pdf/test_result_report.py @@ -1,13 +1,9 @@ - import pandas -from pandas.core.dtypes.common import is_numeric_dtype, is_string_dtype -from reportlab.lib import colors, enums +from reportlab.lib import colors from reportlab.lib.colors import HexColor from reportlab.lib.styles import ParagraphStyle from reportlab.lib.units import inch -from reportlab.pdfbase.pdfmetrics import stringWidth from reportlab.platypus import ( - BalancedColumns, CondPageBreak, KeepTogether, Paragraph, @@ -16,6 +12,19 @@ TableStyle, ) +from testgen.ui.pdf.dataframe_table import TABLE_STYLE_DATA, 
DataFrameTableBuilder +from testgen.ui.pdf.style import ( + COLOR_GRAY_BG, + COLOR_GREEN_BG, + PARA_STYLE_CELL, + PARA_STYLE_FOOTNOTE, + PARA_STYLE_H1, + PARA_STYLE_INFO, + PARA_STYLE_MONO, + PARA_STYLE_TEXT, + PARA_STYLE_TITLE, + TABLE_STYLE_DEFAULT, +) from testgen.ui.services.database_service import get_schema from testgen.ui.services.test_results_service import ( do_source_data_lookup, @@ -25,158 +34,34 @@ MARGIN = 0.4 * inch -SECTION_MIN_AVAILABLE_HEIGHT = 60 - -COLOR_GRAY_BG = HexColor(0xF2F2F2) - -COLOR_GREEN_BG = HexColor(0xDCE4DA) +SECTION_MIN_AVAILABLE_HEIGHT = 120 -COLOR_YELLOW_BG = HexColor(0xA0C84E40, hasAlpha=True) - -COLOR_GREEN_TEXT = HexColor(0x139549) - -COLOR_FADED_TEXT = HexColor(0x404040) - -COLOR_TEST_STATUS = { +RESULT_STATUS_COLORS = { "Passed": HexColor(0x94C465), "Warning": HexColor(0xFCD349), "Failed": HexColor(0xE94D4A), } -PARA_STYLE_DEFAULT = ParagraphStyle( - "default", - fontSize=8, - fontName="Helvetica", -) - -PARA_STYLE_TEXT = ParagraphStyle( - "text", - PARA_STYLE_DEFAULT, - fontName="Times-Roman", -) - -PARA_STYLE_INFO = ParagraphStyle( - "info", - PARA_STYLE_DEFAULT, - fontName="Helvetica", - backColor=COLOR_YELLOW_BG, - borderPadding=12, - leftIndent=12, - rightIndent=12, - spaceBefore=18, - spaceAfter=18, -) - - -PARA_STYLE_MONO = ParagraphStyle( - "monospaced", - PARA_STYLE_DEFAULT, - fontName="Courier", - borderPadding=4, - backColor=COLOR_GRAY_BG, - leftIndent=4, - rightIndent=4, - spaceBefore=8, - spaceAfter=8, -) - -PARA_STYLE_FOOTNOTE = ParagraphStyle( - "footnote", - PARA_STYLE_DEFAULT, - fontSize=6, - fontName="Helvetica-Oblique", - textColor=COLOR_FADED_TEXT, -) - - -PARA_STYLE_TITLE = ParagraphStyle( - "title", - PARA_STYLE_DEFAULT, - fontSize=18, - leading=30, - alignment=enums.TA_CENTER, - spaceBefore=12, - spaceAfter=4, - textColor=COLOR_GREEN_TEXT, -) - -PARA_STYLE_H1 = ParagraphStyle( - "heading_1", - PARA_STYLE_TITLE, - fontSize=12, - leading=16, - alignment=enums.TA_LEFT, -) +def build_summary_table(document, tr_data): + status_color = RESULT_STATUS_COLORS.get(tr_data["result_status"], COLOR_GRAY_BG) -TABLE_STYLE_DEFAULT = TableStyle( - ( - ("ALIGN", (0, 0), (-1, -1), "LEFT"), - ("VALIGN", (0, 0), (-1, -1), "TOP"), - ("FONT", (0, 0), (-1, -1), "Helvetica", 7), + TABLE_HEADER_CELL_CMD = ( + ("FONT", "Helvetica-Bold"), + ("ALIGN", "RIGHT"), + ("BACKGROUND", COLOR_GREEN_BG), ) -) - -PARA_STYLE_CELL = ParagraphStyle( - "table_cell", - fontSize=7, - fontName="Helvetica", -) - -PARA_STYLE_CELL_NUMERIC = ParagraphStyle( - "table_cell_numeric", - PARA_STYLE_CELL, - alignment=enums.TA_RIGHT, - fontName="Courier", -) - -PARA_STYLE_CELL_NULL = ParagraphStyle( - "table_cell_null", - PARA_STYLE_CELL_NUMERIC, - alignment=enums.TA_CENTER, - textColor=COLOR_FADED_TEXT, -) - - -# One time use styles - - - -TABLE_HEADER_CELL_CMD = ( - ("FONT", "Helvetica-Bold"), - ("ALIGN", "RIGHT"), - ("BACKGROUND", COLOR_GREEN_BG), -) - -TABLE_STYLE_SUMMARY = TableStyle( - ( - ("GRID", (0, 0), (-1, -1), 2, colors.white), - ("BACKGROUND", (0, 0), (-1, -1), COLOR_GRAY_BG), - *[(cmd[0], (0, 0), (0, -1), *cmd[1:]) for cmd in TABLE_HEADER_CELL_CMD], - ), - parent=TABLE_STYLE_DEFAULT, -) - -TABLE_STYLE_DATA = TableStyle( - ( - ("ALIGN", (0, 0), (-1, 0), "CENTER"), - ("VALIGN", (0, 0), (-1, 0), "MIDDLE"), - ("GRID", (0, 0), (-1, -1), 0.5, COLOR_GRAY_BG), - ("INNERGRID", (0, 0), (-1, 0), 1, colors.white), - ("BACKGROUND", (0, 0), (-1, 0), COLOR_GRAY_BG), - ("FONT", (0, 0), (-10, 0), "Helvetica-Bold"), - - ), - parent=TABLE_STYLE_DEFAULT, -) - -def 
get_report_content(document, tr_data): - yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE) - - status_color = COLOR_TEST_STATUS.get(tr_data["result_status"], COLOR_GRAY_BG) summary_table_style = TableStyle( ( + # All-table styles + ("GRID", (0, 0), (-1, -1), 2, colors.white), + ("BACKGROUND", (0, 0), (-1, -1), COLOR_GRAY_BG), + + # Header cells *[(cmd[0], (3, 3), (3, -1), *cmd[1:]) for cmd in TABLE_HEADER_CELL_CMD], + *[(cmd[0], (0, 0), (0, -1), *cmd[1:]) for cmd in TABLE_HEADER_CELL_CMD], + + # Layout ("SPAN", (1, 0), (4, 0)), ("SPAN", (5, 0), (5, 2)), ("SPAN", (2, 1), (4, 1)), @@ -200,7 +85,7 @@ def get_report_content(document, tr_data): ("VALIGN", (5, 0), (5, 0), "MIDDLE"), ("TEXTCOLOR", (5, 0), (5, 0), colors.white), ), - parent=TABLE_STYLE_SUMMARY, + parent=TABLE_STYLE_DEFAULT, ) test_timestamp = pandas.to_datetime(tr_data["test_time"]).strftime("%Y-%m-%d %H:%M:%S") @@ -227,144 +112,98 @@ def get_report_content(document, tr_data): ] summary_table_col_widths = [n * document.width for n in (.2, .1, .2, .2, .15, .15)] - yield Table(summary_table_data, style=summary_table_style, hAlign="LEFT", colWidths=summary_table_col_widths) + return Table(summary_table_data, style=summary_table_style, hAlign="LEFT", colWidths=summary_table_col_widths) - yield KeepTogether([ - Paragraph("Usage Notes", PARA_STYLE_H1), - Paragraph(f"{tr_data['usage_notes']}", PARA_STYLE_TEXT), - ]) +def build_history_table(document, tr_data): history_data = get_test_result_history(get_schema(), tr_data) history_table_style = TableStyle( ( - ("FONT", (1, 1), (2, -1), "Courier"), - ("ALIGN", (0, 1), (0, -1), "CENTER"), - ("ALIGN", (1, 1), (2, -1), "RIGHT"), - ("ALIGN", (3, 1), (3, -1), "CENTER"), + ("ALIGN", (3, 0), (3, -1), "CENTER"), ), - parent=TABLE_STYLE_DATA, + parent=TABLE_STYLE_DATA) + + test_timestamp = pandas.to_datetime(tr_data["test_time"]) + + style_per_status = { + status: ParagraphStyle(f"result_{status}", parent=PARA_STYLE_CELL, textColor=color) + for status, color in RESULT_STATUS_COLORS.items() + } + + for idx in history_data.index[history_data["test_date"] == test_timestamp]: + if idx > 0: + history_table_style.add("BACKGROUND", (0, idx + 1), (-1, idx + 1), COLOR_GRAY_BG) + + history_df = pandas.DataFrame() + history_df = history_df.assign( + test_date=history_data["test_date"].copy(), + threshold_value=history_data["threshold_value"].astype(float).copy(), + result_measure=history_data["result_measure"].astype(float).copy(), + result_status=history_data["result_status"].map( + lambda status: Paragraph(status, style=style_per_status[status]) + ).copy(), ) + history_df.columns = ("Test Date", "Threshold Value", "Measure Value", "Status") - history_iterator = iter(history_data.iterrows()) - historical_status = history_data["result_status"][0] - status_change_idx = 1 - while historical_status: - try: - idx, row = next(history_iterator) - except StopIteration: - row = {"result_status": None} - idx += 1 - - if row["result_status"] != historical_status: - history_table_style.add( - "TEXTCOLOR", - (3, status_change_idx), - (3, idx), - COLOR_TEST_STATUS.get(historical_status, COLOR_GRAY_BG) - ) - historical_status = row["result_status"] - status_change_idx = idx + 1 - - if idx > 1 and "test_date" in row and str(row["test_date"]) == test_timestamp: - history_table_style.add( - "BACKGROUND", (0, idx + 1), (-1, idx + 1), COLOR_GRAY_BG - ) - - history_table_data = ( - ("Test Date", "Threshold Value", "Measure Value", "Status"), - *[ - (r["test_date"], r["threshold_value"], r["result_measure"], 
r["result_status"]) - for _, r in history_data.iterrows() - ], - ) + table_builder = DataFrameTableBuilder(history_df, document.width) + table = table_builder.build_table(hAlign="LEFT", style=history_table_style) + return table_builder.split_in_columns(table) - history_table = Table(history_table_data, style=history_table_style, repeatRows=1, hAlign="LEFT") - yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT) - yield Paragraph("Result History", PARA_STYLE_H1) - yield BalancedColumns(history_table) if len(history_table_data) > 10 else history_table +def build_sample_data_content(document, sample_data_tuple): + sample_data_status, sample_data_msg, lookup_query, sample_data = sample_data_tuple + if sample_data_status in ("ND", "NA"): + yield Paragraph(sample_data_msg, style=PARA_STYLE_INFO) + elif sample_data_status == "ERR" or sample_data is None: + yield Paragraph("It was not possible to fetch the sample data this time.", style=PARA_STYLE_INFO) + else: + sample_data.columns = [col.replace("_", " ").title() for col in sample_data.columns] + df_table_builder = DataFrameTableBuilder(sample_data, document.width) + table_flowables = [df_table_builder.build_table(hAlign="LEFT")] + if df_table_builder.omitted_columns: + omitted_columns = ", ".join(df_table_builder.omitted_columns) + sample_data_msg = f"Note: The following columns were omitted from this table: {omitted_columns}" + if sample_data_msg: + table_flowables.append(Paragraph(sample_data_msg, style=PARA_STYLE_FOOTNOTE)) - yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT) - yield Paragraph("Sample Data", PARA_STYLE_H1) - col_padding = 16 + yield from df_table_builder.split_in_columns(table_flowables) - if tr_data["test_type"] == "CUSTOM": - bad_data_status, bad_data_msg, lookup_query, sample_data = do_source_data_lookup_custom(get_schema(), tr_data) - else: - bad_data_status, bad_data_msg, lookup_query, sample_data = do_source_data_lookup(get_schema(), tr_data) - if bad_data_status in ("ND", "NA"): - yield Paragraph(bad_data_msg, style=PARA_STYLE_INFO) - elif bad_data_status == "ERR" or sample_data is None: - yield Paragraph("It was not possible to fetch the sample data this time.", style=PARA_STYLE_INFO) +def build_sql_query_conntent(sample_data_tuple): + lookup_query = sample_data_tuple[2] + if lookup_query: + return Paragraph(lookup_query, PARA_STYLE_MONO) else: - table_data = sample_data.fillna(Paragraph("NULL", style=PARA_STYLE_CELL_NULL)) - col_len_data = pandas.DataFrame(columns=["min_chars", "max_chars", "min_width", "max_width"], index=iter(sample_data)) - - for col_idx in sample_data: - col = sample_data[col_idx] - para_style = PARA_STYLE_CELL_NUMERIC if is_numeric_dtype(col) else PARA_STYLE_CELL - if not is_string_dtype(sample_data[col_idx]): - col = sample_data[col_idx].astype(str) - - max_width = col.map(lambda cell: stringWidth(cell, para_style.fontName, para_style.fontSize)).max() - min_chars = col.map( - lambda cell: max([len(word) for word in cell.split(" ")]) - ).max() - max_chars = col.str.len().max() - col_padding = 16 - col_len_data.loc[col_idx] = ( - min_chars, - max_chars, - min_chars * max_width / max_chars + col_padding, - max_width + col_padding, - ) - table_data[col_idx] = col.map( - lambda cell: Paragraph(cell, style=para_style) if cell else Paragraph("NULL", PARA_STYLE_CELL_NUMERIC) - ) - - available_width = document.width - - while True: - if col_len_data["min_width"].sum() <= available_width: - break - largest_col = col_len_data["min_width"].idxmax() - table_data = table_data.drop(columns=largest_col) - 
col_len_data = col_len_data.drop(index=largest_col) - bad_data_msg = "Some too wide columns are omitted. Visit the website to check the full content." - - expandable_width = available_width - col_len_data["min_width"].sum() - col_len_data["expand_appetite"] = col_len_data["max_width"] - col_len_data["min_width"] - col_len_data["width"] = col_len_data["min_width"] + col_len_data["expand_appetite"] * max(1, col_len_data["expand_appetite"].sum() / expandable_width) - - sample_data_table = Table( - ( - [col.replace("_", " ").title() for col in table_data.columns], - *(data.tolist() for _, data in table_data.iterrows()), - ), - style=TABLE_STYLE_DATA, - hAlign="LEFT", - colWidths=col_len_data["width"].tolist(), - repeatRows=1, - ) - - layout_columns = int(available_width / (col_len_data["width"].sum() + col_padding)) - if layout_columns > 1 and len(table_data) > 10: - yield BalancedColumns(sample_data_table, layout_columns) - else: - yield sample_data_table - if bad_data_msg: - yield Paragraph(bad_data_msg, style=PARA_STYLE_FOOTNOTE) + return Paragraph("No sample data lookup query registered for this test.") - if lookup_query: - lookup_query_para = Paragraph(lookup_query, PARA_STYLE_MONO) + +def get_report_content(document, tr_data): + + yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE) + yield build_summary_table(document, tr_data) + + yield KeepTogether([ + Paragraph("Usage Notes", PARA_STYLE_H1), + Paragraph(f"{tr_data['usage_notes']}", PARA_STYLE_TEXT), + ]) + + yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT) + yield Paragraph("Result History", PARA_STYLE_H1) + yield build_history_table(document, tr_data) + + if tr_data["test_type"] == "CUSTOM": + sample_data_tuple = do_source_data_lookup_custom(get_schema(), tr_data) else: - lookup_query_para = Paragraph("No sample data lookup query registered for this test.") + sample_data_tuple = do_source_data_lookup(get_schema(), tr_data) + + yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT) + yield Paragraph("Sample Data", PARA_STYLE_H1) + yield from build_sample_data_content(document, sample_data_tuple) yield KeepTogether([ Paragraph("SQL Query", PARA_STYLE_H1), - lookup_query_para + build_sql_query_conntent(sample_data_tuple) ]) From ad724e7825685ff437a3fa283565eadfde96ea3f Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Tue, 8 Oct 2024 17:21:54 -0400 Subject: [PATCH 17/91] mic(pdf): Addressing team feedback --- pyproject.toml | 1 + testgen/ui/pdf/dataframe_table.py | 1 + testgen/ui/pdf/dk_logo.py | 59 ++++++++++++++++++++++++ testgen/ui/pdf/templates.py | 31 +++++++++++++ testgen/ui/pdf/test_result_report.py | 10 ++--- testgen/ui/views/test_results.py | 67 +++++++++++++++++++++------- 6 files changed, 147 insertions(+), 22 deletions(-) create mode 100644 testgen/ui/pdf/dk_logo.py create mode 100644 testgen/ui/pdf/templates.py diff --git a/pyproject.toml b/pyproject.toml index 116243b..ce6b605 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -239,6 +239,7 @@ ignore = ["TRY003", "S608", "S404", "F841", "B023"] "tests*" = ["S101", "T201"] "invocations/**" = ["ARG001", "T201"] "testgen/common/encrypt.py" = ["S413"] +"testgen/ui/pdf/dk_logo.py" = ["T201"] # See: https://coverage.readthedocs.io/en/latest/config.html [tool.coverage.run] diff --git a/testgen/ui/pdf/dataframe_table.py b/testgen/ui/pdf/dataframe_table.py index b24a5ae..18cde27 100644 --- a/testgen/ui/pdf/dataframe_table.py +++ b/testgen/ui/pdf/dataframe_table.py @@ -29,6 +29,7 @@ PARA_STYLE_CELL_NUMERIC, alignment=enums.TA_CENTER, textColor=COLOR_FADED_TEXT, + 
fontName="Courier-Oblique", ) PARA_STYLE_CELL_HEADER = ParagraphStyle( diff --git a/testgen/ui/pdf/dk_logo.py b/testgen/ui/pdf/dk_logo.py new file mode 100644 index 0000000..89cfb98 --- /dev/null +++ b/testgen/ui/pdf/dk_logo.py @@ -0,0 +1,59 @@ +__all__ = ["get_logo"] + +from reportlab.graphics.shapes import Drawing, Path +from reportlab.lib.colors import Color + +# The following paths were gotten from the `dk_logo.svg` file. As a convenience, it's possible to manually run this file +# to update the paths, in case the logo changes. Installing the `svglib` package is required for that. + +shapes = [ + Path(fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[37.3, 107.6, 37.9, 107.6, 38.4, 107.6, 38.9, 107.5, 36.699999999999996, 107.5, 34.4, 107.6, 32.0, 107.6, 37.3, 107.6], operators=[0, 2, 2, 1, 3], _fillRule=1), + Path(fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[37.3, 9.9, 32.0, 9.9, 34.4, 9.9, 36.7, 9.9, 38.9, 10.0, 38.3, 9.9, 37.8, 9.9, 37.3, 9.9], operators=[0, 1, 2, 2, 3], _fillRule=1), + Path(fillColor=Color(.666667,.815686,.27451,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[85.6, 58.3, 85.6, 56.599999999999994, 85.5, 55.0, 85.3, 53.4, 84.7, 45.6, 82.8, 38.599999999999994, 79.39999999999999, 32.599999999999994, 76.0, 26.5, 71.5, 21.7, 65.9, 18.0, 64.7, 17.2, 63.60000000000001, 16.4, 62.300000000000004, 15.8, 59.6, 14.4, 56.800000000000004, 13.3, 53.800000000000004, 12.4, 11.7, 58.7, 53.900000000000006, 105.1, 56.800000000000004, 104.19999999999999, 59.7, 103.1, 62.400000000000006, 101.69999999999999, 63.300000000000004, 101.19999999999999, 64.2, 100.6, 65.10000000000001, 99.99999999999999, 71.10000000000001, 96.19999999999999, 76.00000000000001, 91.19999999999999, 79.50000000000001, 84.79999999999998, 83.10000000000001, 78.39999999999998, 85.00000000000001, 70.89999999999998, 85.40000000000002, 62.399999999999984, 85.50000000000001, 61.499999999999986, 85.50000000000001, 60.59999999999999, 85.50000000000001, 59.69999999999998, 85.50000000000001, 59.29999999999998, 85.60000000000001, 58.99999999999998, 85.60000000000001, 58.59999999999998, 85.60000000000001, 58.49999999999998, 85.60000000000001, 58.49999999999998, 85.60000000000001, 58.39999999999998, 85.5, 58.5, 85.6, 58.4, 85.6, 58.3], operators=[0, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[53.8, 12.3, 49.199999999999996, 10.9, 44.199999999999996, 10.100000000000001, 38.9, 9.9, 36.699999999999996, 9.9, 34.4, 9.8, 32.0, 9.8, 16.5, 9.8, 13.8, 9.8, 11.7, 12.0, 11.7, 14.600000000000001, 11.7, 44.400000000000006, 11.7, 58.7, 53.8, 12.3, 53.8, 12.3, 53.8, 12.3, 53.8, 12.3], operators=[0, 2, 2, 1, 2, 1, 1, 1, 2, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[11.7, 73.0, 11.7, 102.8, 11.7, 105.5, 13.899999999999999, 107.6, 16.5, 107.6, 32.0, 107.6, 34.4, 107.6, 36.7, 107.6, 38.9, 107.5, 44.199999999999996, 107.4, 49.2, 106.5, 53.8, 105.1, 53.8, 105.1, 53.8, 105.1, 53.8, 105.1, 11.7, 58.7, 11.7, 73.0], operators=[0, 1, 2, 1, 2, 2, 2, 1, 1, 3], _fillRule=1), + Path(fillColor=Color(.666667,.815686,.27451,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[94.5, 9.9, 69.3, 9.9, 69.6, 10.1, 69.8, 10.3, 70.1, 10.6, 76.5, 15.0, 81.8, 20.6, 85.69999999999999, 27.700000000000003, 89.69999999999999, 34.900000000000006, 91.99999999999999, 43.2, 92.69999999999999, 52.5, 
92.89999999999999, 54.4, 92.99999999999999, 56.4, 92.99999999999999, 58.4, 92.99999999999999, 58.5, 92.99999999999999, 58.6, 92.99999999999999, 58.699999999999996, 92.99999999999999, 58.8, 92.99999999999999, 58.8, 92.99999999999999, 58.9, 92.99999999999999, 59.3, 92.89999999999999, 59.699999999999996, 92.89999999999999, 60.199999999999996, 92.89999999999999, 61.3, 92.8, 62.3, 92.69999999999999, 63.4, 92.19999999999999, 73.5, 89.89999999999999, 82.4, 85.6, 90.1, 81.5, 97.5, 75.89999999999999, 103.3, 69.1, 107.69999999999999, 69.1, 107.69999999999999, 69.1, 107.79999999999998, 69.1, 107.79999999999998, 94.5, 107.79999999999998, 97.2, 107.79999999999998, 99.3, 105.59999999999998, 99.3, 102.99999999999999, 99.3, 14.999999999999986, 99.4, 12.1, 97.2, 9.9, 94.5, 9.9], operators=[0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[132.0, 28.0, 156.1, 28.0, 173.7, 28.0, 183.9, 41.4, 183.9, 58.9, 183.9, 76.3, 173.6, 89.5, 156.1, 89.5, 132.0, 89.5, 132.0, 28.0, 156.1, 79.7, 167.29999999999998, 79.7, 173.0, 70.2, 173.0, 58.800000000000004, 173.0, 47.300000000000004, 167.3, 37.7, 156.1, 37.7, 142.9, 37.7, 142.9, 79.7, 156.1, 79.7], operators=[0, 1, 2, 2, 1, 1, 3, 0, 2, 2, 1, 1, 1, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[231.9, 47.8, 231.9, 89.5, 221.5, 89.5, 221.5, 83.9, 218.6, 88.4, 212.8, 90.4, 207.7, 90.4, 196.7, 90.4, 187.2, 81.9, 187.2, 68.60000000000001, 187.2, 55.20000000000001, 196.7, 46.900000000000006, 207.6, 46.900000000000006, 212.9, 46.900000000000006, 218.7, 49.00000000000001, 221.5, 53.300000000000004, 221.5, 47.800000000000004, 231.9, 47.800000000000004, 221.4, 68.5, 221.4, 61.2, 215.3, 56.5, 209.4, 56.5, 203.0, 56.5, 197.70000000000002, 61.5, 197.70000000000002, 68.5, 197.70000000000002, 75.5, 203.00000000000003, 80.6, 209.4, 80.6, 215.7, 80.6, 221.4, 75.8, 221.4, 68.5], operators=[0, 1, 1, 1, 2, 2, 2, 2, 1, 1, 3, 0, 2, 2, 2, 2, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[263.0, 56.1, 254.2, 56.1, 254.2, 89.5, 243.79999999999998, 89.5, 243.79999999999998, 56.1, 236.29999999999998, 56.1, 236.29999999999998, 47.8, 243.79999999999998, 47.8, 243.79999999999998, 32.5, 254.2, 32.5, 254.2, 47.9, 263.0, 47.9, 263.0, 56.1], operators=[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[309.0, 47.8, 309.0, 89.5, 298.6, 89.5, 298.6, 83.9, 295.70000000000005, 88.4, 289.90000000000003, 90.4, 284.8, 90.4, 273.8, 90.4, 264.3, 81.9, 264.3, 68.60000000000001, 264.3, 55.20000000000001, 273.8, 46.900000000000006, 284.7, 46.900000000000006, 290.0, 46.900000000000006, 295.8, 49.00000000000001, 298.59999999999997, 53.300000000000004, 298.59999999999997, 47.800000000000004, 309.0, 47.800000000000004, 298.4, 68.5, 298.4, 61.2, 292.29999999999995, 56.5, 286.4, 56.5, 280.0, 56.5, 274.7, 61.5, 274.7, 68.5, 274.7, 75.5, 280.0, 80.6, 286.4, 80.6, 292.7, 80.6, 298.4, 75.8, 298.4, 68.5], operators=[0, 1, 1, 1, 2, 2, 2, 2, 1, 1, 3, 0, 2, 2, 2, 2, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[354.6, 89.5, 333.40000000000003, 66.1, 333.40000000000003, 89.5, 322.6, 89.5, 322.6, 28.0, 333.40000000000003, 28.0, 
333.40000000000003, 51.3, 350.7, 28.0, 364.2, 28.0, 340.7, 58.6, 369.3, 89.5, 354.6, 89.5], operators=[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[373.7, 33.1, 373.7, 29.400000000000002, 376.8, 26.8, 380.5, 26.8, 384.2, 26.8, 387.2, 29.5, 387.2, 33.1, 387.2, 36.7, 384.3, 39.4, 380.5, 39.4, 376.9, 39.4, 373.7, 36.6, 373.7, 33.1, 375.3, 47.8, 385.7, 47.8, 385.7, 89.5, 375.3, 89.5, 375.3, 47.8], operators=[0, 2, 2, 2, 2, 3, 0, 1, 1, 1, 1, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[417.4, 56.1, 408.59999999999997, 56.1, 408.59999999999997, 89.5, 398.2, 89.5, 398.2, 56.1, 390.7, 56.1, 390.7, 47.8, 398.2, 47.8, 398.2, 32.5, 408.59999999999997, 32.5, 408.59999999999997, 47.9, 417.4, 47.9, 417.4, 56.1], operators=[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[417.6, 68.6, 417.6, 55.39999999999999, 428.20000000000005, 46.89999999999999, 440.3, 46.89999999999999, 447.5, 46.89999999999999, 453.40000000000003, 49.99999999999999, 457.3, 54.89999999999999, 449.90000000000003, 60.69999999999999, 447.8, 58.09999999999999, 444.20000000000005, 56.499999999999986, 440.50000000000006, 56.499999999999986, 433.30000000000007, 56.499999999999986, 428.1000000000001, 61.499999999999986, 428.1000000000001, 68.49999999999999, 428.1000000000001, 75.49999999999999, 433.30000000000007, 80.49999999999999, 440.50000000000006, 80.49999999999999, 444.20000000000005, 80.49999999999999, 447.70000000000005, 78.89999999999999, 449.90000000000003, 76.29999999999998, 457.3, 82.09999999999998, 453.5, 86.89999999999998, 447.6, 90.09999999999998, 440.3, 90.09999999999998, 428.3, 90.3, 417.6, 81.8, 417.6, 68.6], operators=[0, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[500.7, 66.1, 500.7, 89.5, 490.3, 89.5, 490.3, 67.1, 490.3, 60.49999999999999, 486.3, 57.099999999999994, 481.6, 57.099999999999994, 476.90000000000003, 57.099999999999994, 471.0, 59.699999999999996, 471.0, 67.69999999999999, 471.0, 89.49999999999999, 460.6, 89.49999999999999, 460.6, 25.499999999999986, 471.0, 25.499999999999986, 471.0, 54.09999999999999, 473.1, 49.09999999999999, 479.7, 46.899999999999984, 483.9, 46.899999999999984, 494.8, 46.9, 500.7, 54.0, 500.7, 66.1], operators=[0, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[547.5, 72.3, 515.3, 72.3, 516.5, 78.1, 520.9, 81.0, 527.0999999999999, 81.0, 531.6999999999999, 81.0, 535.8999999999999, 79.2, 538.3999999999999, 75.8, 545.2999999999998, 81.1, 541.4999999999999, 87.19999999999999, 534.0999999999998, 90.39999999999999, 526.5999999999998, 90.39999999999999, 514.0999999999998, 90.39999999999999, 504.5999999999998, 81.69999999999999, 504.5999999999998, 68.6, 504.5999999999998, 55.3, 514.5999999999998, 46.89999999999999, 526.4999999999998, 46.89999999999999, 538.4999999999998, 46.89999999999999, 547.7999999999997, 55.19999999999999, 547.7999999999997, 68.19999999999999, 547.8, 69.4, 547.7, 70.7, 547.5, 72.3, 537.4, 65.0, 536.8, 59.3, 532.4, 56.0, 526.6, 56.0, 521.0, 56.0, 516.5, 58.7, 515.3000000000001, 65.0, 537.4, 65.0], operators=[0, 1, 2, 2, 1, 
2, 2, 2, 2, 2, 3, 0, 2, 2, 1, 3], _fillRule=1), + Path(fillColor=Color(.023529,.627451,.290196,1), fillOpacity=1.0, strokeColor=None, strokeOpacity=1.0, points=[591.9, 66.1, 591.9, 89.5, 581.5, 89.5, 581.5, 67.1, 581.5, 60.49999999999999, 577.5, 57.099999999999994, 572.8, 57.099999999999994, 568.0999999999999, 57.099999999999994, 562.1999999999999, 59.699999999999996, 562.1999999999999, 67.69999999999999, 562.1999999999999, 89.49999999999999, 551.8, 89.49999999999999, 551.8, 47.8, 562.1999999999999, 47.8, 562.1999999999999, 54.4, 564.3, 49.199999999999996, 570.9, 46.9, 575.0999999999999, 46.9, 585.9, 46.9, 591.9, 54.0, 591.9, 66.1], operators=[0, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 3], _fillRule=1), +] + + +def get_logo(width): + orig_width = 600 + orig_height = 110 + height = orig_height * width / orig_width + logo = Drawing(width, height, *shapes) + logo.translate(0, height) + scale = width / orig_width + logo.scale(scale, -scale) + return logo + + +if __name__ == "__main__": + + from svglib.svglib import svg2rlg + + drawing = svg2rlg("./testgen/testgen/ui/assets/dk_logo.svg") + + def extract_shapes(drawing): + if hasattr(drawing, "contents"): + for content in drawing.contents: + yield from extract_shapes(content) + else: + yield drawing + + print("shapes = [") + for shape in extract_shapes(drawing): + print(f" {shape.__class__.__name__}(", end="") + print(", ".join([f"{attr}={val!r}" for attr, val in shape.getProperties().items()]), end="") + print("),") + print("]\n") diff --git a/testgen/ui/pdf/templates.py b/testgen/ui/pdf/templates.py new file mode 100644 index 0000000..cba722a --- /dev/null +++ b/testgen/ui/pdf/templates.py @@ -0,0 +1,31 @@ +from reportlab.lib.units import inch +from reportlab.platypus import SimpleDocTemplate + +from testgen.ui.pdf.dk_logo import get_logo + +MARGIN = 0.4 * inch + + +class DatakitchenTemplate(SimpleDocTemplate): + + def __init__(self, filename): + super().__init__(filename, leftMargin=MARGIN, rightMargin=MARGIN, topMargin=MARGIN + 10, bottomMargin=MARGIN) + + def beforePage(self): + header_padding = 5 + header_base_y = self.pagesize[1] - 18 + self.canv.setFont("Helvetica", 8) + self.canv.drawString(MARGIN + header_padding, header_base_y , "DataOps Data Quality TestGen") + self.canv.line( + MARGIN + header_padding, + header_base_y - header_padding, + self.pagesize[0] - MARGIN, + header_base_y - header_padding + ) + + logo = get_logo(80) + logo.drawOn( + self.canv, + self.pagesize[0] - logo.width - MARGIN, + header_base_y + ) diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py index 34bbc68..73c6db8 100644 --- a/testgen/ui/pdf/test_result_report.py +++ b/testgen/ui/pdf/test_result_report.py @@ -2,12 +2,10 @@ from reportlab.lib import colors from reportlab.lib.colors import HexColor from reportlab.lib.styles import ParagraphStyle -from reportlab.lib.units import inch from reportlab.platypus import ( CondPageBreak, KeepTogether, Paragraph, - SimpleDocTemplate, Table, TableStyle, ) @@ -25,6 +23,7 @@ PARA_STYLE_TITLE, TABLE_STYLE_DEFAULT, ) +from testgen.ui.pdf.templates import DatakitchenTemplate from testgen.ui.services.database_service import get_schema from testgen.ui.services.test_results_service import ( do_source_data_lookup, @@ -32,8 +31,6 @@ get_test_result_history, ) -MARGIN = 0.4 * inch - SECTION_MIN_AVAILABLE_HEIGHT = 120 RESULT_STATUS_COLORS = { @@ -108,7 +105,7 @@ def build_summary_table(document, tr_data): ("Date", test_timestamp, None, "Table Group", tr_data["table_groups_name"]), ("Database/Schema", 
tr_data["schema_name"], None, "Test Suite", tr_data["test_suite"]), ("Table", tr_data["table_name"], None, "Data Quality Dimension", tr_data["dq_dimension"]), - ("Column", tr_data["column_names"], None, "Risk Level", tr_data["severity"]), + ("Column", tr_data["column_names"], None, "Disposition", tr_data["disposition"] or "No Decision"), ] summary_table_col_widths = [n * document.width for n in (.2, .1, .2, .2, .15, .15)] @@ -179,7 +176,6 @@ def build_sql_query_conntent(sample_data_tuple): def get_report_content(document, tr_data): - yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE) yield build_summary_table(document, tr_data) @@ -208,5 +204,5 @@ def get_report_content(document, tr_data): def create_report(filename, tr_data): - doc = SimpleDocTemplate(filename, leftMargin=MARGIN, rightMargin=MARGIN, topMargin=MARGIN, bottomMargin=MARGIN) + doc = DatakitchenTemplate(filename) doc.build(flowables=list(get_report_content(doc, tr_data))) diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 1acee8f..0126878 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -1,6 +1,8 @@ import tempfile import typing from datetime import date +from io import BytesIO +from zipfile import ZipFile import pandas as pd import plotly.express as px @@ -244,7 +246,8 @@ def get_test_results_uncached(str_schema, str_run_id, str_sel_test_status, test_ END::VARCHAR as test_definition_id_current, r.auto_gen, - tt.threshold_description, tt.usage_notes, r.test_time -- These are used in the PDF report + -- These are used in the PDF report + tt.threshold_description, tt.usage_notes, r.test_time FROM run_results r INNER JOIN {str_schema}.test_types tt @@ -543,23 +546,33 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co view_bad_data(v_col3, selected_row) with v_col4: + + report_eligible_rows = [ + row for row in selected_rows + if row["result_status"] != "Passed" and row["disposition"] in (None, "Confirmed") + ] + if st.button( - ":material/file_save: Report", + ":material/file_save: Issue Report", use_container_width=True, + disabled=not report_eligible_rows, + help="Generate PDF reports for the selected results that are not muted or dismissed and are not Passed", ): - - def _generate(): - with tempfile.NamedTemporaryFile() as pdf_file: - create_report(pdf_file.name, selected_row) - return pdf_file.read() - - download_dialog( - dialog_title="Download Issue Report", - file_name="testgen_issue_report.pdf", - mime_type="application/pdf", - file_content_func=_generate, - ) - + dialog_title = "Download Issue Report" + if len(report_eligible_rows) == 1: + download_dialog( + dialog_title=dialog_title, + file_name=get_report_file_name(report_eligible_rows[0]), + mime_type="application/pdf", + file_content_func=lambda: get_report_content(report_eligible_rows[0]), + ) + else: + download_dialog( + dialog_title=dialog_title, + file_name="testgen_issue_reports.zip", + mime_type="application/zip", + file_content_func=lambda: get_report_content_zip(report_eligible_rows), + ) with pg_col1: fm.show_subheader(selected_row["test_name_short"]) st.markdown(f"###### {selected_row['test_description']}") @@ -714,3 +727,27 @@ def view_edit_test(button_container, test_definition_id): with button_container: if st.button("🖊️ Edit Test", help="Edit the Test Definition", use_container_width=True): show_test_form_by_id(test_definition_id) + + +def get_report_file_name(tr_data): + td_id = tr_data["test_definition_id_runtime"][:6] + tr_time = 
pd.Timestamp(tr_data["test_time"]).strftime("%Y%m%d_%H%M%S") + return f"testgen_issue_report_{td_id}_{tr_time}.pdf" + + +def get_report_content(tr_data): + with BytesIO() as buffer: + create_report(buffer, tr_data) + buffer.seek(0) + return buffer.read() + + +def get_report_content_zip(tr_data_list): + with tempfile.NamedTemporaryFile() as zip_file: + with ZipFile(zip_file.name, "w") as zip_writer: + for tr_data in tr_data_list: + zip_writer.writestr( + get_report_file_name(tr_data), + get_report_content(tr_data), + ) + return zip_file.read() From 2f6c43b8800219260a7ffa707866a1c8632f8c6a Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Tue, 8 Oct 2024 21:10:35 -0400 Subject: [PATCH 18/91] mic(pdf): Adding a progress bar to the download dialog --- .../ui/components/widgets/download_dialog.py | 67 +++++++++++++++---- testgen/ui/views/test_results.py | 41 ++++-------- 2 files changed, 67 insertions(+), 41 deletions(-) diff --git a/testgen/ui/components/widgets/download_dialog.py b/testgen/ui/components/widgets/download_dialog.py index 34ec928..9b9d4c3 100644 --- a/testgen/ui/components/widgets/download_dialog.py +++ b/testgen/ui/components/widgets/download_dialog.py @@ -1,34 +1,73 @@ -from collections.abc import Callable -from typing import Any +import tempfile +from collections.abc import Callable, Generator, Iterable +from zipfile import ZipFile import streamlit as st +def zip_multi_file_data( + zip_file_name: str, + file_data_func: Callable[[Generator, ...], tuple[str, str, str|bytes]], + args_list: list[Iterable], +): + + def _file_content_func(progress_gen, *args): + + progress = 0.0 + step = 1.0 / len(args_list) + + def _file_gen(): + while True: + f_progress = yield + progress_gen.send(progress + step / f_progress) + + with tempfile.NamedTemporaryFile() as zip_file: + with ZipFile(zip_file.name, "w") as zip_writer: + progress_gen.send(None) + for args in args_list: + file_name, _, file_data = file_data_func(_file_gen(), *args) + zip_writer.writestr(file_name, file_data) + progress += step + zip_content = zip_file.read() + + return zip_file_name, "application/zip", zip_content + + return _file_content_func + + + def download_dialog( dialog_title: str, - file_name: str, - mime_type: str, - file_content_func: Callable[[], Any], + file_content_func: Callable[[Generator, ...], tuple[str, str, str|bytes]], + args: Iterable = (), + progress_bar_msg: str = "Generating file...", ): """Wrapping a dialog and a download button together to allow generating the file contents only when needed.""" def _dialog_content(): - # Encapsulating the dialog content in a container just to force its height and avoid the dialog to - # have its height changed when the button is rendered. 
+ + with st.container(height=70, border=False): + p_bar = st.progress(0.0, progress_bar_msg) + with st.container(height=55, border=False): - spinner_col, button_col, _ = st.columns([.3, .4, .3]) + _, button_col, _ = st.columns([.3, .4, .3]) + + def _get_progress_gen(): + while True: + progress = yield + p_bar.progress(progress, progress_bar_msg) + + file_name, file_type, file_content = file_content_func(_get_progress_gen(), *args) - with spinner_col: - with st.spinner(text="Generating file..."): - data = file_content_func() + p_bar.progress(1.0, "Done!") with button_col: st.download_button( label=":material/download: Download", - data=data, + data=file_content, file_name=file_name, - mime=mime_type, - use_container_width=True + mime=file_type, + use_container_width=True, ) return st.dialog(title=dialog_title, width="small")(_dialog_content)() diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 0126878..3b86534 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -1,8 +1,6 @@ -import tempfile import typing from datetime import date from io import BytesIO -from zipfile import ZipFile import pandas as pd import plotly.express as px @@ -14,7 +12,7 @@ import testgen.ui.services.query_service as dq from testgen.common import date_service from testgen.ui.components import widgets as testgen -from testgen.ui.components.widgets.download_dialog import download_dialog +from testgen.ui.components.widgets.download_dialog import download_dialog, zip_multi_file_data from testgen.ui.navigation.page import Page from testgen.ui.pdf.test_result_report import create_report from testgen.ui.services import authentication_service, project_service @@ -562,17 +560,17 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co if len(report_eligible_rows) == 1: download_dialog( dialog_title=dialog_title, - file_name=get_report_file_name(report_eligible_rows[0]), - mime_type="application/pdf", - file_content_func=lambda: get_report_content(report_eligible_rows[0]), + file_content_func=get_report_file_data, + args=(report_eligible_rows[0],), ) else: - download_dialog( - dialog_title=dialog_title, - file_name="testgen_issue_reports.zip", - mime_type="application/zip", - file_content_func=lambda: get_report_content_zip(report_eligible_rows), + zip_func = zip_multi_file_data( + "testgen_issue_reports.zip", + get_report_file_data, + [(arg,) for arg in selected_rows], ) + download_dialog(dialog_title=dialog_title, file_content_func=zip_func) + with pg_col1: fm.show_subheader(selected_row["test_name_short"]) st.markdown(f"###### {selected_row['test_description']}") @@ -729,25 +727,14 @@ def view_edit_test(button_container, test_definition_id): show_test_form_by_id(test_definition_id) -def get_report_file_name(tr_data): +def get_report_file_data(progress_gen, tr_data): td_id = tr_data["test_definition_id_runtime"][:6] tr_time = pd.Timestamp(tr_data["test_time"]).strftime("%Y%m%d_%H%M%S") - return f"testgen_issue_report_{td_id}_{tr_time}.pdf" - + file_name = f"testgen_issue_report_{td_id}_{tr_time}.pdf" -def get_report_content(tr_data): with BytesIO() as buffer: + progress_gen.send(None) create_report(buffer, tr_data) + progress_gen.send(1.0) buffer.seek(0) - return buffer.read() - - -def get_report_content_zip(tr_data_list): - with tempfile.NamedTemporaryFile() as zip_file: - with ZipFile(zip_file.name, "w") as zip_writer: - for tr_data in tr_data_list: - zip_writer.writestr( - get_report_file_name(tr_data), - 
get_report_content(tr_data), - ) - return zip_file.read() + return file_name, "application/pdf", buffer.read() From 7c1aa58d6d82ed7ebbea898f9a6f73d4e8adea39 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 8 Oct 2024 23:02:00 -0400 Subject: [PATCH 19/91] fix(upgrade): add where clause in sql update statements with joins --- testgen/template/dbupgrade/0108_incremental_upgrade.sql | 3 ++- testgen/template/dbupgrade/0109_incremental_upgrade.sql | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/testgen/template/dbupgrade/0108_incremental_upgrade.sql b/testgen/template/dbupgrade/0108_incremental_upgrade.sql index df8b7cb..a7f4980 100644 --- a/testgen/template/dbupgrade/0108_incremental_upgrade.sql +++ b/testgen/template/dbupgrade/0108_incremental_upgrade.sql @@ -16,7 +16,8 @@ DROP INDEX ix_td_pc_stc_tst; SET test_suite_id = ts.id FROM test_definitions td INNER JOIN test_suites AS ts ON td.test_suite = ts.test_suite AND td.project_code = ts.project_code - WHERE td.test_suite_id is NULL; + WHERE td.test_suite_id is NULL + AND test_definitions.id = td.id; ALTER TABLE test_definitions ALTER COLUMN test_suite_id SET NOT NULL; diff --git a/testgen/template/dbupgrade/0109_incremental_upgrade.sql b/testgen/template/dbupgrade/0109_incremental_upgrade.sql index 028dcc4..ceb3304 100644 --- a/testgen/template/dbupgrade/0109_incremental_upgrade.sql +++ b/testgen/template/dbupgrade/0109_incremental_upgrade.sql @@ -18,7 +18,8 @@ ALTER TABLE test_runs ADD COLUMN test_suite_id UUID; UPDATE test_runs SET test_suite_id = ts.id FROM test_runs tr -INNER JOIN test_suites AS ts ON tr.test_suite = ts.test_suite AND tr.project_code = ts.project_code; +INNER JOIN test_suites AS ts ON tr.test_suite = ts.test_suite AND tr.project_code = ts.project_code + WHERE test_runs.id = tr.id; ALTER TABLE test_runs ALTER COLUMN test_suite_id SET NOT NULL; @@ -27,7 +28,8 @@ ALTER TABLE test_runs ALTER COLUMN test_suite_id SET NOT NULL; SET test_suite_id = ts.id FROM test_results tr INNER JOIN test_suites AS ts ON tr.test_suite = ts.test_suite AND tr.project_code = ts.project_code - WHERE tr.test_suite_id is NULL; + WHERE tr.test_suite_id is NULL + AND test_results.id = tr.id; ALTER TABLE test_results ALTER COLUMN test_suite_id SET NOT NULL; ALTER TABLE test_results ALTER COLUMN test_run_id SET NOT NULL; From deaf6487cbc36ce49be04d17b5eb5209a260551a Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Thu, 10 Oct 2024 09:39:10 -0400 Subject: [PATCH 20/91] fix(pdf): Fixing the data table header aligment plus some other improvements --- .../ui/components/widgets/download_dialog.py | 14 ++++++ testgen/ui/pdf/dataframe_table.py | 43 ++++++++++++------- testgen/ui/pdf/test_result_report.py | 37 ++++++++++------ 3 files changed, 65 insertions(+), 29 deletions(-) diff --git a/testgen/ui/components/widgets/download_dialog.py b/testgen/ui/components/widgets/download_dialog.py index 9b9d4c3..feb04ca 100644 --- a/testgen/ui/components/widgets/download_dialog.py +++ b/testgen/ui/components/widgets/download_dialog.py @@ -41,9 +41,12 @@ def download_dialog( file_content_func: Callable[[Generator, ...], tuple[str, str, str|bytes]], args: Iterable = (), progress_bar_msg: str = "Generating file...", + key: str = "download_dialog", ): """Wrapping a dialog and a download button together to allow generating the file contents only when needed.""" + file_ready_key = f"{key}:file_ready" + def _dialog_content(): with st.container(height=70, border=False): @@ -52,6 +55,16 @@ def _dialog_content(): with st.container(height=55, 
border=False): _, button_col, _ = st.columns([.3, .4, .3]) + # The goal of this `file_ready` state is to prevent the file to be generated again after the user clicks + # the download button. Streamlit's way to close a dialog is to hit st.rerun(), which we should call when + # we get True from the download button being pushed, however it has to be rendered again for that, which + # means the file will be generated again. To avoid that, we simply call st.rerun() BEFORE generating the + # file, based on this session state. The drawback is that the dialog will unexpectedly close once by the + # next time it is opened after being closed by the user before "Download" is clicked. + if st.session_state.get(file_ready_key): + del st.session_state[file_ready_key] + st.rerun() + def _get_progress_gen(): while True: progress = yield @@ -60,6 +73,7 @@ def _get_progress_gen(): file_name, file_type, file_content = file_content_func(_get_progress_gen(), *args) p_bar.progress(1.0, "Done!") + st.session_state[file_ready_key] = True with button_col: st.download_button( diff --git a/testgen/ui/pdf/dataframe_table.py b/testgen/ui/pdf/dataframe_table.py index 18cde27..ff2f8c2 100644 --- a/testgen/ui/pdf/dataframe_table.py +++ b/testgen/ui/pdf/dataframe_table.py @@ -42,15 +42,22 @@ TABLE_STYLE_DATA = TableStyle( ( - ("VALIGN", (0, 0), (-1, 0), "MIDDLE"), + # All table ("GRID", (0, 0), (-1, -1), 0.5, COLOR_GRAY_BG), - ("INNERGRID", (0, 0), (-1, 0), 1, colors.white), - ("BACKGROUND", (0, 0), (-1, 0), COLOR_GRAY_BG), - ("LEFTPADDING", (0, 0), (-1, 0), 4), - ("RIGHTPADDING", (0, 0), (-1, 0), 4), - ("TOPPADDING", (0, 0), (-1, 0), 6), - ("BOTTOMPADDING", (0, 0), (-1, 0), 6), + # Header + *[ + (cmd[0], (0, 0), (-1, 0), *cmd[1:]) + for cmd in ( + ("INNERGRID", 1, colors.white), + ("BACKGROUND", COLOR_GRAY_BG), + ("VALIGN", "MIDDLE"), + ("LEFTPADDING", 4), + ("RIGHTPADDING", 4), + ("TOPPADDING", 6), + ("BOTTOMPADDING", 6), + ) + ], ), parent=TABLE_STYLE_DEFAULT, ) @@ -102,7 +109,7 @@ def drawOn(self, canvas, x, y, _sW=0): ret = self.flowable.drawOn( canvas, y, - -(x + self.available_width + (self.available_width - self.flowable_width) / 2), + -(x + self.available_width - (self.available_width - self.flowable_width) / 2), _sW, ) canvas.restoreState() @@ -124,11 +131,11 @@ class DataFrameTableBuilder: null_para = Paragraph("NULL", style=PARA_STYLE_CELL_NULL) - def __init__(self, dataframe, available_width, col_padding=16, header_exp_limit=0.45): + def __init__(self, dataframe, available_width, col_padding=16, max_header_exp_factor=0.4): self._dataframe = dataframe self.available_width = available_width self.col_padding = col_padding - self.header_exp_limit = header_exp_limit + self.max_header_exp_factor = max_header_exp_factor self.omitted_columns = [] self.col_len_data = pandas.DataFrame(columns=["width", "max_width"], index=iter(dataframe)) self.table_data = None @@ -165,8 +172,10 @@ def split_in_columns(self, flowables, min_rows=5, col_padding=10): layout_columns = min(layout_columns, int(len(self.table_data) / min_rows)) if layout_columns > 1: - columns = BalancedColumns(flowables, layout_columns, leftPadding=0, rightPadding=0, topPadding=0, bottomPadding=0) - # Converting the BC to a list to honor the `flowables` input type, for consistency + columns = BalancedColumns( + flowables, layout_columns, leftPadding=0, rightPadding=0, topPadding=0, bottomPadding=0 + ) + # Honoring the `flowables` input type, for consistency return [columns] if isinstance(flowables, Iterable) else columns else: return flowables @@ -176,18 
+185,22 @@ def _setup_header(self): [Paragraph(label, style=PARA_STYLE_CELL_HEADER) for label in self.table_data.columns], index=self.table_data.columns, ) - expansible_width = self._get_expansible_width() min_max_widths = header_cells.map(self._calc_cell_width) + min_widths = min_max_widths.map(lambda t: t[0]) min_exp_appetite = self._calc_expansion_appetite(min_widths) - if min_exp_appetite.sum() <= expansible_width: + # If the minimal expansion fits into the available width, the columns are expanded. + # Otherwise, the header is converted to vertical text + if min_exp_appetite.sum() <= self._get_expansible_width(): self.col_len_data["width"] += min_exp_appetite + # If the maximum expansion would grow the table width under the `max_header_exp_factor`, + # it's expanded to match max_widths = min_max_widths.map(lambda t: t[1]) max_exp_appetite = self._calc_expansion_appetite(max_widths) - if (max_exp_appetite.sum() + self._get_current_width()) / self.available_width <= self.header_exp_limit: + if max_exp_appetite.sum() / self._get_current_width() <= self.max_header_exp_factor: self.col_len_data["width"] += max_exp_appetite else: header_cells = header_cells.map(VerticalHeaderCell) diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py index 73c6db8..f0fa019 100644 --- a/testgen/ui/pdf/test_result_report.py +++ b/testgen/ui/pdf/test_result_report.py @@ -39,15 +39,9 @@ "Failed": HexColor(0xE94D4A), } + def build_summary_table(document, tr_data): status_color = RESULT_STATUS_COLORS.get(tr_data["result_status"], COLOR_GRAY_BG) - - TABLE_HEADER_CELL_CMD = ( - ("FONT", "Helvetica-Bold"), - ("ALIGN", "RIGHT"), - ("BACKGROUND", COLOR_GREEN_BG), - ) - summary_table_style = TableStyle( ( # All-table styles @@ -55,8 +49,18 @@ def build_summary_table(document, tr_data): ("BACKGROUND", (0, 0), (-1, -1), COLOR_GRAY_BG), # Header cells - *[(cmd[0], (3, 3), (3, -1), *cmd[1:]) for cmd in TABLE_HEADER_CELL_CMD], - *[(cmd[0], (0, 0), (0, -1), *cmd[1:]) for cmd in TABLE_HEADER_CELL_CMD], + *[ + (cmd[0], *coords, *cmd[1:]) + for coords in ( + ((3, 3), (3, -1)), + ((0, 0), (0, -1)) + ) + for cmd in ( + ("FONT", "Helvetica-Bold"), + ("ALIGN", "RIGHT"), + ("BACKGROUND", COLOR_GREEN_BG), + ) + ], # Layout ("SPAN", (1, 0), (4, 0)), @@ -76,11 +80,16 @@ def build_summary_table(document, tr_data): ("FONT", (1, 1), (1, 1), "Helvetica-Bold"), # Status cell - ("BACKGROUND", (5, 0), (5, 0), status_color), - ("FONT", (5, 0), (5, 0), "Helvetica", 14), - ("ALIGN", (5, 0), (5, 0), "CENTER"), - ("VALIGN", (5, 0), (5, 0), "MIDDLE"), - ("TEXTCOLOR", (5, 0), (5, 0), colors.white), + *[ + (cmd[0], (5, 0), (5, 0), *cmd[1:]) + for cmd in ( + ("BACKGROUND", status_color), + ("FONT", "Helvetica", 14), + ("ALIGN", "CENTER"), + ("VALIGN", "MIDDLE"), + ("TEXTCOLOR", colors.white), + ) + ], ), parent=TABLE_STYLE_DEFAULT, ) From 5696b06d4dd3167e990f80312fe2da99d975d630 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Thu, 10 Oct 2024 10:00:57 -0400 Subject: [PATCH 21/91] fix(pdf): Addressing code review feedback --- testgen/ui/components/widgets/download_dialog.py | 5 ++--- testgen/ui/services/test_results_service.py | 2 +- testgen/ui/views/test_results.py | 9 ++++++++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/testgen/ui/components/widgets/download_dialog.py b/testgen/ui/components/widgets/download_dialog.py index feb04ca..9466f06 100644 --- a/testgen/ui/components/widgets/download_dialog.py +++ b/testgen/ui/components/widgets/download_dialog.py @@ -35,7 +35,6 @@ def _file_gen(): 
return _file_content_func - def download_dialog( dialog_title: str, file_content_func: Callable[[Generator, ...], tuple[str, str, str|bytes]], @@ -53,7 +52,7 @@ def _dialog_content(): p_bar = st.progress(0.0, progress_bar_msg) with st.container(height=55, border=False): - _, button_col, _ = st.columns([.3, .4, .3]) + _, button_col = st.columns([.8, .2]) # The goal of this `file_ready` state is to prevent the file to be generated again after the user clicks # the download button. Streamlit's way to close a dialog is to hit st.rerun(), which we should call when @@ -72,7 +71,7 @@ def _get_progress_gen(): file_name, file_type, file_content = file_content_func(_get_progress_gen(), *args) - p_bar.progress(1.0, "Done!") + p_bar.progress(1.0, "File ready for download.") st.session_state[file_ready_key] = True with button_col: diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py index e64ef0c..039bee5 100644 --- a/testgen/ui/services/test_results_service.py +++ b/testgen/ui/services/test_results_service.py @@ -76,7 +76,7 @@ def do_source_data_lookup_custom(db_schema, tr_data): else: return "OK", None, str_sql, df else: - return "NA", "A source data lookup for this Test is not available.", None, None + return "NA", "Source data lookup is not available for this test.", None, None except Exception as e: return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}", str_sql, None diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index abed5a6..111d902 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -550,11 +550,18 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co if row["result_status"] != "Passed" and row["disposition"] in (None, "Confirmed") ] + if do_multi_select: + report_btn_help = ( + "Generate PDF reports for the selected results that are not muted or dismissed and are not Passed" + ) + else: + report_btn_help = "Generate PDF report for selected result" + if st.button( ":material/file_save: Issue Report", use_container_width=True, disabled=not report_eligible_rows, - help="Generate PDF reports for the selected results that are not muted or dismissed and are not Passed", + help=report_btn_help, ): dialog_title = "Download Issue Report" if len(report_eligible_rows) == 1: From e3a1311f32bb4cd220bfe8735586d7ddf600d203 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 10 Oct 2024 12:28:06 -0400 Subject: [PATCH 22/91] fix(auth): handle cookies retrieved inconsistently --- testgen/ui/navigation/router.py | 15 +++++++++++---- testgen/ui/session.py | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/testgen/ui/navigation/router.py b/testgen/ui/navigation/router.py index d010ee9..011ebb8 100644 --- a/testgen/ui/navigation/router.py +++ b/testgen/ui/navigation/router.py @@ -10,6 +10,7 @@ from testgen.utils.singleton import Singleton LOG = logging.getLogger("testgen") +COOKIES_READY_RERUNS = 2 class Router(Singleton): @@ -32,12 +33,16 @@ def run(self, hide_sidebar=False) -> None: session.current_page_args = st.query_params # This hack is needed because the auth cookie is not retrieved on the first run - # We have to store the page and wait for the second run - + # We have to store the page and wait for the second or third run if not session.cookies_ready: - session.cookies_ready = True + session.cookies_ready = 1 session.page_pending_cookies = current_page - else: + + # Sometimes the cookie is ready on the second rerun 
and other times only on the third -_- + # so we have to make sure the page renders correctly in both cases + # and also handle the login page! + elif session.cookies_ready == COOKIES_READY_RERUNS or session.authentication_status or (session.page_pending_cookies and not session.page_pending_cookies.url_path): + session.cookies_ready = COOKIES_READY_RERUNS current_page = session.page_pending_cookies or current_page session.page_pending_cookies = None @@ -48,6 +53,8 @@ def run(self, hide_sidebar=False) -> None: session.current_page = current_page.url_path current_page.run() + else: + session.cookies_ready += 1 def navigate(self, /, to: str, with_args: dict = {}) -> None: # noqa: B006 diff --git a/testgen/ui/session.py b/testgen/ui/session.py index 5c7459a..0802132 100644 --- a/testgen/ui/session.py +++ b/testgen/ui/session.py @@ -7,7 +7,7 @@ class TestgenSession(Singleton): - cookies_ready: bool + cookies_ready: int logging_in: bool logging_out: bool page_pending_cookies: st.Page From a86be0293b0d6d1e16b9d352c670b2b15de9af8d Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Mon, 14 Oct 2024 12:26:05 -0400 Subject: [PATCH 23/91] fix(misc): Download dialog code cleanup (close, progress) --- .../ui/components/widgets/download_dialog.py | 55 ++++++++----------- testgen/ui/views/test_results.py | 7 +-- 2 files changed, 25 insertions(+), 37 deletions(-) diff --git a/testgen/ui/components/widgets/download_dialog.py b/testgen/ui/components/widgets/download_dialog.py index 9466f06..a908043 100644 --- a/testgen/ui/components/widgets/download_dialog.py +++ b/testgen/ui/components/widgets/download_dialog.py @@ -1,31 +1,31 @@ import tempfile -from collections.abc import Callable, Generator, Iterable +from collections.abc import Callable, Iterable from zipfile import ZipFile import streamlit as st +PROGRESS_UPDATE_TYPE = Callable[[float], None] + +FILE_DATA_TYPE = tuple[str, str, str|bytes] def zip_multi_file_data( zip_file_name: str, - file_data_func: Callable[[Generator, ...], tuple[str, str, str|bytes]], + file_data_func: Callable[[PROGRESS_UPDATE_TYPE, ...], FILE_DATA_TYPE], args_list: list[Iterable], -): +) -> Callable[[PROGRESS_UPDATE_TYPE, ...], FILE_DATA_TYPE]: - def _file_content_func(progress_gen, *args): + def _file_content_func(update_main_progress, *args): progress = 0.0 step = 1.0 / len(args_list) - def _file_gen(): - while True: - f_progress = yield - progress_gen.send(progress + step / f_progress) + def _update_progress(f_progress): + update_main_progress(progress + step * f_progress) with tempfile.NamedTemporaryFile() as zip_file: with ZipFile(zip_file.name, "w") as zip_writer: - progress_gen.send(None) for args in args_list: - file_name, _, file_data = file_data_func(_file_gen(), *args) + file_name, _, file_data = file_data_func(_update_progress, *args) zip_writer.writestr(file_name, file_data) progress += step zip_content = zip_file.read() @@ -37,15 +37,12 @@ def _file_gen(): def download_dialog( dialog_title: str, - file_content_func: Callable[[Generator, ...], tuple[str, str, str|bytes]], + file_content_func: Callable[[PROGRESS_UPDATE_TYPE, ...], FILE_DATA_TYPE], args: Iterable = (), progress_bar_msg: str = "Generating file...", - key: str = "download_dialog", ): """Wrapping a dialog and a download button together to allow generating the file contents only when needed.""" - file_ready_key = f"{key}:file_ready" - def _dialog_content(): with st.container(height=70, border=False): @@ -54,33 +51,25 @@ def _dialog_content(): with st.container(height=55, border=False): _, button_col = 
st.columns([.8, .2]) - # The goal of this `file_ready` state is to prevent the file to be generated again after the user clicks - # the download button. Streamlit's way to close a dialog is to hit st.rerun(), which we should call when - # we get True from the download button being pushed, however it has to be rendered again for that, which - # means the file will be generated again. To avoid that, we simply call st.rerun() BEFORE generating the - # file, based on this session state. The drawback is that the dialog will unexpectedly close once by the - # next time it is opened after being closed by the user before "Download" is clicked. - if st.session_state.get(file_ready_key): - del st.session_state[file_ready_key] - st.rerun() - - def _get_progress_gen(): - while True: - progress = yield - p_bar.progress(progress, progress_bar_msg) + def _update_progress(progress: float): + p_bar.progress(progress, progress_bar_msg) - file_name, file_type, file_content = file_content_func(_get_progress_gen(), *args) + file_name, file_type, file_content = file_content_func(_update_progress, *args) p_bar.progress(1.0, "File ready for download.") - st.session_state[file_ready_key] = True - with button_col: - st.download_button( + @st.fragment + def render_button(): + if st.download_button( label=":material/download: Download", data=file_content, file_name=file_name, mime=file_type, use_container_width=True, - ) + ): + st.rerun() + + with button_col: + render_button() return st.dialog(title=dialog_title, width="small")(_dialog_content)() diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 111d902..a7605d3 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -12,7 +12,7 @@ import testgen.ui.services.query_service as dq from testgen.common import date_service from testgen.ui.components import widgets as testgen -from testgen.ui.components.widgets.download_dialog import download_dialog, zip_multi_file_data +from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data from testgen.ui.navigation.page import Page from testgen.ui.pdf.test_result_report import create_report from testgen.ui.services import authentication_service, project_service @@ -734,14 +734,13 @@ def view_edit_test(button_container, test_definition_id): show_test_form_by_id(test_definition_id) -def get_report_file_data(progress_gen, tr_data): +def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE: td_id = tr_data["test_definition_id_runtime"][:6] tr_time = pd.Timestamp(tr_data["test_time"]).strftime("%Y%m%d_%H%M%S") file_name = f"testgen_issue_report_{td_id}_{tr_time}.pdf" with BytesIO() as buffer: - progress_gen.send(None) create_report(buffer, tr_data) - progress_gen.send(1.0) + update_progress(1.0) buffer.seek(0) return file_name, "application/pdf", buffer.read() From 26d019ddee490c16f80a62dbcf22b6304a22f215 Mon Sep 17 00:00:00 2001 From: Astor Date: Mon, 21 Oct 2024 17:00:43 -0300 Subject: [PATCH 24/91] astor/TG-806 --- testgen/ui/views/test_definitions.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index f9f1ffd..c0eaf09 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -530,12 +530,19 @@ def show_test_form( if dynamic_attribute in ["custom_query"]: show_custom_query = True else: - test_definition[dynamic_attribute] = current_column.text_input( - 
label=actual_dynamic_attributes_labels, - max_chars=4000 if dynamic_attribute in ["match_column_names", "match_groupby_names", "groupby_names"] else 1000, - value=value, - help=actual_dynamic_attributes_help, - ) + if "threshold" in dynamic_attribute: + test_definition[dynamic_attribute] = current_column.number_input( + label=actual_dynamic_attributes_labels, + value=value, + help=actual_dynamic_attributes_help, + ) + else: + test_definition[dynamic_attribute] = current_column.text_input( + label=actual_dynamic_attributes_labels, + max_chars=4000 if dynamic_attribute in ["match_column_names", "match_groupby_names", "groupby_names"] else 1000, + value=value, + help=actual_dynamic_attributes_help, + ) # Custom Query if show_custom_query: From 755521643de43db4ef1fd5a6732824d0eb692cb4 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Wed, 23 Oct 2024 11:56:59 -0400 Subject: [PATCH 25/91] feat(ui): bind grid selection to query parameters the selected row is bound to the query parameters to make it possible to share a direct URL to a detail view. --- testgen/ui/services/form_service.py | 73 ++++++++++++++++++++++--- testgen/ui/views/profiling_anomalies.py | 8 ++- testgen/ui/views/profiling_results.py | 8 ++- testgen/ui/views/test_definitions.py | 2 + testgen/ui/views/test_results.py | 12 +++- 5 files changed, 89 insertions(+), 14 deletions(-) diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index 819c81d..01b648a 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -19,6 +19,7 @@ import testgen.common.date_service as date_service import testgen.ui.services.authentication_service as authentication_service import testgen.ui.services.database_service as db +from testgen.ui.navigation.router import Router """ Shared rendering of UI elements @@ -762,14 +763,31 @@ def render_insert_form( def render_grid_select( - df, + df: pd.DataFrame, show_columns, str_prompt=None, int_height=400, - do_multi_select=False, + do_multi_select: bool | None = None, + selection_mode: typing.Literal["single", "multiple", "disabled"] = "single", show_column_headers=None, render_highlights=True, + bind_to_query_name: str | None = None, + bind_to_query_prop: str | None = None, + key: str = "aggrid", ): + """ + :param do_multi_select: DEPRECATED. boolean to choose between single + or multiple selection. + :param selection_mode: one of single, multiple or disabled. defaults + to single. + :param bind_to_query_name: name of the query param where to bind the + selected row. + :param bind_to_query_prop: name of the property of the selected row + which value will be set in the query param. + :param key: Streamlit cache key for the grid. required when binding + selection to query. 
+ """ + show_prompt(str_prompt) # Set grid formatting @@ -837,12 +855,40 @@ def render_grid_select( } """ ) + data_changed: bool = True + rendering_counter = st.session_state.get(f"{key}_counter") or 0 + previous_dataframe = st.session_state.get(f"{key}_dataframe") + + if previous_dataframe is not None: + data_changed = not df.equals(previous_dataframe) dct_col_to_header = dict(zip(show_columns, show_column_headers, strict=True)) if show_column_headers else None gb = GridOptionsBuilder.from_dataframe(df) - selection_mode = "multiple" if do_multi_select else "single" - gb.configure_selection(selection_mode=selection_mode, use_checkbox=do_multi_select) + selection_mode_ = selection_mode + if do_multi_select is not None: + selection_mode_ = "multiple" if do_multi_select else "single" + + pre_selected_rows: typing.Any = {} + if bind_to_query_name and bind_to_query_prop: + bound_value = st.query_params.get(bind_to_query_name) + bound_items_indexes = df[df[bind_to_query_prop] == bound_value].index + if len(bound_items_indexes) > 0: + # https://github.com/PablocFonseca/streamlit-aggrid/issues/207#issuecomment-1793039564 + pre_selected_rows = {str(bound_items_indexes[0]): True} + else: + if data_changed and st.query_params.get(bind_to_query_name): + rendering_counter += 1 + Router().set_query_params({bind_to_query_name: None}) + + gb.configure_selection( + selection_mode=selection_mode_, + use_checkbox=selection_mode_ == "multiple", + pre_selected_rows=pre_selected_rows, + ) + + if bind_to_query_prop and bind_to_query_prop.isalnum(): + gb.configure_grid_options(getRowId=JsCode(f"""function(row) {{ return row.data.{bind_to_query_prop}; }}""")) all_columns = list(df.columns) @@ -853,8 +899,8 @@ def render_grid_select( "field": column, "header_name": str_header if str_header else ut_prettify_header(column), "hide": column not in show_columns, - "headerCheckboxSelection": do_multi_select and column == show_columns[0], - "headerCheckboxSelectionFilteredOnly": do_multi_select and column == show_columns[0], + "headerCheckboxSelection": selection_mode_ == "multiple" and column == show_columns[0], + "headerCheckboxSelectionFilteredOnly": selection_mode_ == "multiple" and column == show_columns[0], } highlight_kwargs = {"cellStyle": cellstyle_jscode} @@ -888,7 +934,8 @@ def render_grid_select( theme="balham", enable_enterprise_modules=False, allow_unsafe_jscode=True, - update_mode=GridUpdateMode.SELECTION_CHANGED, + update_mode=GridUpdateMode.NO_UPDATE, + update_on=["selectionChanged"], data_return_mode=DataReturnMode.FILTERED_AND_SORTED, columns_auto_size_mode=ColumnsAutoSizeMode.FIT_CONTENTS, height=int_height, @@ -897,10 +944,18 @@ def render_grid_select( "padding-bottom": "0px !important", } }, + key=f"{key}_{selection_mode_}_{rendering_counter}", + reload_data=data_changed, ) - if len(grid_data["selected_rows"]): - return grid_data["selected_rows"] + st.session_state[f"{key}_counter"] = rendering_counter + st.session_state[f"{key}_dataframe"] = df + + selected_rows = grid_data["selected_rows"] + if len(selected_rows) > 0: + if bind_to_query_name and bind_to_query_prop: + Router().set_query_params({bind_to_query_name: selected_rows[0][bind_to_query_prop]}) + return selected_rows def render_logo(logo_path: str = logo_file): diff --git a/testgen/ui/views/profiling_anomalies.py b/testgen/ui/views/profiling_anomalies.py index 7f69db6..54bf6e1 100644 --- a/testgen/ui/views/profiling_anomalies.py +++ b/testgen/ui/views/profiling_anomalies.py @@ -29,6 +29,7 @@ def render(self, run_id: str, issue_class: str | 
None = None, issue_type: str | f"Profiling run with ID '{run_id}' does not exist. Redirecting to list of Profiling Runs ...", "profiling-runs", ) + return run_date, _table_group_id, table_group_name, project_code = run_parentage run_date = date_service.get_timezoned_timestamp(st.session_state, run_date) @@ -130,7 +131,12 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | # Show main grid and retrieve selections selected = fm.render_grid_select( - df_pa, lst_show_columns, int_height=400, do_multi_select=do_multi_select + df_pa, + lst_show_columns, + int_height=400, + do_multi_select=do_multi_select, + bind_to_query_name="selected", + bind_to_query_prop="id", ) with export_button_column: diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py index eb2e4f0..5089e75 100644 --- a/testgen/ui/views/profiling_results.py +++ b/testgen/ui/views/profiling_results.py @@ -30,6 +30,7 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str | f"Profiling run with ID '{run_id}' does not exist. Redirecting to list of Profiling Runs ...", "profiling-runs", ) + return run_date, table_group_id, table_group_name, project_code = run_parentage run_date = date_service.get_timezoned_timestamp(st.session_state, run_date) @@ -105,7 +106,12 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str | with st.expander("📜 **Table CREATE script with suggested datatypes**"): st.code(generate_create_script(df), "sql") - selected_row = fm.render_grid_select(df, show_columns) + selected_row = fm.render_grid_select( + df, + show_columns, + bind_to_query_name="selected", + bind_to_query_prop="id", + ) with export_button_column: testgen.flex_row_end() diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index f9f1ffd..f8bc5ec 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -713,6 +713,8 @@ def show_test_defs_grid( do_multi_select=do_multi_select, show_column_headers=show_column_headers, render_highlights=False, + bind_to_query_name="selected", + bind_to_query_prop="id", ) with export_container: diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index a7605d3..ed97aa9 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -50,6 +50,7 @@ def render(self, run_id: str, status: str | None = None, test_type: str | None = f"Test run with ID '{run_id}' does not exist. 
Redirecting to list of Test Runs ...", "test-runs", ) + return run_date, test_suite_name, project_code = run_parentage run_date = date_service.get_timezoned_timestamp(st.session_state, run_date) @@ -478,7 +479,12 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co ] selected_rows = fm.render_grid_select( - df, lst_show_columns, do_multi_select=do_multi_select, show_column_headers=lst_show_headers + df, + lst_show_columns, + do_multi_select=do_multi_select, + show_column_headers=lst_show_headers, + bind_to_query_name="selected", + bind_to_query_prop="test_result_id", ) with export_container: @@ -523,7 +529,7 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co if not selected_rows: st.markdown(":orange[Select a record to see more information.]") else: - selected_row = selected_rows[len(selected_rows) - 1] + selected_row = selected_rows[0] dfh = get_test_result_history(selected_row) show_hist_columns = ["test_date", "threshold_value", "result_measure", "result_status"] @@ -582,7 +588,7 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co fm.show_subheader(selected_row["test_name_short"]) st.markdown(f"###### {selected_row['test_description']}") st.caption(empty_if_null(selected_row["measure_uom_description"])) - fm.render_grid_select(dfh, show_hist_columns) + fm.render_grid_select(dfh, show_hist_columns, selection_mode="disabled") with pg_col2: ut_tab1, ut_tab2 = st.tabs(["History", "Test Definition"]) with ut_tab1: From eb3f71af69c2573724cd995da3ed0b6728647265 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Mon, 28 Oct 2024 19:18:57 -0400 Subject: [PATCH 26/91] fix(ui): send the selection ID to AGgrid --- testgen/ui/services/form_service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index 01b648a..58c1bcf 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -872,10 +872,10 @@ def render_grid_select( pre_selected_rows: typing.Any = {} if bind_to_query_name and bind_to_query_prop: bound_value = st.query_params.get(bind_to_query_name) - bound_items_indexes = df[df[bind_to_query_prop] == bound_value].index - if len(bound_items_indexes) > 0: + bound_items = df[df[bind_to_query_prop] == bound_value] + if len(bound_items) > 0: # https://github.com/PablocFonseca/streamlit-aggrid/issues/207#issuecomment-1793039564 - pre_selected_rows = {str(bound_items_indexes[0]): True} + pre_selected_rows = {str(bound_items.iloc[0][bind_to_query_prop]): True} else: if data_changed and st.query_params.get(bind_to_query_name): rendering_counter += 1 From 6b16b536baa4a2a574a3af7d699e3e23337ea7a8 Mon Sep 17 00:00:00 2001 From: ci bot Date: Thu, 31 Oct 2024 19:22:05 +0000 Subject: [PATCH 27/91] TG-777: Inline SQL functions and remove profiling setup --- pyproject.toml | 1 + .../queries/execute_cat_tests_query.py | 4 + testgen/commands/queries/profiling_query.py | 3 + testgen/common/read_file.py | 34 ++++ .../050_populate_new_schema_metadata.sql | 34 ++-- .../project_profiling_query_mssql.yaml | 4 +- .../mssql/profiling/templated_functions.yaml | 46 +++++ .../00_drop_existing_functions_mssql.sql | 8 - .../01_create_functions_mssql.sql | 12 -- .../02_create_functions_mssql.sql | 54 ------ .../create_qc_schema_mssql.sql | 4 - .../grant_execute_privileges_mssql.sql | 1 - .../project_profiling_query_postgresql.yaml | 20 +-- .../profiling/templated_functions.yaml | 109 ++++++++++++ 
.../create_functions_postgresql.sql | 157 ------------------ .../create_qc_schema_postgresql.sql | 1 - .../grant_execute_privileges_postgresql.sql | 2 - .../project_profiling_query_redshift.yaml | 4 +- .../profiling/templated_functions.yaml | 101 +++++++++++ .../create_functions_redshift.sql | 115 ------------- .../create_qc_schema_redshift.sql | 1 - .../grant_execute_privileges_redshift.sql | 2 - .../project_profiling_query_snowflake.yaml | 4 +- .../profiling/templated_functions.yaml | 55 ++++++ .../create_functions_snowflake.sql | 69 -------- .../create_qc_schema_snowflake.sql | 1 - .../grant_execute_privileges_snowflake.sql | 6 - testgen/ui/services/test_results_service.py | 5 +- testgen/ui/views/profiling_anomalies.py | 5 +- tests/unit/test_read_file.py | 15 ++ 30 files changed, 409 insertions(+), 468 deletions(-) create mode 100644 testgen/template/flavors/mssql/profiling/templated_functions.yaml delete mode 100644 testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql delete mode 100644 testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql delete mode 100644 testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql delete mode 100644 testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql delete mode 100644 testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql create mode 100644 testgen/template/flavors/postgresql/profiling/templated_functions.yaml delete mode 100644 testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql delete mode 100644 testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql delete mode 100644 testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql create mode 100644 testgen/template/flavors/redshift/profiling/templated_functions.yaml delete mode 100644 testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql delete mode 100644 testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql delete mode 100644 testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql create mode 100644 testgen/template/flavors/snowflake/profiling/templated_functions.yaml delete mode 100644 testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql delete mode 100644 testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql delete mode 100644 testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql create mode 100644 tests/unit/test_read_file.py diff --git a/pyproject.toml b/pyproject.toml index 6282758..c848c77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ requires-python = ">=3.10" dependencies = [ "PyYAML==6.0.1", "click==8.1.3", + "regex==2024.9.11", "sqlalchemy==1.4.46", "snowflake-sqlalchemy==1.4.7", "pyodbc==5.0.0", diff --git a/testgen/commands/queries/execute_cat_tests_query.py b/testgen/commands/queries/execute_cat_tests_query.py index fc91e2b..ac905d3 100644 --- a/testgen/commands/queries/execute_cat_tests_query.py +++ b/testgen/commands/queries/execute_cat_tests_query.py @@ -2,6 +2,7 @@ from testgen.common import date_service, read_template_sql_file from testgen.common.database import database_service +from testgen.common.read_file import replace_templated_functions class CCATExecutionSQL: @@ -60,6 +61,9 @@ def 
_ReplaceParms(self, strInputString): strInputString = strInputString.replace("{RUN_DATE}", self.run_date) + if "{{DKFN_" in strInputString: + strInputString = replace_templated_functions(strInputString, self.flavor) + # Adding escape character where ':' is referenced strInputString = strInputString.replace(":", "\\:") diff --git a/testgen/commands/queries/profiling_query.py b/testgen/commands/queries/profiling_query.py index db5ff1e..84cc50f 100644 --- a/testgen/commands/queries/profiling_query.py +++ b/testgen/commands/queries/profiling_query.py @@ -1,6 +1,7 @@ import typing from testgen.common import date_service, read_template_sql_file, read_template_yaml_file +from testgen.common.read_file import replace_templated_functions class CProfilingSQL: @@ -98,6 +99,8 @@ def ReplaceParms(self, strInputString): strInputString = strInputString.replace("{CONTINGENCY_COLUMNS}", self.contingency_columns) strInputString = strInputString.replace("{CONTINGENCY_MAX_VALUES}", self.contingency_max_values) strInputString = strInputString.replace("{PROCESS_ID}", str(self.process_id)) + if "{{DKFN_" in strInputString: + strInputString = replace_templated_functions(strInputString, self.flavor) return strInputString diff --git a/testgen/common/read_file.py b/testgen/common/read_file.py index dda3ff8..bfc2e9b 100644 --- a/testgen/common/read_file.py +++ b/testgen/common/read_file.py @@ -7,6 +7,7 @@ from importlib.abc import Traversable from importlib.resources import as_file, files +import regex import yaml LOG = logging.getLogger("testgen") @@ -67,3 +68,36 @@ def read_template_yaml_file(template_file_name: str, sub_directory: str | None = raise ValueError(f"{template_file_name}: File is empty") return template + + +@cache +def read_template_yaml_function(function_name: str, db_flavour: str) -> str: + yaml_functions = read_template_yaml_file( + "templated_functions.yaml", + sub_directory=f"flavors/{db_flavour}/profiling", + ) + template = yaml_functions[function_name] + return template + + +def replace_templated_functions(query: str, db_flavour: str) -> str: + # see regexr.com/872jv for regex explanation + # Regex package is needed due to variable number of capture groups ('re' package only returns last) + # Use double curly braces for the function call in sql {{ }} + # Separate function arguments with double semi colon ;; + # Arguments in the template yaml take the form {$} like {$1} + # Space is required after the closing braces + # e.g. "{{DKFN_ISNUM;;{COLUM_NAME}}} " + # Function template replacement is the last step of templating, therefore cannot use other templated parameters inside. + # If needed, those must be arguments to the templated function. 
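To make the {{DKFN_...}} convention described in these comments concrete: the sketch below, written in the style of replace_templated_functions(), expands one call using the DATEDIFF_DAY template that this patch adds for postgresql. The inline TEMPLATES dict stands in for the per-flavor templated_functions.yaml (the real code loads it through read_template_yaml_function), and the expand_dkfn helper, the sample query, and its table and column names are illustrative assumptions:

import regex  # third-party 'regex' package; .captures() returns every repeat of a group

# Stand-in for testgen/template/flavors/postgresql/profiling/templated_functions.yaml
TEMPLATES = {"DATEDIFF_DAY": "DATE({$2}) - DATE({$1})"}

def expand_dkfn(query: str) -> str:
    # Same pattern as in replace_templated_functions(); the final (\s) group is why a
    # space is required after the closing braces
    while match := regex.search(r"{{DKFN_([\w\d]+)(?:;;(.+?))*}}(\s)", query):
        name = match.captures(1)[0]
        args = match.captures(2)
        template = TEMPLATES[name] + match.captures(3)[0]
        for index, arg in enumerate(args, start=1):
            template = template.replace(f"{{${index}}}", arg)
        query = query.replace(match.captures(0)[0], template)
    return query

sql = "SELECT {{DKFN_DATEDIFF_DAY;;MIN(updated_at);;'2024-01-01'::DATE}} + 1 FROM orders"
print(expand_dkfn(sql))
# prints: SELECT DATE('2024-01-01'::DATE) - DATE(MIN(updated_at)) + 1 FROM orders

Because this expansion is the last templating step, any {COLUMN_NAME}-style parameters passed as arguments are already resolved by the time the DKFN call is rewritten, matching the constraint stated in the surrounding comments.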
+ # I.E OK TO DO sql: "{{DKFN_FOO;;{COLUM_NAME}}}" and yaml: "FOO: foo({$1})" + # NOT OK TO DO sql: "{{DKFN_FOO}}" and yaml: "FOO: foo({"COLUM_NAME"})" + while match := regex.search(r"{{DKFN_([\w\d]+)(?:;;(.+?))*}}(\s)", query): + function_name = match.captures(1)[0] + function_arguments = match.captures(2) + function_template = read_template_yaml_function(function_name, db_flavour) + function_template = function_template + match.captures(3)[0] + for index, function_arg in enumerate(function_arguments, start=1): + function_template = function_template.replace(f"{{${index}}}", function_arg) + query = query.replace(match.captures(0)[0], function_template) + return query diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql index a9643cf..f30d83c 100644 --- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql +++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql @@ -302,7 +302,7 @@ VALUES ('1001', 'Alpha_Trunc', 'redshift', 'MAX(LENGTH({COLUMN_NAME}))', '<', ' ('4002', 'Avg_Shift', 'postgresql', 'ABS( (AVG({COLUMN_NAME}::FLOAT) - {BASELINE_AVG}) / SQRT(((COUNT({COLUMN_NAME})::FLOAT-1)*STDDEV({COLUMN_NAME})^2 + ({BASELINE_VALUE_CT}::FLOAT-1) * {BASELINE_SD}::FLOAT^2) /NULLIF(COUNT({COLUMN_NAME})::FLOAT + {BASELINE_VALUE_CT}::FLOAT, 0) ))', '>=', '{THRESHOLD_VALUE}'), ('4003', 'Condition_Flag', 'postgresql', 'SUM(CASE WHEN {CUSTOM_QUERY} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('4004', 'Constant', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME} <> {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), - ('4005', 'Daily_Record_Ct', 'postgresql', '{DATA_QC_SCHEMA}.DATEDIFF(''DAY'', MIN({COLUMN_NAME}), MAX({COLUMN_NAME}))+1-COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'), + ('4005', 'Daily_Record_Ct', 'postgresql', '{{DKFN_DATEDIFF_DAY;;MIN({COLUMN_NAME});;MAX({COLUMN_NAME})}} +1-COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'), ('4006', 'Dec_Trunc', 'postgresql', 'ROUND(SUM(ABS({COLUMN_NAME})::DECIMAL(18,4) % 1), 0)', '<', '{THRESHOLD_VALUE}'), ('4007', 'Distinct_Date_Ct', 'postgresql', 'COUNT(DISTINCT {COLUMN_NAME})', '<', '{THRESHOLD_VALUE}'), ('4008', 'Distinct_Value_Ct', 'postgresql', 'COUNT(DISTINCT {COLUMN_NAME})', '<>', '{THRESHOLD_VALUE}'), @@ -315,11 +315,11 @@ VALUES ('1001', 'Alpha_Trunc', 'redshift', 'MAX(LENGTH({COLUMN_NAME}))', '<', ' ('4015', 'Min_Date', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME} < ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('4016', 'Min_Val', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME} < {BASELINE_VALUE} THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('4017', 'Missing_Pct', 'postgresql', 'ABS( 2.0 * ASIN( SQRT( {BASELINE_VALUE_CT}::FLOAT / {BASELINE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT( {COLUMN_NAME} )::FLOAT / NULLIF(COUNT(*), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'), - ('4018', 'Monthly_Rec_Ct', 'postgresql', '(MAX({DATA_QC_SCHEMA}.DATEDIFF(''MON'', {COLUMN_NAME}, ''{RUN_DATE}''::DATE)) - MIN({DATA_QC_SCHEMA}.DATEDIFF(''MON'', {COLUMN_NAME}, ''{RUN_DATE}''::DATE)) + 1) - COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF(''MON'', {COLUMN_NAME}, ''{RUN_DATE}''::DATE))', '>', '{THRESHOLD_VALUE}'), + ('4018', 'Monthly_Rec_Ct', 'postgresql', '(MAX({{DKFN_DATEDIFF_MONTH;;{COLUMN_NAME};;''{RUN_DATE}''::DATE}} ) - MIN({{DKFN_DATEDIFF_MONTH;;{COLUMN_NAME};;''{RUN_DATE}''::DATE}} ) + 1) - COUNT(DISTINCT {{DKFN_DATEDIFF_MONTH;;{COLUMN_NAME};;''{RUN_DATE}''::DATE}} )', '>', '{THRESHOLD_VALUE}'), ('4019', 
'Outlier_Pct_Above', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME}::FLOAT > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '>', '{THRESHOLD_VALUE}'), ('4020', 'Outlier_Pct_Below', 'postgresql', 'SUM(CASE WHEN {COLUMN_NAME}::FLOAT < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END)::FLOAT / NULLIF(COUNT({COLUMN_NAME}), 0)::FLOAT', '>', '{THRESHOLD_VALUE}'), ('4021', 'Pattern_Match', 'postgresql', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') ~ ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), - ('4022', 'Recency', 'postgresql', '{DATA_QC_SCHEMA}.DATEDIFF(''DAY'', MAX({COLUMN_NAME}), ''{RUN_DATE}''::DATE)', '>', '{THRESHOLD_VALUE}'), + ('4022', 'Recency', 'postgresql', '{{DKFN_DATEDIFF_DAY;;MAX({COLUMN_NAME});;''{RUN_DATE}''::DATE}} ', '>', '{THRESHOLD_VALUE}'), ('4023', 'Required', 'postgresql', 'COUNT(*) - COUNT({COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'), ('4024', 'Row_Ct', 'postgresql', 'COUNT(*)', '<', '{THRESHOLD_VALUE}'), ('4025', 'Row_Ct_Pct', 'postgresql', 'ABS(ROUND(100.0 * (COUNT(*) - {BASELINE_CT})::DECIMAL(18,4) / {BASELINE_CT}::DECIMAL(18,4), 2))', '>', '{THRESHOLD_VALUE}'), @@ -327,7 +327,7 @@ VALUES ('1001', 'Alpha_Trunc', 'redshift', 'MAX(LENGTH({COLUMN_NAME}))', '<', ' ('4027', 'US_State', 'postgresql', 'SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') NOT IN (''AL'',''AK'',''AS'',''AZ'',''AR'',''CA'',''CO'',''CT'',''DE'',''DC'',''FM'',''FL'',''GA'',''GU'',''HI'',''ID'',''IL'',''IN'',''IA'',''KS'',''KY'',''LA'',''ME'',''MH'',''MD'',''MA'',''MI'',''MN'',''MS'',''MO'',''MT'',''NE'',''NV'',''NH'',''NJ'',''NM'',''NY'',''NC'',''ND'',''MP'',''OH'',''OK'',''OR'',''PW'',''PA'',''PR'',''RI'',''SC'',''SD'',''TN'',''TX'',''UT'',''VT'',''VI'',''VA'',''WA'',''WV'',''WI'',''WY'',''AE'',''AP'',''AA'') THEN 1 ELSE 0 END)', '>', '{THRESHOLD_VALUE}'), ('4028', 'Unique', 'postgresql', 'COUNT(*) - COUNT(DISTINCT {COLUMN_NAME})', '>', '{THRESHOLD_VALUE}'), ('4029', 'Unique_Pct', 'postgresql', 'ABS( 2.0 * ASIN( SQRT({BASELINE_UNIQUE_CT}::FLOAT / {BASELINE_VALUE_CT}::FLOAT ) ) - 2 * ASIN( SQRT( COUNT( DISTINCT {COLUMN_NAME} )::FLOAT / NULLIF(COUNT( {COLUMN_NAME} ), 0)::FLOAT )) )', '>=', '{THRESHOLD_VALUE}'), - ('4030', 'Weekly_Rec_Ct', 'postgresql', 'MAX({DATA_QC_SCHEMA}.DATEDIFF(''WEEK'', ''1800-01-01''::DATE, {COLUMN_NAME})) - MIN({DATA_QC_SCHEMA}.DATEDIFF(''WEEK'', ''1800-01-01''::DATE, {COLUMN_NAME}))+1 - COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF(''WEEK'', ''1800-01-01''::DATE, {COLUMN_NAME}))', '>', '{THRESHOLD_VALUE}'), + ('4030', 'Weekly_Rec_Ct', 'postgresql', 'MAX({{DKFN_DATEDIFF_WEEK;;''1800-01-01''::DATE;;{COLUMN_NAME}}} ) - MIN({{DKFN_DATEDIFF_WEEK;;''1800-01-01''::DATE;;{COLUMN_NAME}}} )+1 - COUNT(DISTINCT {{DKFN_DATEDIFF_WEEK;;''1800-01-01''::DATE;;{COLUMN_NAME}}} )', '>', '{THRESHOLD_VALUE}'), ('1031', 'Variability_Increase', 'redshift', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'), ('1032', 'Variability_Decrease', 'redshift', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '<', '{THRESHOLD_VALUE}'), ('2031', 'Variability_Increase', 'snowflake', '100.0*STDDEV(CAST("{COLUMN_NAME}" AS FLOAT))/{BASELINE_SD}', '>', '{THRESHOLD_VALUE}'), @@ -443,8 +443,8 @@ VALUES ('1040', '1008', 'Profile Anomaly' , 'Table_Pattern_Mismatch', 'redshift', NULL, 'SELECT column_name, table_name, data_type FROM information_schema.columns WHERE table_schema = ''{TARGET_SCHEMA}'' AND column_name = ''{COLUMN_NAME}'' ORDER BY data_type;' ), ('1041', 
'1009', 'Profile Anomaly' , 'Leading_Spaces', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN '' !'' AND ''!'' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ), ('1042', '1010', 'Profile Anomaly' , 'Quoted_Values', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" ILIKE ''"%"'' OR "{COLUMN_NAME}" ILIKE ''''''%'''''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ), - ('1043', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'redshift', NULL, 'SELECT A.* FROM ( SELECT TOP 10 DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM ( SELECT TOP 10 DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), - ('1044', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'redshift', NULL, 'SELECT A.* FROM ( SELECT TOP 10 DISTINCT ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS A UNION ALL SELECT B.* FROM ( SELECT TOP 10 DISTINCT ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), + ('1043', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'redshift', NULL, 'SELECT A.* FROM ( SELECT TOP 10 DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM ( SELECT TOP 10 DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), + ('1044', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'redshift', NULL, 'SELECT A.* FROM ( SELECT TOP 10 DISTINCT ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_DATE;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS A UNION ALL SELECT B.* FROM ( SELECT TOP 10 DISTINCT ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_DATE;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), ('1045', '1013', 'Profile Anomaly' , 'Small Missing Value Ct', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER("{COLUMN_NAME}") SIMILAR TO ''(^.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,}$)'' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN 
(''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ), ('1046', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ), ('1047', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ), @@ -455,7 +455,7 @@ VALUES ('1052', '1020', 'Profile Anomaly' , 'Recency_Six_Months', 'redshift', NULL, 'created_in_ui' ), ('1053', '1021', 'Profile Anomaly' , 'Unexpected US States', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ), ('1054', '1022', 'Profile Anomaly' , 'Unexpected Emails', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ), - ('1055', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'redshift', NULL, 'SELECT A.* FROM ( SELECT TOP 10 DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM ( SELECT TOP 10 DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), + ('1055', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'redshift', NULL, 'SELECT A.* FROM ( SELECT TOP 10 DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM ( SELECT TOP 10 DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), ('1056', '1024', 'Profile Anomaly' , 'Invalid_Zip3_USA', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}" LIMIT 500;'), ('1057', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'redshift', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" ~ ''^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'' AND "{COLUMN_NAME}" !~ ''\\s(and|but|or|yet)\\s'' GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;' ), @@ -469,8 +469,8 @@ VALUES ('1065', '1008', 'Profile Anomaly' , 'Table_Pattern_Mismatch', 'postgresql', NULL, 'SELECT column_name, columns.table_name FROM information_schema.columns JOIN information_schema.tables ON 
columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND UPPER(tables.table_type) = ''BASE TABLE'' ORDER BY columns.table_name;' ), ('1066', '1009', 'Profile Anomaly' , 'Leading_Spaces', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN '' !'' AND ''!'' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ), ('1067', '1010', 'Profile Anomaly' , 'Quoted_Values', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" ILIKE ''"%"'' OR "{COLUMN_NAME}" ILIKE ''''''%'''''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ), - ('1068', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'postgresql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS B ORDER BY data_type, count DESC;' ), - ('1069', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'postgresql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10) AS B ORDER BY data_type, count DESC;' ), + ('1068', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'postgresql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS B ORDER BY data_type, count DESC;' ), + ('1069', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'postgresql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_DATE;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_DATE;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10) AS B ORDER BY data_type, count DESC;' ), ('1070', '1013', 'Profile Anomaly' , 'Small Missing Value Ct', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', 
'' '') THEN 1 WHEN LOWER("{COLUMN_NAME}") SIMILAR TO ''(^.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,}$)'' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ), ('1071', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ), ('1072', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ), @@ -481,7 +481,7 @@ VALUES ('1077', '1020', 'Profile Anomaly' , 'Recency_Six_Months', 'postgresql', NULL, 'created_in_ui' ), ('1078', '1021', 'Profile Anomaly' , 'Unexpected US States', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ), ('1079', '1022', 'Profile Anomaly' , 'Unexpected Emails', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ), - ('1080', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'postgresql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS B ORDER BY data_type, count DESC;' ), + ('1080', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'postgresql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC LIMIT 10 ) AS B ORDER BY data_type, count DESC;' ), ('1081', '1024', 'Profile Anomaly' , 'Invalid_Zip3_USA', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}" LIMIT 500;'), ('1082', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" ~ ''^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'' AND "{COLUMN_NAME}" !~ ''\s(and|but|or|yet)\s'' 
GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;' ), @@ -507,7 +507,7 @@ VALUES ('1101', '1024', 'Test Results', 'Outlier_Pct_Above', 'postgresql', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: FLOAT > ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'), ('1102', '1025', 'Test Results', 'Outlier_Pct_Below', 'postgresql', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" :: FLOAT < ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'), ('1103', '1026', 'Test Results', 'Pattern_Match', 'postgresql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT SIMILAR TO ''{BASELINE_VALUE}'' GROUP BY "{COLUMN_NAME}";'), - ('1104', '1028', 'Test Results', 'Recency', 'postgresql', NULL, 'SELECT DISTINCT col AS latest_date_available, ''{TEST_DATE}'' :: DATE as test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE {DATA_QC_SCHEMA}.DATEDIFF(''day'', col, ''{TEST_DATE}''::DATE) > {THRESHOLD_VALUE};'), + ('1104', '1028', 'Test Results', 'Recency', 'postgresql', NULL, 'SELECT DISTINCT col AS latest_date_available, ''{TEST_DATE}'' :: DATE as test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE {{DKFN_DATEDIFF_DAY;;col;;''{TEST_DATE}''::DATE}} > {THRESHOLD_VALUE};'), ('1105', '1030', 'Test Results', 'Required', 'postgresql', NULL, 'SELECT * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL LIMIT 500;'), ('1106', '1031', 'Test Results', 'Row_Ct', 'postgresql', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, ABS(ROUND(100 * (current_count - {THRESHOLD_VALUE}) :: NUMERIC / {THRESHOLD_VALUE} :: NUMERIC,2)) AS row_count_pct_decrease FROM cte WHERE current_count < {THRESHOLD_VALUE};'), ('1107', '1032', 'Test Results', 'Row_Ct_Pct', 'postgresql', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, {BASELINE_CT} AS baseline_count, ABS(ROUND(100 * (current_count - {BASELINE_CT}) :: NUMERIC / {BASELINE_CT} :: NUMERIC,2)) AS row_count_pct_difference FROM cte;'), @@ -529,8 +529,8 @@ VALUES ('1122', '1008', 'Profile Anomaly' , 'Table_Pattern_Mismatch', 'mssql', NULL, 'SELECT TOP 500 column_name, columns.table_name FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND UPPER(tables.table_type) = ''BASE TABLE'' ORDER BY table_name;' ), ('1123', '1009', 'Profile Anomaly' , 'Leading_Spaces', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN '' !'' AND ''!'' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ), ('1124', '1010', 'Profile Anomaly' , 'Quoted_Values', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" LIKE ''"%"'' OR "{COLUMN_NAME}" LIKE ''''''%'''''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY 
"{COLUMN_NAME}";' ), - ('1125', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'mssql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), - ('1126', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'mssql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT TOP 10 ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT TOP 10 ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), + ('1125', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'mssql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), + ('1126', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'mssql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT TOP 10 ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_DATE;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT TOP 10 ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_DATE;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), ('1127', '1013', 'Profile Anomaly' , 'Small Missing Value Ct', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', '' '') THEN 1 WHEN LEN("{COLUMN_NAME}") > 1 AND ( LOWER("{COLUMN_NAME}") LIKE ''%..%'' OR LOWER("{COLUMN_NAME}") LIKE ''%--%'' OR (LEN(REPLACE("{COLUMN_NAME}", ''0'', ''''))= 0 ) OR (LEN(REPLACE("{COLUMN_NAME}", ''9'', ''''))= 0 ) OR (LEN(REPLACE(LOWER("{COLUMN_NAME}"), ''x'', ''''))= 0 ) OR (LEN(REPLACE(LOWER("{COLUMN_NAME}"), ''z'', ''''))= 0 ) ) THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY 
"{COLUMN_NAME}";' ), ('1128', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ), ('1129', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ), @@ -541,7 +541,7 @@ VALUES ('1134', '1020', 'Profile Anomaly' , 'Recency_Six_Months', 'mssql', NULL, 'created_in_ui' ), ('1135', '1021', 'Profile Anomaly' , 'Unexpected US States', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;' ), ('1136', '1022', 'Profile Anomaly' , 'Unexpected Emails', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;' ), - ('1137', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'mssql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), + ('1137', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'mssql', NULL, 'SELECT A.* FROM ( SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS A UNION ALL SELECT B.* FROM ( SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC ) AS B ORDER BY data_type, count DESC;' ), ('1138', '1024', 'Profile Anomaly' , 'Invalid_Zip3_USA', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}";'), ('1139', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE ( "{COLUMN_NAME}" LIKE ''%,%,%,%'' OR "{COLUMN_NAME}" LIKE ''%|%|%|%'' OR "{COLUMN_NAME}" LIKE ''%^%^%^%'' OR "{COLUMN_NAME}" LIKE ''%'' + CHAR(9) + ''%'' + CHAR(9) + ''%'' + CHAR(9) + ''%'' ) AND NOT ( "{COLUMN_NAME}" LIKE ''% and %'' OR "{COLUMN_NAME}" LIKE ''% but %'' OR "{COLUMN_NAME}" LIKE ''% or %'' OR "{COLUMN_NAME}" LIKE ''% yet %'' ) AND ISNULL(CAST(LEN("{COLUMN_NAME}") - LEN(REPLACE("{COLUMN_NAME}", '','', '''')) as FLOAT) / CAST(NULLIF(LEN("{COLUMN_NAME}") - LEN(REPLACE("{COLUMN_NAME}", '' '', '''')), 0) as FLOAT), 1) > 0.6 GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ), @@ -708,8 +708,8 @@ ORDER BY check_period DESC;'), ('1179', '1008', 'Profile Anomaly' , 'Table_Pattern_Mismatch', 'snowflake', NULL, 'SELECT DISTINCT column_name, columns.table_name FROM information_schema.columns JOIN information_schema.tables ON columns.table_name = 
tables.table_name AND columns.table_schema = tables.table_schema WHERE columns.table_schema = ''{TARGET_SCHEMA}'' AND columns.column_name = ''{COLUMN_NAME}'' AND UPPER(tables.table_type) = ''BASE TABLE'' ORDER BY table_name; ' ), ('1180', '1009', 'Profile Anomaly' , 'Leading_Spaces', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" BETWEEN '' !'' AND ''!'' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ), ('1181', '1010', 'Profile Anomaly' , 'Quoted_Values', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" ILIKE ''"%"'' OR "{COLUMN_NAME}" ILIKE ''''''%'''''' THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ), - ('1182', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'snowflake', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ), - ('1183', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'snowflake', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isdate("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ), + ('1182', '1011', 'Profile Anomaly' , 'Char_Column_Number_Values', 'snowflake', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ), + ('1183', '1012', 'Profile Anomaly' , 'Char_Column_Date_Values', 'snowflake', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_DATE;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Date'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_DATE;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ), ('1184', '1013', 'Profile Anomaly' , 'Small Missing Value Ct', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE (CASE WHEN "{COLUMN_NAME}" IN (''.'', ''?'', '' '') THEN 1 WHEN LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP 
''-{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''0{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''9{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''x{2,}'' OR LOWER("{COLUMN_NAME}"::VARCHAR) REGEXP ''z{2,}'' THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''blank'',''error'',''missing'',''tbd'', ''n/a'',''#na'',''none'',''null'',''unknown'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''(blank)'',''(error)'',''(missing)'',''(tbd)'', ''(n/a)'',''(#na)'',''(none)'',''(null)'',''(unknown)'') THEN 1 WHEN LOWER("{COLUMN_NAME}") IN (''[blank]'',''[error]'',''[missing]'',''[tbd]'', ''[n/a]'',''[#na]'',''[none]'',''[null]'',''[unknown]'') THEN 1 WHEN "{COLUMN_NAME}" = '''' THEN 1 WHEN "{COLUMN_NAME}" IS NULL THEN 1 ELSE 0 END) = 1 GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}";' ), ('1185', '1014', 'Profile Anomaly' , 'Small Divergent Value Ct', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ), ('1186', '1015', 'Profile Anomaly' , 'Boolean_Value_Mismatch', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC;' ), @@ -720,7 +720,7 @@ ORDER BY check_period DESC;'), ('1191', '1020', 'Profile Anomaly' , 'Recency_Six_Months', 'snowflake', NULL, 'created_in_ui' ), ('1192', '1021', 'Profile Anomaly' , 'Unexpected US States', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ), ('1193', '1022', 'Profile Anomaly' , 'Unexpected Emails', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC LIMIT 500;' ), - ('1194', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'snowflake', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {DATA_QC_SCHEMA}.fndk_isnum("{COLUMN_NAME}") != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ), + ('1194', '1023', 'Profile Anomaly' , 'Small_Numeric_Value_Ct', 'snowflake', NULL, 'SELECT A.* FROM (SELECT DISTINCT TOP 10 ''Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} = 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS A UNION ALL SELECT B.* FROM (SELECT DISTINCT TOP 10 ''Non-Numeric'' as data_type, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE {{DKFN_IS_NUM;;"{COLUMN_NAME}"}} != 1 GROUP BY "{COLUMN_NAME}" ORDER BY count DESC) AS B ORDER BY data_type, count DESC;' ), ('1195', '1024', 'Profile Anomaly' , 'Invalid_Zip3_USA', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE TRANSLATE("{COLUMN_NAME}",''012345678'',''999999999'') <> ''999'' GROUP BY "{COLUMN_NAME}" ORDER BY count DESC, "{COLUMN_NAME}" LIMIT 500;'), ('1196', '1025', 'Profile Anomaly' , 'Delimited_Data_Embedded', 'snowflake', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE 
REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, ''^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'') AND NOT REGEXP_LIKE("{COLUMN_NAME}"::VARCHAR, ''.*\\s(and|but|or|yet)\\s.*'') GROUP BY "{COLUMN_NAME}" ORDER BY COUNT(*) DESC LIMIT 500;' ), diff --git a/testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml b/testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml index 5ebda4a..5c5e433 100644 --- a/testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +++ b/testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml @@ -57,8 +57,8 @@ strTemplate05_A: COUNT(DISTINCT UPPER(REPLACE(TRANSLATE("{COL_NAME}",' '''',.-', END ) AS filled_value_ct, LEFT(MIN(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS min_text, LEFT(MAX(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS max_text, - SUM({DATA_QC_SCHEMA}.fndk_isnum(LEFT("{COL_NAME}", 31))) AS numeric_ct, - SUM({DATA_QC_SCHEMA}.fndk_isdate(LEFT("{COL_NAME}", 26))) AS date_ct, + SUM({{DKFN_IS_NUM;;LEFT("{COL_NAME}", 31)}} ) AS numeric_ct, + SUM({{DKFN_IS_DATE;;LEFT("{COL_NAME}", 26)}} ) AS date_ct, CASE WHEN CAST(SUM( CASE WHEN UPPER("{COL_NAME}") LIKE '[1-9]% [A-Z]% %' AND CHARINDEX(' ', "{COL_NAME}") BETWEEN 2 and 6 THEN 1 diff --git a/testgen/template/flavors/mssql/profiling/templated_functions.yaml b/testgen/template/flavors/mssql/profiling/templated_functions.yaml new file mode 100644 index 0000000..86d064b --- /dev/null +++ b/testgen/template/flavors/mssql/profiling/templated_functions.yaml @@ -0,0 +1,46 @@ +IS_NUM: CASE + WHEN TRY_CAST(NULLIF({$1}, '') AS float) IS NOT NULL THEN 1 + ELSE 0 + END + +IS_DATE: CASE WHEN TRY_CAST(NULLIF({$1}, '') AS float) IS NOT NULL + AND LEFT(NULLIF({$1}, ''),4) BETWEEN 1800 AND 2200 THEN + CASE + WHEN LEN((NULLIF({$1}, ''))) > 11 THEN 0 + /* YYYYMMDD */ + WHEN TRY_CONVERT(DATE, NULLIF({$1}, ''), 112) IS NOT NULL THEN 1 + + /* YYYY-MM-DD */ + WHEN TRY_CONVERT(DATE, NULLIF({$1}, ''), 23) IS NOT NULL THEN 1 + + /* MM/DD/YYYY */ + WHEN TRY_CONVERT(DATE, NULLIF({$1}, ''), 101) IS NOT NULL THEN 1 + + /* MM/DD/YY */ + WHEN TRY_CONVERT(DATE, NULLIF({$1}, ''), 1) IS NOT NULL THEN 1 + + /*MM-DD-YYYY */ + WHEN TRY_CONVERT(DATE, NULLIF({$1}, ''), 110) IS NOT NULL THEN 1 + + /*MM-DD-YY */ + WHEN TRY_CONVERT(DATE, NULLIF({$1}, ''), 10) IS NOT NULL THEN 1 + + + ELSE 0 END + /*DD MMM YYYY */ + WHEN (TRY_CONVERT(DATE, NULLIF({$1}, ''), 106) IS NOT NULL + AND LEFT(NULLIF({$1}, ''), 4) BETWEEN 1800 AND 2200) + THEN 1 + + /* YYYY-MM-DD HH:MM:SS SSSSSS */ + WHEN (TRY_CONVERT(DATETIME2, NULLIF({$1}, ''), 121) IS NOT NULL + AND LEFT(NULLIF({$1}, ''), 4) BETWEEN 1800 AND 2200) + THEN 1 + + /* YYYY-MM-DD HH:MM:SS */ + WHEN (TRY_CONVERT(DATETIME2, NULLIF({$1}, ''), 120) IS NOT NULL + AND LEFT(NULLIF({$1}, ''), 4) BETWEEN 1800 AND 2200) + THEN 1 + ELSE 0 + END + diff --git a/testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql b/testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql deleted file mode 100644 index ff358ce..0000000 --- a/testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +++ /dev/null @@ -1,8 +0,0 @@ --- Step 1: Drop both functions if they exist -BEGIN - IF OBJECT_ID('{DATA_QC_SCHEMA}.fndk_isnum', 'FN') IS NOT NULL - DROP FUNCTION {DATA_QC_SCHEMA}.fndk_isnum; - - IF OBJECT_ID('{DATA_QC_SCHEMA}.fndk_isdate', 'FN') IS NOT NULL - DROP FUNCTION {DATA_QC_SCHEMA}.fndk_isdate; -END diff --git 
a/testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql b/testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql deleted file mode 100644 index 1547fe5..0000000 --- a/testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +++ /dev/null @@ -1,12 +0,0 @@ --- Step 2: Create isnum function -CREATE FUNCTION {DATA_QC_SCHEMA}.fndk_isnum (@strparm VARCHAR(500)) -RETURNS INT -AS -BEGIN - IF TRY_CAST(NULLIF(@strparm, '') AS float) IS NOT NULL - BEGIN - RETURN(1) - END - - RETURN(0) -END; diff --git a/testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql b/testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql deleted file mode 100644 index 874938f..0000000 --- a/testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +++ /dev/null @@ -1,54 +0,0 @@ --- Step 3: Create isdate function - -CREATE FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(@strparm VARCHAR(500)) - RETURNS INT -AS -BEGIN - DECLARE @ret INT - - SET @ret = - - CASE WHEN TRY_CAST(NULLIF(@strparm, '') AS float) IS NOT NULL - AND LEFT(NULLIF(@strparm, ''),4) BETWEEN 1800 AND 2200 THEN - CASE - WHEN LEN((NULLIF(@strparm, ''))) > 11 THEN 0 - -- YYYYMMDD - WHEN TRY_CONVERT(DATE, NULLIF(@strparm, ''), 112) IS NOT NULL THEN 1 - - -- YYYY-MM-DD - WHEN TRY_CONVERT(DATE, NULLIF(@strparm, ''), 23) IS NOT NULL THEN 1 - - -- MM/DD/YYYY - WHEN TRY_CONVERT(DATE, NULLIF(@strparm, ''), 101) IS NOT NULL THEN 1 - - -- MM/DD/YY - WHEN TRY_CONVERT(DATE, NULLIF(@strparm, ''), 1) IS NOT NULL THEN 1 - - --MM-DD-YYYY - WHEN TRY_CONVERT(DATE, NULLIF(@strparm, ''), 110) IS NOT NULL THEN 1 - - --MM-DD-YY - WHEN TRY_CONVERT(DATE, NULLIF(@strparm, ''), 10) IS NOT NULL THEN 1 - - - ELSE 0 END - --DD MMM YYYY - WHEN (TRY_CONVERT(DATE, NULLIF(@strparm, ''), 106) IS NOT NULL - AND LEFT(NULLIF(@strparm, ''), 4) BETWEEN 1800 AND 2200) - THEN 1 - - -- YYYY-MM-DD HH:MM:SS SSSSSS - WHEN (TRY_CONVERT(DATETIME2, NULLIF(@strparm, ''), 121) IS NOT NULL - AND LEFT(NULLIF(@strparm, ''), 4) BETWEEN 1800 AND 2200) - THEN 1 - - -- YYYY-MM-DD HH:MM:SS - WHEN (TRY_CONVERT(DATETIME2, NULLIF(@strparm, ''), 120) IS NOT NULL - AND LEFT(NULLIF(@strparm, ''), 4) BETWEEN 1800 AND 2200) - THEN 1 - ELSE 0 - END - RETURN @ret - -END -; diff --git a/testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql b/testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql deleted file mode 100644 index 5bd4d06..0000000 --- a/testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +++ /dev/null @@ -1,4 +0,0 @@ -IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = '{DATA_QC_SCHEMA}') -BEGIN - EXEC('CREATE SCHEMA {DATA_QC_SCHEMA}') -END diff --git a/testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql b/testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql deleted file mode 100644 index 22b4576..0000000 --- a/testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +++ /dev/null @@ -1 +0,0 @@ -GRANT EXECUTE ON SCHEMA::{DATA_QC_SCHEMA} TO {DB_USER}; diff --git a/testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml b/testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml index db02274..e32c609 100644 --- a/testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +++ 
b/testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml @@ -51,8 +51,8 @@ strTemplate05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) a END ) AS filled_value_ct, LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text, LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text, - SUM({DATA_QC_SCHEMA}.fndk_isnum(LEFT("{COL_NAME}", 31))) AS numeric_ct, - SUM({DATA_QC_SCHEMA}.fndk_isdate(LEFT("{COL_NAME}", 26))) AS date_ct, + SUM({{DKFN_IS_NUM;;LEFT("{COL_NAME}", 31)}} ) AS numeric_ct, + SUM({{DKFN_IS_DATE;;LEFT("{COL_NAME}", 26)}} ) AS date_ct, CASE WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[0-9]{1,5}[a-zA-Z]?\s\w{1,5}\.?\s?\w*\s?\w*\s[a-zA-Z]{1,6}\.?\s?[0-9]{0,5}[A-Z]{0,1}$' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'STREET_ADDR' @@ -142,31 +142,31 @@ strTemplate11_D: CASE END as min_date, MAX("{COL_NAME}") as max_date, SUM(CASE - WHEN {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 12 THEN 1 + WHEN {{DKFN_DATEDIFF_MONTH;;"{COL_NAME}";;'{RUN_DATE}'}} > 12 THEN 1 ELSE 0 END) AS before_1yr_date_ct, SUM(CASE - WHEN {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 60 THEN 1 + WHEN {{DKFN_DATEDIFF_MONTH;;"{COL_NAME}";;'{RUN_DATE}'}} > 60 THEN 1 ELSE 0 END) AS before_5yr_date_ct, SUM(CASE - WHEN {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1 + WHEN {{DKFN_DATEDIFF_MONTH;;"{COL_NAME}";;'{RUN_DATE}'}} > 240 THEN 1 ELSE 0 END) AS before_20yr_date_ct, SUM(CASE - WHEN {DATA_QC_SCHEMA}.DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 + WHEN {{DKFN_DATEDIFF_DAY;;"{COL_NAME}";;'{RUN_DATE}'}} BETWEEN 0 AND 365 THEN 1 ELSE 0 END) AS within_1yr_date_ct, SUM(CASE - WHEN {DATA_QC_SCHEMA}.DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1 + WHEN {{DKFN_DATEDIFF_DAY;;"{COL_NAME}";;'{RUN_DATE}'}} BETWEEN 0 AND 30 THEN 1 ELSE 0 END) AS within_1mo_date_ct, SUM(CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 ELSE 0 END) AS future_date_ct, - COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present, - COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF('WEEK', "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present, - COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present, + COUNT(DISTINCT {{DKFN_DATEDIFF_DAY;;"{COL_NAME}";;'{RUN_DATE}'}} ) as date_days_present, + COUNT(DISTINCT {{DKFN_DATEDIFF_WEEK;;"{COL_NAME}";;'{RUN_DATE}'}} ) as date_weeks_present, + COUNT(DISTINCT {{DKFN_DATEDIFF_MONTH;;"{COL_NAME}";;'{RUN_DATE}'}} ) as date_months_present, strTemplate11_else: NULL as min_date, diff --git a/testgen/template/flavors/postgresql/profiling/templated_functions.yaml b/testgen/template/flavors/postgresql/profiling/templated_functions.yaml new file mode 100644 index 0000000..cf9d854 --- /dev/null +++ b/testgen/template/flavors/postgresql/profiling/templated_functions.yaml @@ -0,0 +1,109 @@ +DATEDIFF_DAY: DATE({$2}) - DATE({$1}) + +DATEDIFF_WEEK: (DATE({$2}) - DATE({$1})) / 7 + +DATEDIFF_MONTH: (DATE_PART('year', {$2}::TIMESTAMP) - DATE_PART('year', {$1}::TIMESTAMP)) * 12 + (DATE_PART('month', {$2}::TIMESTAMP) - DATE_PART('month', {$1}::TIMESTAMP)) + +DATEDIFF_QUARTER: ((DATE_PART('year', {$2}::TIMESTAMP) - DATE_PART('year', {$1}::TIMESTAMP)) * 4) + (DATE_PART('quarter', {$2}::TIMESTAMP) - DATE_PART('quarter', {$1}::TIMESTAMP)) + +DATEDIFF_YEAR: DATE_PART('year', {$2}::TIMESTAMP) - DATE_PART('year', {$1}::TIMESTAMP) + +IS_NUM: CASE + WHEN {$1} ~ E'^\\s*[+-]?\\$?\\s*[0-9]+(,[0-9]{3})*(\\.[0-9]*)?[\\%]?\\s*$' 
THEN 1 + ELSE 0 + END + +IS_DATE: CASE + /* YYYY-MM-DD HH:MM:SS SSSSSS or YYYY-MM-DD HH:MM:SS */ + WHEN {$1} ~ '^(\\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])\\s(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\s[0-9]{6})?$' + THEN CASE + WHEN LEFT({$1}, 4)::INT BETWEEN 1800 AND 2200 + AND ( + ( SUBSTRING ({$1}, 6, 2) IN ('01', '03', '05', '07', '08', + '10', '12') + AND SUBSTRING ({$1}, 9, 2)::INT BETWEEN 1 AND 31 ) + OR ( SUBSTRING ({$1}, 6, 2) IN ('04', '06', '09') + AND SUBSTRING ({$1}, 9, 2)::INT BETWEEN 1 AND 30 ) + OR ( SUBSTRING ({$1}, 6, 2) = '02' + AND SUBSTRING ({$1}, 9, 2)::INT ::INT BETWEEN 1 AND 29) + ) + THEN 1 + ELSE 0 + END + /* YYYYMMDDHHMMSSSSSS or YYYYMMDD */ + WHEN {$1} ~ '^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])([0-5][0-9])([0-5][0-9])([0-9]{6})$' + OR {$1} ~ '^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])$' + THEN CASE + WHEN LEFT({$1}, 4)::INT BETWEEN 1800 AND 2200 + AND ( + ( SUBSTRING({$1}, 5, 2) IN ('01', '03', '05', '07', '08', + '10', '12') + AND SUBSTRING({$1}, 7, 2)::INT BETWEEN 1 AND 31 ) + OR ( SUBSTRING({$1}, 5, 2) IN ('04', '06', '09') + AND SUBSTRING({$1}, 7, 2)::INT BETWEEN 1 AND 30 ) + OR ( SUBSTRING({$1}, 5, 2) = '02' + AND SUBSTRING({$1}, 7, 2)::INT::INT BETWEEN 1 AND 29) + ) + THEN 1 + ELSE 0 + END + /* Exclude anything else long */ + WHEN LENGTH({$1}) > 11 THEN 0 + /* YYYY-MMM/MM-DD */ + WHEN REGEXP_REPLACE(UPPER({$1}), '(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)', '12', 'g') + ~ '[12][09][0-9][0-9]-[0-1]?[0-9]-[0-3]?[0-9]' + THEN CASE + WHEN SPLIT_PART({$1}, '-', 1)::INT BETWEEN 1800 AND 2200 + AND ( + ( UPPER(SPLIT_PART({$1}, '-', 2)) IN ('01', '03', '05', '07', '08', + '1', '3', '5', '7', '8', '10', '12', + 'JAN', 'MAR', 'MAY', 'JUL', 'AUG', + 'OCT', 'DEC') + AND SPLIT_PART({$1}, '-', 3)::INT BETWEEN 1 AND 31 ) + OR ( UPPER(SPLIT_PART({$1}, '-', 2)) IN ('04', '06', '09', '4', '6', '9', '11', + 'APR', 'JUN', 'SEP', 'NOV') + AND SPLIT_PART({$1}, '-', 3)::INT BETWEEN 1 AND 30 ) + OR ( UPPER(SPLIT_PART({$1}, '-', 2)) IN ('02', '2', 'FEB') + AND SPLIT_PART({$1}, '-', 3)::INT BETWEEN 1 AND 29) + ) + THEN 1 + ELSE 0 + END + /* MM/-DD/-YY/YYYY */ + WHEN REPLACE({$1}, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[12][09][0-9][0-9]$' + OR REPLACE({$1}, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[0-9][0-9]$' + THEN + CASE + WHEN SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 1)::INT BETWEEN 1 AND 12 + AND ( + ( SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 1)::INT IN (1, 3, 5, 7, 8, 10, 12) + AND SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 2)::INT BETWEEN 1 AND 31 ) + OR ( SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 1)::INT IN (4, 6, 9, 11) + AND SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 2)::INT BETWEEN 1 AND 30 ) + OR ( SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 1)::INT = 2 + AND SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 2)::INT BETWEEN 1 AND 29) + ) + AND + ('20' || RIGHT(SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 3), 2))::INT BETWEEN 1800 AND 2200 + THEN 1 + ELSE 0 + END + /* DD-MMM-YYYY */ + WHEN UPPER({$1}) ~ '[0-3]?[0-9]-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[12][09][0-9][0-9]' + THEN + CASE + WHEN SPLIT_PART({$1}, '-', 3)::INT BETWEEN 1800 AND 2200 + AND ( + ( UPPER(SPLIT_PART({$1}, '-', 2)) IN ('JAN', 'MAR', 'MAY', 'JUL', 'AUG', 'OCT', 'DEC') + AND SPLIT_PART({$1}, '-', 1)::INT BETWEEN 1 AND 31 ) + OR ( UPPER(SPLIT_PART({$1}, '-', 2)) IN ('APR', 'JUN', 'SEP', 'NOV') + AND SPLIT_PART({$1}, '-', 1)::INT BETWEEN 1 AND 30 ) + OR ( UPPER(SPLIT_PART({$1}, '-', 2)) = 'FEB' + AND SPLIT_PART({$1}, '-', 1)::INT BETWEEN 1 AND 
29) + ) + THEN 1 + ELSE 0 + END + ELSE 0 + END + diff --git a/testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql b/testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql deleted file mode 100644 index cff460f..0000000 --- a/testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +++ /dev/null @@ -1,157 +0,0 @@ -CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.DATEDIFF(difftype character varying, firstdate timestamp without time zone, seconddate timestamp without time zone) -RETURNS BIGINT AS $$ - SELECT - CASE - WHEN UPPER(difftype) IN ('DAY', 'DD', 'D') THEN - DATE(seconddate) - DATE(firstdate) - WHEN UPPER(difftype) IN ('WEEK','WK', 'W') THEN - (DATE(seconddate) - DATE(firstdate)) / 7 - WHEN UPPER(difftype) IN ('MON', 'MONTH', 'MM') THEN - (DATE_PART('year', seconddate) - DATE_PART('year', firstdate)) * 12 + (DATE_PART('month', seconddate) - DATE_PART('month', firstdate)) - WHEN UPPER(difftype) IN ('QUARTER', 'QTR', 'Q') THEN - ((DATE_PART('year', seconddate) - DATE_PART('year', firstdate)) * 4) + (DATE_PART('quarter', seconddate) - DATE_PART('quarter', firstdate)) - WHEN UPPER(difftype) IN ('YEAR', 'YY', 'Y') THEN - DATE_PART('year', seconddate) - DATE_PART('year', firstdate) - ELSE - NULL::BIGINT - END; -$$ LANGUAGE sql IMMUTABLE STRICT; - -CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fn_charcount(instring character varying, searchstring character varying) returns bigint - language plpgsql -as -$$ - BEGIN - RETURN (CHAR_LENGTH(instring) - CHAR_LENGTH(REPLACE(instring, searchstring, ''))) / CHAR_LENGTH(searchstring); - END; -$$; - - -CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fn_parsefreq(top_freq_values VARCHAR(1000), rowno INTEGER, colno INTEGER) returns VARCHAR(1000) - language plpgsql -as -$$ - BEGIN - RETURN SPLIT_PART(SPLIT_PART(top_freq_values, CHR(10), rowno), '|', colno+1); - END; -$$; - - -CREATE -OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isnum(VARCHAR) - RETURNS INTEGER - IMMUTABLE - AS - $$ -SELECT CASE - WHEN $1 ~ E'^\\s*[+-]?\\$?\\s*[0-9]+(,[0-9]{3})*(\\.[0-9]*)?[\\%]?\\s*$' THEN 1 - ELSE 0 - END; -$$ -LANGUAGE sql; - - - - - -CREATE -OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(VARCHAR) - RETURNS INTEGER - IMMUTABLE - AS $$ -SELECT CASE - -- YYYY-MM-DD HH:MM:SS SSSSSS or YYYY-MM-DD HH:MM:SS - WHEN $1 ~ '^(\\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])\\s(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\s[0-9]{6})?$' - THEN CASE - WHEN LEFT($1, 4):: INT BETWEEN 1800 AND 2200 - AND ( - ( SUBSTRING ($1, 6, 2) IN ('01', '03', '05', '07', '08', - '10', '12') - AND SUBSTRING ($1, 9, 2):: INT BETWEEN 1 AND 31 ) - OR ( SUBSTRING ($1, 6, 2) IN ('04', '06', '09') - AND SUBSTRING ($1, 9, 2):: INT BETWEEN 1 AND 30 ) - OR ( SUBSTRING ($1, 6, 2) = '02' - AND SUBSTRING ($1, 9, 2):: INT :: INT BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 -END - -- YYYYMMDDHHMMSSSSSS or YYYYMMDD -WHEN $1 ~ '^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])([0-5][0-9])([0-5][0-9])([0-9]{6})$' - OR $1 ~ '^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])$' - THEN CASE - WHEN LEFT($1, 4)::INT BETWEEN 1800 AND 2200 - AND ( - ( SUBSTRING($1, 5, 2) IN ('01', '03', '05', '07', '08', - '10', '12') - AND SUBSTRING($1, 7, 2)::INT BETWEEN 1 AND 31 ) - OR ( SUBSTRING($1, 5, 2) IN ('04', '06', '09') - AND SUBSTRING($1, 7, 2)::INT BETWEEN 1 AND 30 ) - OR ( SUBSTRING($1, 5, 2) = '02' - AND SUBSTRING($1, 7, 2)::INT::INT BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 -END - -- Exclude anything 
else long -WHEN LENGTH($1) > 11 THEN 0 - -- YYYY-MMM/MM-DD - WHEN REGEXP_REPLACE(UPPER($1), '(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)', '12', 'g') - ~ '[12][09][0-9][0-9]-[0-1]?[0-9]-[0-3]?[0-9]' - THEN CASE - WHEN SPLIT_PART($1, '-', 1)::INT BETWEEN 1800 AND 2200 - AND ( - ( UPPER(SPLIT_PART($1, '-', 2)) IN ('01', '03', '05', '07', '08', - '1', '3', '5', '7', '8', '10', '12', - 'JAN', 'MAR', 'MAY', 'JUL', 'AUG', - 'OCT', 'DEC') - AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 31 ) - OR ( UPPER(SPLIT_PART($1, '-', 2)) IN ('04', '06', '09', '4', '6', '9', '11', - 'APR', 'JUN', 'SEP', 'NOV') - AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 30 ) - OR ( UPPER(SPLIT_PART($1, '-', 2)) IN ('02', '2', 'FEB') - AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 -END - -- MM/-DD/-YY/YYYY -WHEN REPLACE($1, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[12][09][0-9][0-9]$' - OR REPLACE($1, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[0-9][0-9]$' - THEN - CASE - WHEN SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT BETWEEN 1 AND 12 - AND ( - ( SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT IN (1, 3, 5, 7, 8, 10, 12) - AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 31 ) - OR ( SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT IN (4, 6, 9, 11) - AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 30 ) - OR ( SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT = 2 - AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 29) - ) - AND - ('20' || RIGHT(SPLIT_PART(REPLACE($1, '-', '/'), '/', 3), 2))::INT BETWEEN 1800 AND 2200 - THEN 1 - ELSE 0 -END - -- DD-MMM-YYYY -WHEN UPPER($1) ~ '[0-3]?[0-9]-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[12][09][0-9][0-9]' - THEN - CASE - WHEN SPLIT_PART($1, '-', 3)::INT BETWEEN 1800 AND 2200 - AND ( - ( UPPER(SPLIT_PART($1, '-', 2)) IN ('JAN', 'MAR', 'MAY', 'JUL', 'AUG', 'OCT', 'DEC') - AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 31 ) - OR ( UPPER(SPLIT_PART($1, '-', 2)) IN ('APR', 'JUN', 'SEP', 'NOV') - AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 30 ) - OR ( UPPER(SPLIT_PART($1, '-', 2)) = 'FEB' - AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 -END -ELSE 0 -END -as isdate - $$ - LANGUAGE sql; diff --git a/testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql b/testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql deleted file mode 100644 index 4cd79fe..0000000 --- a/testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +++ /dev/null @@ -1 +0,0 @@ -CREATE SCHEMA IF NOT exists {DATA_QC_SCHEMA}; diff --git a/testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql b/testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql deleted file mode 100644 index ac6d077..0000000 --- a/testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +++ /dev/null @@ -1,2 +0,0 @@ -GRANT ALL PRIVILEGES ON SCHEMA {DATA_QC_SCHEMA} TO {DB_USER}; -GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA {DATA_QC_SCHEMA} TO {DB_USER}; diff --git a/testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml b/testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml index 8856fb2..b876a4d 100644 --- a/testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +++ 
b/testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml @@ -51,8 +51,8 @@ strTemplate05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) a END ) AS filled_value_ct, LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text, LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text, - SUM({DATA_QC_SCHEMA}.fndk_isnum(LEFT("{COL_NAME}", 31))) AS numeric_ct, - SUM({DATA_QC_SCHEMA}.fndk_isdate(LEFT("{COL_NAME}", 26))) AS date_ct, + SUM({{DKFN_IS_NUM;;LEFT("{COL_NAME}", 31)}} ) AS numeric_ct, + SUM({{DKFN_IS_DATE;;LEFT("{COL_NAME}", 26)}} ) AS date_ct, CASE WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'STREET_ADDR' diff --git a/testgen/template/flavors/redshift/profiling/templated_functions.yaml b/testgen/template/flavors/redshift/profiling/templated_functions.yaml new file mode 100644 index 0000000..4953e25 --- /dev/null +++ b/testgen/template/flavors/redshift/profiling/templated_functions.yaml @@ -0,0 +1,101 @@ +IS_NUM: CASE + WHEN {$1} ~ '^\\s*[+-]?\\$?\\s*[0-9]+(,[0-9]{3})*(\\.[0-9]*)?[\\%]?\\s*$' THEN 1 + ELSE 0 + END + +IS_DATE: CASE + /* YYYY-MM-DD HH:MM:SS SSSSSS or YYYY-MM-DD HH:MM:SS */ + WHEN {$1} ~ + '^(\\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])\\s(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\s[0-9]{6})?$' + THEN CASE + WHEN LEFT({$1}, 4)::INT BETWEEN 1800 AND 2200 + AND ( + (SUBSTRING({$1}, 6, 2) IN ('01', '03', '05', '07', '08', + '10', '12') + AND SUBSTRING({$1}, 9, 2)::INT BETWEEN 1 AND 31) + OR (SUBSTRING({$1}, 6, 2) IN ('04', '06', '09') + AND SUBSTRING({$1}, 9, 2)::INT BETWEEN 1 AND 30) + OR (SUBSTRING({$1}, 6, 2) = '02' + AND SUBSTRING({$1}, 9, 2)::INT ::INT BETWEEN 1 AND 29) + ) + THEN 1 + ELSE 0 + END + /* YYYYMMDDHHMMSSSSSS or YYYYMMDD */ + WHEN {$1} ~ + '^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])([0-5][0-9])([0-5][0-9])([0-9]{6})$' + OR {$1} ~ '^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])$' + THEN CASE + WHEN LEFT({$1}, 4)::INT BETWEEN 1800 AND 2200 + AND ( + (SUBSTRING({$1}, 5, 2) IN ('01', '03', '05', '07', '08', + '10', '12') + AND SUBSTRING({$1}, 7, 2)::INT BETWEEN 1 AND 31) + OR (SUBSTRING({$1}, 5, 2) IN ('04', '06', '09') + AND SUBSTRING({$1}, 7, 2)::INT BETWEEN 1 AND 30) + OR (SUBSTRING({$1}, 5, 2) = '02' + AND SUBSTRING({$1}, 7, 2)::INT::INT BETWEEN 1 AND 29) + ) + THEN 1 + ELSE 0 + END + /* Exclude anything else long */ + WHEN LENGTH({$1}) > 11 THEN 0 + /* YYYY-MMM/MM-DD */ + WHEN REGEXP_REPLACE(UPPER({$1}), '(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)', '12') + ~ '[12][09][0-9][0-9]-[0-1]?[0-9]-[0-3]?[0-9]' + THEN CASE + WHEN SPLIT_PART({$1}, '-', 1)::INT BETWEEN 1800 AND 2200 + AND ( + (UPPER(SPLIT_PART({$1}, '-', 2)) IN ('01', '03', '05', '07', '08', + '1', '3', '5', '7', '8', '10', '12', + 'JAN', 'MAR', 'MAY', 'JUL', 'AUG', + 'OCT', 'DEC') + AND SPLIT_PART({$1}, '-', 3)::INT BETWEEN 1 AND 31) + OR (UPPER(SPLIT_PART({$1}, '-', 2)) IN ('04', '06', '09', '4', '6', '9', '11', + 'APR', 'JUN', 'SEP', 'NOV') + AND SPLIT_PART({$1}, '-', 3)::INT BETWEEN 1 AND 30) + OR (UPPER(SPLIT_PART({$1}, '-', 2)) IN ('02', '2', 'FEB') + AND SPLIT_PART({$1}, '-', 3)::INT BETWEEN 1 AND 29) + ) + THEN 1 + ELSE 0 + END + /* MM/-DD/-YY/YYYY */ + WHEN REPLACE({$1}, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[12][09][0-9][0-9]$' + OR REPLACE({$1}, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[0-9][0-9]$' + THEN + CASE + WHEN SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 
1)::INT BETWEEN 1 AND 12 + AND ( + (SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 1)::INT IN (1, 3, 5, 7, 8, 10, 12) + AND SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 2)::INT BETWEEN 1 AND 31) + OR (SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 1)::INT IN (4, 6, 9, 11) + AND SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 2)::INT BETWEEN 1 AND 30) + OR (SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 1)::INT = 2 + AND SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 2)::INT BETWEEN 1 AND 29) + ) + AND + ('20' + RIGHT(SPLIT_PART(REPLACE({$1}, '-', '/'), '/', 3), 2))::INT BETWEEN 1800 AND 2200 + THEN 1 + ELSE 0 + END + /* DD-MMM-YYYY */ + WHEN UPPER({$1}) ~ '[0-3]?[0-9]-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[12][09][0-9][0-9]' + THEN + CASE + WHEN SPLIT_PART({$1}, '-', 3)::INT BETWEEN 1800 AND 2200 + AND ( + (UPPER(SPLIT_PART({$1}, '-', 2)) IN ('JAN', 'MAR', 'MAY', 'JUL', 'AUG', 'OCT', 'DEC') + AND SPLIT_PART({$1}, '-', 1)::INT BETWEEN 1 AND 31) + OR (UPPER(SPLIT_PART({$1}, '-', 2)) IN ('APR', 'JUN', 'SEP', 'NOV') + AND SPLIT_PART({$1}, '-', 1)::INT BETWEEN 1 AND 30) + OR (UPPER(SPLIT_PART({$1}, '-', 2)) = 'FEB' + AND SPLIT_PART({$1}, '-', 1)::INT BETWEEN 1 AND 29) + ) + THEN 1 + ELSE 0 + END + ELSE 0 + END + diff --git a/testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql b/testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql deleted file mode 100644 index 0270a38..0000000 --- a/testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +++ /dev/null @@ -1,115 +0,0 @@ -CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isnum(VARCHAR) - RETURNS INTEGER - IMMUTABLE - AS - $$ -SELECT CASE - WHEN $1 ~ '^\\s*[+-]?\\$?\\s*[0-9]+(,[0-9]{3})*(\\.[0-9]*)?[\\%]?\\s*$' THEN 1 - ELSE 0 - END; -$$ -LANGUAGE sql; - - -CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(VARCHAR) - RETURNS INTEGER - IMMUTABLE - AS $$ -SELECT CASE - -- YYYY-MM-DD HH:MM:SS SSSSSS or YYYY-MM-DD HH:MM:SS - WHEN $1 ~ - '^(\\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])\\s(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\s[0-9]{6})?$' - THEN CASE - WHEN LEFT($1, 4):: INT BETWEEN 1800 AND 2200 - AND ( - (SUBSTRING($1, 6, 2) IN ('01', '03', '05', '07', '08', - '10', '12') - AND SUBSTRING($1, 9, 2):: INT BETWEEN 1 AND 31) - OR (SUBSTRING($1, 6, 2) IN ('04', '06', '09') - AND SUBSTRING($1, 9, 2):: INT BETWEEN 1 AND 30) - OR (SUBSTRING($1, 6, 2) = '02' - AND SUBSTRING($1, 9, 2):: INT :: INT BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 - END - -- YYYYMMDDHHMMSSSSSS or YYYYMMDD - WHEN $1 ~ - '^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])([0-5][0-9])([0-5][0-9])([0-9]{6})$' - OR $1 ~ '^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])$' - THEN CASE - WHEN LEFT($1, 4)::INT BETWEEN 1800 AND 2200 - AND ( - (SUBSTRING($1, 5, 2) IN ('01', '03', '05', '07', '08', - '10', '12') - AND SUBSTRING($1, 7, 2)::INT BETWEEN 1 AND 31) - OR (SUBSTRING($1, 5, 2) IN ('04', '06', '09') - AND SUBSTRING($1, 7, 2)::INT BETWEEN 1 AND 30) - OR (SUBSTRING($1, 5, 2) = '02' - AND SUBSTRING($1, 7, 2)::INT::INT BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 - END - -- Exclude anything else long - WHEN LENGTH($1) > 11 THEN 0 - -- YYYY-MMM/MM-DD - WHEN REGEXP_REPLACE(UPPER($1), '(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)', '12') - ~ '[12][09][0-9][0-9]-[0-1]?[0-9]-[0-3]?[0-9]' - THEN CASE - WHEN SPLIT_PART($1, '-', 1)::INT BETWEEN 1800 AND 2200 - AND ( - (UPPER(SPLIT_PART($1, '-', 2)) IN ('01', '03', '05', '07', '08', - '1', '3', '5', '7', '8', '10', '12', - 'JAN', 
'MAR', 'MAY', 'JUL', 'AUG', - 'OCT', 'DEC') - AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 31) - OR (UPPER(SPLIT_PART($1, '-', 2)) IN ('04', '06', '09', '4', '6', '9', '11', - 'APR', 'JUN', 'SEP', 'NOV') - AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 30) - OR (UPPER(SPLIT_PART($1, '-', 2)) IN ('02', '2', 'FEB') - AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 - END - -- MM/-DD/-YY/YYYY - WHEN REPLACE($1, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[12][09][0-9][0-9]$' - OR REPLACE($1, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[0-9][0-9]$' - THEN - CASE - WHEN SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT BETWEEN 1 AND 12 - AND ( - (SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT IN (1, 3, 5, 7, 8, 10, 12) - AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 31) - OR (SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT IN (4, 6, 9, 11) - AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 30) - OR (SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT = 2 - AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 29) - ) - AND - ('20' + RIGHT(SPLIT_PART(REPLACE($1, '-', '/'), '/', 3), 2))::INT BETWEEN 1800 AND 2200 - THEN 1 - ELSE 0 - END - -- DD-MMM-YYYY - WHEN UPPER($1) ~ '[0-3]?[0-9]-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[12][09][0-9][0-9]' - THEN - CASE - WHEN SPLIT_PART($1, '-', 3)::INT BETWEEN 1800 AND 2200 - AND ( - (UPPER(SPLIT_PART($1, '-', 2)) IN ('JAN', 'MAR', 'MAY', 'JUL', 'AUG', 'OCT', 'DEC') - AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 31) - OR (UPPER(SPLIT_PART($1, '-', 2)) IN ('APR', 'JUN', 'SEP', 'NOV') - AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 30) - OR (UPPER(SPLIT_PART($1, '-', 2)) = 'FEB' - AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 - END - ELSE 0 - END - AS isdate; - $$ - LANGUAGE sql; diff --git a/testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql b/testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql deleted file mode 100644 index 4cd79fe..0000000 --- a/testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +++ /dev/null @@ -1 +0,0 @@ -CREATE SCHEMA IF NOT exists {DATA_QC_SCHEMA}; diff --git a/testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql b/testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql deleted file mode 100644 index ac6d077..0000000 --- a/testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +++ /dev/null @@ -1,2 +0,0 @@ -GRANT ALL PRIVILEGES ON SCHEMA {DATA_QC_SCHEMA} TO {DB_USER}; -GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA {DATA_QC_SCHEMA} TO {DB_USER}; diff --git a/testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml b/testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml index 5b3ab3e..4538d10 100644 --- a/testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +++ b/testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml @@ -52,8 +52,8 @@ strTemplate05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) a END ) AS filled_value_ct, LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text, LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text, - SUM({DATA_QC_SCHEMA}.fndk_isnum(LEFT("{COL_NAME}", 31))) AS numeric_ct, - SUM({DATA_QC_SCHEMA}.fndk_isdate(LEFT("{COL_NAME}", 26))) AS date_ct, + SUM({{DKFN_IS_NUM;;LEFT("{COL_NAME}", 
31)}} ) AS numeric_ct, + SUM({{DKFN_IS_DATE;;LEFT("{COL_NAME}", 26)}} ) AS date_ct, CASE WHEN CAST(SUM( CASE WHEN REGEXP_LIKE("{COL_NAME}"::VARCHAR, '^[0-9]{1,5}[a-zA-Z]?\\s\\w{1,5}\\.?\\s?\\w*\\s?\\w*\\s[a-zA-Z]{1,6}\\.?\\s?[0-9]{0,5}[A-Z]{0,1}$') THEN 1 END) AS FLOAT) / CAST(COUNT("{COL_NAME}") AS FLOAT) > 0.8 THEN 'STREET_ADDR' diff --git a/testgen/template/flavors/snowflake/profiling/templated_functions.yaml b/testgen/template/flavors/snowflake/profiling/templated_functions.yaml new file mode 100644 index 0000000..1afbdea --- /dev/null +++ b/testgen/template/flavors/snowflake/profiling/templated_functions.yaml @@ -0,0 +1,55 @@ +IS_NUM: CASE + WHEN REGEXP_LIKE({$1}::VARCHAR, '^\\s*[+-]?\\$?\\s*[0-9]+(,[0-9]{3})*(\\.[0-9]*)?[\\%]?\\s*$') THEN 1 + ELSE 0 + END + +IS_DATE: CASE + /* YYYY-MM-DD HH:MM:SS SSSSSS */ + WHEN TRY_TO_DATE({$1}, 'YYYY-MM-DD HH:MI:SS SSSSSS') IS NOT NULL THEN 1 + + /* YYYY-MM-DD HH:MM:SS */ + WHEN TRY_TO_DATE({$1}, 'YYYY-MM-DD HH:MI:SS') IS NOT NULL THEN 1 + + /* YYYYMMDDHHMMSSSSSS */ + WHEN TRY_TO_DATE({$1}, 'YYYYMMDDHHMISSSSSS') IS NOT NULL THEN 1 + + /* YYYYMMDDHHMMSS */ + WHEN TRY_TO_DATE({$1}, 'YYYYMMDDHHMISS') IS NOT NULL THEN 1 + + /* YYYYMMDD */ + WHEN LENGTH({$1}) = 8 AND TRY_TO_DATE({$1}, 'YYYYMMDD') IS NOT NULL THEN 1 + + /* YYYY-MON-DD HH:MM:SS SSSSSS */ + /* WHEN TRY_TO_DATE({$1}, 'YYYY-MON-DD HH:MI:SS SSSSSS') IS NOT NULL THEN 1 */ + + /* YYYY-MON-DD HH:MM:SS */ + /* WHEN TRY_TO_DATE({$1}, 'YYYY-MON-DD HH:MI:SS') IS NOT NULL THEN 1 */ + + /* Exclude anything else long */ + WHEN LENGTH({$1}) > 11 THEN 0 + + /* YYYY-MON-DD */ + WHEN TRY_TO_DATE({$1}, 'YYYY-MON-DD') IS NOT NULL THEN 1 + + /* YYYY-MM-DD */ + WHEN TRY_TO_DATE({$1}, 'YYYY-MM-DD') IS NOT NULL THEN 1 + + /* MM/DD/YYYY */ + WHEN TRY_TO_DATE({$1}, 'MM/DD/YYYY') IS NOT NULL THEN 1 + + /* MM/DD/YY */ + WHEN TRY_TO_DATE({$1}, 'MM/DD/YY') IS NOT NULL THEN 1 + + /* MM-DD-YYYY */ + WHEN TRY_TO_DATE({$1}, 'MM-DD-YYYY') IS NOT NULL THEN 1 + + /* MM-DD-YY */ + WHEN TRY_TO_DATE({$1}, 'MM-DD-YY') IS NOT NULL THEN 1 + + /* DD-MMM-YYYY */ + WHEN TRY_TO_DATE({$1}, 'DD-MON-YYYY') IS NOT NULL THEN 1 + + + ELSE 0 + END + diff --git a/testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql b/testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql deleted file mode 100644 index f271a24..0000000 --- a/testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +++ /dev/null @@ -1,69 +0,0 @@ -CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isnum(strparm VARCHAR) -RETURNS INTEGER -LANGUAGE SQL -IMMUTABLE -AS -$$ -SELECT CASE - WHEN REGEXP_LIKE(strparm::VARCHAR, '^\\s*[+-]?\\$?\\s*[0-9]+(,[0-9]{3})*(\\.[0-9]*)?[\\%]?\\s*$') THEN 1 - ELSE 0 - END -$$; - - -CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(strparm VARCHAR) -RETURNS INTEGER -LANGUAGE SQL -IMMUTABLE -AS -$$ -SELECT CASE - -- YYYY-MM-DD HH:MM:SS SSSSSS - WHEN TRY_TO_DATE(strparm, 'YYYY-MM-DD HH:MI:SS SSSSSS') IS NOT NULL THEN 1 - - -- YYYY-MM-DD HH:MM:SS - WHEN TRY_TO_DATE(strparm, 'YYYY-MM-DD HH:MI:SS') IS NOT NULL THEN 1 - - -- YYYYMMDDHHMMSSSSSS - WHEN TRY_TO_DATE(strparm, 'YYYYMMDDHHMISSSSSS') IS NOT NULL THEN 1 - - -- YYYYMMDDHHMMSS - WHEN TRY_TO_DATE(strparm, 'YYYYMMDDHHMISS') IS NOT NULL THEN 1 - - -- YYYYMMDD - WHEN LENGTH(strparm) = 8 AND TRY_TO_DATE(strparm, 'YYYYMMDD') IS NOT NULL THEN 1 - - -- YYYY-MON-DD HH:MM:SS SSSSSS - --WHEN TRY_TO_DATE(strparm, 'YYYY-MON-DD HH:MI:SS SSSSSS') IS NOT NULL THEN 1 - - -- YYYY-MON-DD HH:MM:SS - --WHEN 
TRY_TO_DATE(strparm, 'YYYY-MON-DD HH:MI:SS') IS NOT NULL THEN 1 - - -- Exclude anything else long - WHEN LENGTH(strparm) > 11 THEN 0 - - -- YYYY-MON-DD - WHEN TRY_TO_DATE(strparm, 'YYYY-MON-DD') IS NOT NULL THEN 1 - - -- YYYY-MM-DD - WHEN TRY_TO_DATE(strparm, 'YYYY-MM-DD') IS NOT NULL THEN 1 - - -- MM/DD/YYYY - WHEN TRY_TO_DATE(strparm, 'MM/DD/YYYY') IS NOT NULL THEN 1 - - -- MM/DD/YY - WHEN TRY_TO_DATE(strparm, 'MM/DD/YY') IS NOT NULL THEN 1 - - --MM-DD-YYYY - WHEN TRY_TO_DATE(strparm, 'MM-DD-YYYY') IS NOT NULL THEN 1 - - --MM-DD-YY - WHEN TRY_TO_DATE(strparm, 'MM-DD-YY') IS NOT NULL THEN 1 - - --DD-MMM-YYYY - WHEN TRY_TO_DATE(strparm, 'DD-MON-YYYY') IS NOT NULL THEN 1 - - - ELSE 0 - END -$$; diff --git a/testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql b/testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql deleted file mode 100644 index 4cd79fe..0000000 --- a/testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +++ /dev/null @@ -1 +0,0 @@ -CREATE SCHEMA IF NOT exists {DATA_QC_SCHEMA}; diff --git a/testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql b/testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql deleted file mode 100644 index 2a60aa7..0000000 --- a/testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +++ /dev/null @@ -1,6 +0,0 @@ - -CREATE ROLE IF NOT EXISTS dk_qc_role; -GRANT ALL PRIVILEGES ON SCHEMA {DATA_QC_SCHEMA} TO ROLE dk_qc_role; -GRANT USAGE ON FUNCTION {DATA_QC_SCHEMA}.fndk_isnum(VARCHAR) TO dk_qc_role; -GRANT USAGE ON FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(VARCHAR) TO dk_qc_role; -GRANT ROLE dk_qc_role TO USER {DB_USER}; \ No newline at end of file diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py index 039bee5..9dba905 100644 --- a/testgen/ui/services/test_results_service.py +++ b/testgen/ui/services/test_results_service.py @@ -1,6 +1,7 @@ import pandas as pd from testgen.common import ConcatColumnList +from testgen.common.read_file import replace_templated_functions from testgen.ui.services import database_service as db from testgen.ui.services.string_service import empty_if_null from testgen.ui.services.test_definition_service import get_test_definition @@ -103,7 +104,6 @@ def do_source_data_lookup(db_schema, tr_data, sql_only=False): def replace_parms(df_test, str_query): if df_test.empty: raise ValueError("This test definition is no longer present.") - str_query = str_query.replace("{TARGET_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"])) str_query = str_query.replace("{TABLE_NAME}", empty_if_null(tr_data["table_name"])) str_query = str_query.replace("{COLUMN_NAME}", empty_if_null(tr_data["column_names"])) @@ -143,6 +143,9 @@ def replace_parms(df_test, str_query): str_substitute = ConcatColumnList(df_test.at[0, "match_groupby_names"], "") str_query = str_query.replace("{CONCAT_MATCH_GROUPBY}", str_substitute) + if "{{DKFN_" in str_query: + str_query = replace_templated_functions(str_query, lst_query[0]["sql_flavor"]) + if str_query is None or str_query == "": raise ValueError("Lookup query is not defined for this Test Type.") return str_query diff --git a/testgen/ui/views/profiling_anomalies.py b/testgen/ui/views/profiling_anomalies.py index 54bf6e1..1450e42 100644 --- a/testgen/ui/views/profiling_anomalies.py +++ b/testgen/ui/views/profiling_anomalies.py @@ -8,6 +8,7 
@@ import testgen.ui.services.form_service as fm import testgen.ui.services.query_service as dq from testgen.common import date_service +from testgen.common.read_file import replace_templated_functions from testgen.ui.components import widgets as testgen from testgen.ui.navigation.page import Page from testgen.ui.services import project_service @@ -388,7 +389,7 @@ def get_lookup_query(test_id, detail_exp, column_names): return sql_query def replace_parms(str_query): - str_query = ( + str_query: str = ( get_lookup_query(selected_row["anomaly_id"], selected_row["detail"], selected_row["column_name"]) if lst_query[0]["lookup_query"] == "created_in_ui" else lst_query[0]["lookup_query"] @@ -399,6 +400,8 @@ def replace_parms(str_query): str_query = str_query.replace("{DATA_QC_SCHEMA}", lst_query[0]["project_qc_schema"]) str_query = str_query.replace("{DETAIL_EXPRESSION}", selected_row["detail"]) str_query = str_query.replace("{PROFILE_RUN_DATE}", selected_row["profiling_starttime"]) + if "{{DKFN_" in str_query: + str_query = replace_templated_functions(str_query, lst_query[0]["sql_flavor"]) if str_query is None or str_query == "": raise ValueError("Lookup query is not defined for this Anomoly Type.") return str_query diff --git a/tests/unit/test_read_file.py b/tests/unit/test_read_file.py new file mode 100644 index 0000000..a5aa0fd --- /dev/null +++ b/tests/unit/test_read_file.py @@ -0,0 +1,15 @@ +import pytest + +from testgen.common.read_file import replace_templated_functions + + +@pytest.mark.unit +def test_replace_templated_functions(): + fn = replace_templated_functions( + "SELECT {{DKFN_DATEDIFF_YEAR;;'{COL_NAME}'::DATE;;'1970-01-01'}} FROM ATABLE WHERE {{DKFN_DATEDIFF_MONTH;;'{COL_NAME}'::DATE;;'1970-01-01'}} > 36", + "postgresql", + ) + assert ( + fn + == "SELECT DATE_PART('year', '1970-01-01'::TIMESTAMP) - DATE_PART('year', '{COL_NAME}'::DATE::TIMESTAMP) FROM ATABLE WHERE (DATE_PART('year', '1970-01-01'::TIMESTAMP) - DATE_PART('year', '{COL_NAME}'::DATE::TIMESTAMP)) * 12 + (DATE_PART('month', '1970-01-01'::TIMESTAMP) - DATE_PART('month', '{COL_NAME}'::DATE::TIMESTAMP)) > 36" + ) From 8451e7419ae6c035d86e94139ca3d2b64775b3b7 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Thu, 31 Oct 2024 13:34:59 -0400 Subject: [PATCH 28/91] fix(overview): truncate tests percentage --- testgen/ui/views/overview.py | 4 ++-- testgen/utils/__init__.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index c25a62b..779f700 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -12,7 +12,7 @@ from testgen.ui.queries import project_queries from testgen.ui.services import test_suite_service from testgen.ui.session import session -from testgen.utils import to_int +from testgen.utils import to_int, truncate STALE_PROFILE_DAYS = 30 PAGE_ICON = "home" @@ -172,7 +172,7 @@ def render_table_group_card(table_group: pd.Series, project_code: str, key: int) total_tests = to_int(table_group["latest_tests_ct"]) if total_tests: passed_tests = to_int(table_group["latest_tests_passed_ct"]) - testgen.text(f"{round(passed_tests * 100 / total_tests)}% passed") + testgen.text(f"{truncate(passed_tests * 100 / total_tests)}% passed") testgen.text(f"{total_tests} tests in {to_int(table_group['latest_tests_suite_ct'])} test suites", "margin: 12px 0 12px;") testgen.summary_bar( diff --git a/testgen/utils/__init__.py b/testgen/utils/__init__.py index d7475d5..bd4bda8 100644 --- a/testgen/utils/__init__.py +++ 
b/testgen/utils/__init__.py @@ -1,3 +1,5 @@ +import math + import pandas as pd @@ -5,3 +7,9 @@ def to_int(value: float | int) -> int: if pd.notnull(value): return int(value) return 0 + + +def truncate(value: float) -> int: + if 0 < value < 1: + return 1 + return math.trunc(value) From 716ff8a1c63222af6c6d22574558021e5bc3a921 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Thu, 31 Oct 2024 13:35:44 -0400 Subject: [PATCH 29/91] feat(overview): add sort dropdown for table groups list --- testgen/ui/views/overview.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index 779f700..2776d56 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -3,6 +3,7 @@ import pandas as pd import streamlit as st +from pandas.api.types import is_string_dtype import testgen.ui.services.database_service as db from testgen.common import date_service @@ -38,7 +39,31 @@ def render(self, project_code: str | None = None, **_kwargs): if render_empty_state(project_code): return - st.html(f'
Table Groups ({len(table_groups_df.index)})
') + table_group_header_col, _, table_group_sort_col = st.columns([0.4, 0.4, 0.2]) + table_group_header_col.html(f'
Table Groups ({len(table_groups_df.index)})
') + with table_group_sort_col: + ascending_fields: list[str] = ["table_groups_name"] + sort_options = pd.DataFrame({ + "value": ["table_groups_name", "latest_profile_start,latest_tests_start"], + "label": ["Name", "Latest Activity"], + }) + + sort_by = testgen.select( + label="Sorted by", + options=sort_options, + required=True, + default_value="latest_profile_start,latest_tests_start", + display_column="label", + value_column="value", + ) + ascending = sort_by in ascending_fields + + table_groups_df.sort_values( + by=sort_by.split(","), + ascending=ascending, + inplace=True, + key=lambda column: column.str.lower() if is_string_dtype(column) else column, + ) for index, table_group in table_groups_df.iterrows(): render_table_group_card(table_group, project_code, index) @@ -142,7 +167,7 @@ def render_table_group_card(table_group: pd.Series, project_code: str, key: int) testgen.flex_row_start() testgen.text(f""" {to_int(table_group['latest_profile_table_ct'])} tables  |  - {to_int(table_group['latest_profile_column_ct'])} tables  | + {to_int(table_group['latest_profile_column_ct'])} columns  | """) testgen.link( label=f"{anomaly_count} hygiene issues", @@ -320,7 +345,7 @@ def get_table_groups_summary(project_code: str) -> pd.DataFrame: GROUP BY test_suite_id ), latest_tests AS ( - SELECT suites.table_groups_id, + SELECT suites.table_groups_id, latest_run.test_starttime, COUNT(DISTINCT latest_run.test_suite_id) as test_suite_ct, COUNT(*) as test_ct, SUM( @@ -366,7 +391,7 @@ def get_table_groups_summary(project_code: str) -> pd.DataFrame: latest_run.id = latest_results.test_run_id ) LEFT JOIN {schema}.test_suites as suites ON (suites.id = lrd.test_suite_id) - GROUP BY suites.table_groups_id + GROUP BY suites.table_groups_id, latest_run.test_starttime ) SELECT groups.id::VARCHAR(50), groups.table_groups_name, @@ -379,6 +404,7 @@ def get_table_groups_summary(project_code: str) -> pd.DataFrame: latest_profile.likely_ct as latest_anomalies_likely_ct, latest_profile.possible_ct as latest_anomalies_possible_ct, latest_profile.dismissed_ct as latest_anomalies_dismissed_ct, + latest_tests.test_starttime as latest_tests_start, latest_tests.test_suite_ct as latest_tests_suite_ct, latest_tests.test_ct as latest_tests_ct, latest_tests.passed_ct as latest_tests_passed_ct, From fd076da24ac37fb8905b89f2698ff3803c878fe5 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Thu, 31 Oct 2024 14:00:35 -0400 Subject: [PATCH 30/91] feat(overview): add input field to filter table groups list --- testgen/ui/views/overview.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index 2776d56..5c1a1b7 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -39,17 +39,23 @@ def render(self, project_code: str | None = None, **_kwargs): if render_empty_state(project_code): return - table_group_header_col, _, table_group_sort_col = st.columns([0.4, 0.4, 0.2]) + table_group_header_col, table_group_filter_col, table_group_sort_col = st.columns([0.6, 0.2, 0.2]) table_group_header_col.html(f'
Table Groups ({len(table_groups_df.index)})
') + with table_group_filter_col: + name_filter = st.text_input(label="Search by table group name") + table_groups_df = table_groups_df.loc[ + table_groups_df["table_groups_name"].str.contains(name_filter, case=False) + ] + with table_group_sort_col: ascending_fields: list[str] = ["table_groups_name"] sort_options = pd.DataFrame({ "value": ["table_groups_name", "latest_profile_start,latest_tests_start"], - "label": ["Name", "Latest Activity"], + "label": ["Table group name", "Latest activity"], }) sort_by = testgen.select( - label="Sorted by", + label="Sort by", options=sort_options, required=True, default_value="latest_profile_start,latest_tests_start", @@ -57,13 +63,13 @@ def render(self, project_code: str | None = None, **_kwargs): value_column="value", ) ascending = sort_by in ascending_fields + table_groups_df.sort_values( + by=sort_by.split(","), + ascending=ascending, + inplace=True, + key=lambda column: column.str.lower() if is_string_dtype(column) else column, + ) - table_groups_df.sort_values( - by=sort_by.split(","), - ascending=ascending, - inplace=True, - key=lambda column: column.str.lower() if is_string_dtype(column) else column, - ) for index, table_group in table_groups_df.iterrows(): render_table_group_card(table_group, project_code, index) From 69d490b4c84cb7c941b399813d714d3746c76102 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Fri, 1 Nov 2024 13:18:11 -0400 Subject: [PATCH 31/91] fix(overview): sort by latest activity date instead of dual sorting --- testgen/ui/views/overview.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index 5c1a1b7..2a7463c 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -48,9 +48,12 @@ def render(self, project_code: str | None = None, **_kwargs): ] with table_group_sort_col: + table_groups_df["latest_activity_date"] = table_groups_df[ + ["latest_profile_start", "latest_tests_start"] + ].max(axis=1) ascending_fields: list[str] = ["table_groups_name"] sort_options = pd.DataFrame({ - "value": ["table_groups_name", "latest_profile_start,latest_tests_start"], + "value": ["table_groups_name", "latest_activity_date"], "label": ["Table group name", "Latest activity"], }) @@ -58,14 +61,14 @@ def render(self, project_code: str | None = None, **_kwargs): label="Sort by", options=sort_options, required=True, - default_value="latest_profile_start,latest_tests_start", + default_value="latest_activity_date", display_column="label", value_column="value", ) - ascending = sort_by in ascending_fields + table_groups_df.sort_values( - by=sort_by.split(","), - ascending=ascending, + by=typing.cast(str, sort_by), + ascending=sort_by in ascending_fields, inplace=True, key=lambda column: column.str.lower() if is_string_dtype(column) else column, ) @@ -351,7 +354,8 @@ def get_table_groups_summary(project_code: str) -> pd.DataFrame: GROUP BY test_suite_id ), latest_tests AS ( - SELECT suites.table_groups_id, latest_run.test_starttime, + SELECT suites.table_groups_id, + MAX(latest_run.test_starttime) AS test_starttime, COUNT(DISTINCT latest_run.test_suite_id) as test_suite_ct, COUNT(*) as test_ct, SUM( @@ -397,7 +401,7 @@ def get_table_groups_summary(project_code: str) -> pd.DataFrame: latest_run.id = latest_results.test_run_id ) LEFT JOIN {schema}.test_suites as suites ON (suites.id = lrd.test_suite_id) - GROUP BY suites.table_groups_id, latest_run.test_starttime + GROUP BY suites.table_groups_id ) SELECT groups.id::VARCHAR(50), 
groups.table_groups_name, From 493d4c38d2ed361e702bcac0a2581ff93bb71fd7 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Thu, 17 Oct 2024 17:19:44 -0400 Subject: [PATCH 32/91] feat(connections): use the new wizard for setting up table groups When in the connection screen and no table groups exist, users are presented with a two-step wizard to create the table group and (optionally) run profiling. --- testgen/ui/components/frontend/css/shared.css | 56 ++- .../frontend/js/components/button.js | 74 ++++- testgen/ui/components/widgets/__init__.py | 2 + testgen/ui/components/widgets/button.py | 10 +- .../components/widgets/testgen_component.py | 2 +- testgen/ui/components/widgets/wizard.py | 213 ++++++++ testgen/ui/forms.py | 117 +++++ testgen/ui/queries/table_group_queries.py | 8 +- testgen/ui/services/connection_service.py | 2 +- testgen/ui/services/table_group_service.py | 4 +- testgen/ui/session.py | 27 +- testgen/ui/views/connections.py | 456 ------------------ testgen/ui/views/connections/__init__.py | 3 + testgen/ui/views/connections/forms.py | 250 ++++++++++ testgen/ui/views/connections/models.py | 8 + testgen/ui/views/connections/page.py | 444 +++++++++++++++++ testgen/ui/views/table_groups/__init__.py | 2 + testgen/ui/views/table_groups/forms.py | 170 +++++++ .../{table_groups.py => table_groups/page.py} | 4 +- testgen/utils/singleton.py | 4 +- 20 files changed, 1344 insertions(+), 512 deletions(-) create mode 100644 testgen/ui/components/widgets/wizard.py create mode 100644 testgen/ui/forms.py delete mode 100644 testgen/ui/views/connections.py create mode 100644 testgen/ui/views/connections/__init__.py create mode 100644 testgen/ui/views/connections/forms.py create mode 100644 testgen/ui/views/connections/models.py create mode 100644 testgen/ui/views/connections/page.py create mode 100644 testgen/ui/views/table_groups/__init__.py create mode 100644 testgen/ui/views/table_groups/forms.py rename testgen/ui/views/{table_groups.py => table_groups/page.py} (99%) diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index 3284332..04aab9a 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -24,8 +24,9 @@ body { --primary-text-color: #000000de; --secondary-text-color: #0000008a; --disabled-text-color: #00000042; - --caption-text-color: rgba(49, 51, 63, 0.6); - /* Match Streamlit's caption color */ + --caption-text-color: rgba(49, 51, 63, 0.6); /* Match Streamlit's caption color */ + --border-color: rgba(0, 0, 0, .12); + --dk-card-background: #fff; --sidebar-background-color: white; --sidebar-item-hover-color: #f5f5f5; @@ -34,22 +35,28 @@ body { --field-underline-color: #9e9e9e; - --button-text-color: var(--primary-text-color); - - --button-hover-state-background: var(--primary-color); --button-hover-state-opacity: 0.12; - --button-basic-text-color: var(--primary-color); --button-basic-background: transparent; + --button-basic-text-color: rgba(0, 0, 0, .54); + --button-basic-hover-state-background: rgba(0, 0, 0, .54); - --button-flat-text-color: rgba(255, 255, 255); - --button-flat-background: rgba(0, 0, 0, .54); + --button-basic-flat-text-color: rgba(0, 0, 0); + --button-basic-flat-background: rgba(0, 0, 0, .54); - --button-stroked-text-color: var(--primary-color); - --button-stroked-background: transparent; - --button-stroked-border: 1px solid rgba(0, 0, 0, .12); + --button-basic-stroked-text-color: rgba(0, 0, 0, .54); + --button-basic-stroked-background: transparent;
- --dk-card-background: #fff; + --button-primary-background: transparent; + --button-primary-text-color: var(--primary-color); + --button-primary-hover-state-background: var(--primary-color); + + --button-primary-flat-text-color: rgba(255, 255, 255); + --button-primary-flat-background: var(--primary-color); + + --button-primary-stroked-text-color: var(--primary-color); + --button-primary-stroked-background: transparent; + --button-stroked-border: 1px solid var(--border-color); } @media (prefers-color-scheme: dark) { @@ -57,8 +64,9 @@ body { --primary-text-color: rgba(255, 255, 255); --secondary-text-color: rgba(255, 255, 255, .7); --disabled-text-color: rgba(255, 255, 255, .5); - --caption-text-color: rgba(250, 250, 250, .6); - /* Match Streamlit's caption color */ + --caption-text-color: rgba(250, 250, 250, .6); /* Match Streamlit's caption color */ + --border-color: rgba(255, 255, 255, .25); + --dk-card-background: #14181f; --sidebar-background-color: #14181f; --sidebar-item-hover-color: #10141b; @@ -66,13 +74,17 @@ body { --sidebar-active-item-border-color: #b4e3c9; --dk-text-value-background: unset; - --button-text-color: var(--primary-text-color); - - --button-flat-background: rgba(255, 255, 255, .54); - - --button-stroked-border: 1px solid rgba(255, 255, 255, .12); - - --dk-card-background: #14181f; + --button-basic-background: transparent; + --button-basic-text-color: rgba(255, 255, 255); + --button-basic-hover-state-background: rgba(255, 255, 255, .54); + + --button-basic-flat-text-color: rgba(255, 255, 255); + --button-basic-flat-background: rgba(255, 255, 255, .54); + + --button-basic-stroked-text-color: rgba(255, 255, 255, .85); + --button-basic-stroked-background: transparent; + + --button-stroked-border: 1px solid var(--border-color); } } @@ -441,4 +453,4 @@ body { .pl-7 { padding-left: 40px; } -/* */ \ No newline at end of file +/* */ diff --git a/testgen/ui/components/frontend/js/components/button.js b/testgen/ui/components/frontend/js/components/button.js index e3670e3..893a1b1 100644 --- a/testgen/ui/components/frontend/js/components/button.js +++ b/testgen/ui/components/frontend/js/components/button.js @@ -2,6 +2,7 @@ * @typedef Properties * @type {object} * @property {(string)} type + * @property {(string|null)} color * @property {(string|null)} label * @property {(string|null)} icon * @property {(string|null)} tooltip @@ -21,6 +22,11 @@ const BUTTON_TYPE = { ICON: 'icon', STROKED: 'stroked', }; +const BUTTON_COLOR = { + BASIC: 'basic', + PRIMARY: 'primary', +}; + const Button = (/** @type Properties */ props) => { loadStylesheet('button', stylesheet); @@ -32,6 +38,10 @@ const Button = (/** @type Properties */ props) => { if (isIconOnly) { // Force a 40px width for the parent iframe & handle window resizing enforceElementWidth(window.frameElement, 40); } + + if (props.width?.val) { + enforceElementWidth(window.frameElement, props.width?.val); + } } if (props.tooltip) { @@ -42,10 +52,10 @@ const Button = (/** @type Properties */ props) => { const onClickHandler = props.onclick || (() => emitEvent('ButtonClicked')); return button( { - class: `tg-button tg-${props.type.val}-button ${props.type.val !== 'icon' && isIconOnly ? 'tg-icon-button' : ''}`, - style: props.style?.val, + class: `tg-button tg-${props.type.val}-button tg-${props.color?.val ?? 'basic'}-button ${props.type.val !== 'icon' && isIconOnly ? 'tg-icon-button' : ''}`, + style: () => `width: ${props.width?.val ?? 
'100%'}; ${props.style?.val}`, onclick: onClickHandler, - disabled: !!props.disabled?.val, + disabled: props.disabled, }, span({class: 'tg-button-focus-state-indicator'}, ''), props.icon ? i({class: 'material-symbols-rounded'}, props.icon) : undefined, @@ -56,7 +66,6 @@ const Button = (/** @type Properties */ props) => { const stylesheet = new CSSStyleSheet(); stylesheet.replace(` button.tg-button { - width: 100%; height: 40px; position: relative; @@ -75,8 +84,6 @@ button.tg-button { cursor: pointer; font-size: 14px; - color: var(--button-text-color); - background: var(--button-basic-background); } button.tg-button .tg-button-focus-state-indicator::before { @@ -89,21 +96,9 @@ button.tg-button .tg-button-focus-state-indicator::before { position: absolute; pointer-events: none; border-radius: inherit; - background: var(--button-hover-state-background); -} - -button.tg-button.tg-basic-button { - color: var(--button-basic-text-color); -} - -button.tg-button.tg-flat-button { - color: var(--button-flat-text-color); - background: var(--button-flat-background); } button.tg-button.tg-stroked-button { - color: var(--button-stroked-text-color); - background: var(--button-stroked-background); border: var(--button-stroked-border); } @@ -135,6 +130,49 @@ button.tg-button > i:has(+ span) { button.tg-button:hover:not([disabled]) .tg-button-focus-state-indicator::before { opacity: var(--button-hover-state-opacity); } + + +/* Basic button colors */ +button.tg-button.tg-basic-button { + color: var(--button-basic-text-color); + background: var(--button-basic-background); +} + +button.tg-button.tg-basic-button .tg-button-focus-state-indicator::before { + background: var(--button-basic-hover-state-background); +} + +button.tg-button.tg-basic-button.tg-flat-button { + color: var(--button-basic-flat-text-color); + background: var(--button-basic-flat-background); +} + +button.tg-button.tg-basic-button.tg-stroked-button { + color: var(--button-basic-stroked-text-color); + background: var(--button-basic-stroked-background); +} +/* ... */ + +/* Primary button colors */ +button.tg-button.tg-primary-button { + color: var(--button-primary-text-color); + background: var(--button-primary-background); +} + +button.tg-button.tg-primary-button .tg-button-focus-state-indicator::before { + background: var(--button-primary-hover-state-background); +} + +button.tg-button.tg-primary-button.tg-flat-button { + color: var(--button-primary-flat-text-color); + background: var(--button-primary-flat-background); +} + +button.tg-button.tg-primary-button.tg-stroked-button { + color: var(--button-primary-stroked-text-color); + background: var(--button-primary-stroked-background); +} +/* ... 
*/ `); export { Button }; diff --git a/testgen/ui/components/widgets/__init__.py b/testgen/ui/components/widgets/__init__.py index c847d35..d58047e 100644 --- a/testgen/ui/components/widgets/__init__.py +++ b/testgen/ui/components/widgets/__init__.py @@ -1,5 +1,6 @@ # ruff: noqa: F401 +from testgen.ui.components.utils.component import component from testgen.ui.components.widgets.breadcrumbs import breadcrumbs from testgen.ui.components.widgets.button import button from testgen.ui.components.widgets.card import card @@ -23,3 +24,4 @@ from testgen.ui.components.widgets.sorting_selector import sorting_selector from testgen.ui.components.widgets.summary_bar import summary_bar from testgen.ui.components.widgets.testgen_component import testgen_component +from testgen.ui.components.widgets.wizard import wizard, WizardStep diff --git a/testgen/ui/components/widgets/button.py b/testgen/ui/components/widgets/button.py index 4b0a2d0..3c32630 100644 --- a/testgen/ui/components/widgets/button.py +++ b/testgen/ui/components/widgets/button.py @@ -3,17 +3,20 @@ from testgen.ui.components.utils.component import component ButtonType = typing.Literal["basic", "flat", "icon", "stroked"] +ButtonColor = typing.Literal["basic", "primary"] TooltipPosition = typing.Literal["left", "right"] def button( type_: ButtonType = "basic", + color: ButtonColor = "primary", label: str | None = None, icon: str | None = None, tooltip: str | None = None, tooltip_position: TooltipPosition = "left", on_click: typing.Callable[..., None] | None = None, disabled: bool = False, + width: str | int | float | None = None, style: str | None = None, key: str | None = None, ) -> typing.Any: @@ -26,7 +29,7 @@ def button( :param on_click: click handler for this button """ - props = {"type": type_, "disabled": disabled} + props = {"type": type_, "disabled": disabled, "color": color} if type_ != "icon": if not label: raise ValueError(f"A label is required for {type_} buttons") @@ -38,6 +41,11 @@ def button( if tooltip: props.update({"tooltip": tooltip, "tooltipPosition": tooltip_position}) + if width: + props.update({"width": width}) + if isinstance(width, (int, float,)): + props.update({"width": f"{width}px"}) + if style: props.update({"style": style}) diff --git a/testgen/ui/components/widgets/testgen_component.py b/testgen/ui/components/widgets/testgen_component.py index 447686e..7fb2be2 100644 --- a/testgen/ui/components/widgets/testgen_component.py +++ b/testgen/ui/components/widgets/testgen_component.py @@ -6,7 +6,7 @@ def testgen_component( - component_id: typing.Literal["profiling_runs", "test_runs"], + component_id: typing.Literal["profiling_runs", "test_runs", "database_flavor_selector"], props: dict, event_handlers: dict | None, ) -> dict | None: diff --git a/testgen/ui/components/widgets/wizard.py b/testgen/ui/components/widgets/wizard.py new file mode 100644 index 0000000..8a055f2 --- /dev/null +++ b/testgen/ui/components/widgets/wizard.py @@ -0,0 +1,213 @@ +import dataclasses +import logging +import inspect +import typing + +import streamlit as st +from streamlit.delta_generator import DeltaGenerator + +from testgen.ui.components import widgets as testgen +from testgen.ui.navigation.router import Router +from testgen.ui.session import temp_value + +ResultsType = typing.TypeVar("ResultsType", bound=typing.Any | None) +StepResults = tuple[typing.Any, bool] +logger = logging.getLogger("testgen") + + +def wizard( + *, + key: str, + steps: list[typing.Callable[..., StepResults] | "WizardStep"], + on_complete: typing.Callable[..., 
bool], + complete_label: str = "Complete", + navigate_to: str | None = None, + navigate_to_args: dict | None = None, +) -> None: + """ + Creates a Wizard with the provided steps and handles the session for + each step internally. + + For each step callable instances of WizardStep for the current step + and previous steps are optionally provided as keyword arguments with + specific names. + + Optional arguments that can be accessed as follows: + + ``` + def step_fn(current_step: WizardStep = ..., step_0: WizardStep = ...) + ... + ``` + + For the `on_complete` callable, on top of passing each WizardStep, a + Streamlit DeltaGenerator is also passed to allow rendering content + inside the step's body. + + ``` + def on_complete(container: DeltaGenerator, step_0: WizardStep = ..., step_1: WizardStep = ...): + ... + ``` + + After the `on_complete` callback returns, the wizard state is reset. + + :param key: used to cache current step and results of each step + :param steps: a list of WizardStep instances or callable objects + :param on_complete: callable object to execute after the last step. + should return true to trigger a Streamlit rerun + :param complete_label: customize the label for the complete button + + :return: None + """ + + if navigate_to: + Router().navigate(navigate_to, navigate_to_args or {}) + + current_step_idx = 0 + wizard_state = st.session_state.get(key) + if isinstance(wizard_state, int): + current_step_idx = wizard_state + + instance = Wizard( + key=key, + steps=[ + WizardStep( + key=f"{key}:{idx}", + body=step, + results=st.session_state.get(f"{key}:{idx}", None), + ) if not isinstance(step, WizardStep) else dataclasses.replace( + step, + key=f"{key}:{idx}", + results=st.session_state.get(f"{key}:{idx}", None), + ) + for idx, step in enumerate(steps) + ], + current_step=current_step_idx, + on_complete=on_complete, + ) + + current_step = instance.current_step + current_step_index = instance.current_step_index + testgen.caption( + f"Step {current_step_index + 1} of {len(steps)}{': ' + current_step.title if current_step.title else ''}" + ) + + step_body_container = st.empty() + with step_body_container.container(): + was_complete_button_clicked, set_complete_button_clicked = temp_value(f"{key}:complete-button") + + if was_complete_button_clicked(): + instance.complete(step_body_container) + else: + instance.render() + button_left_column, _, button_right_column = st.columns([0.30, 0.40, 0.30]) + with button_left_column: + if not instance.is_first_step(): + testgen.button( + type_="stroked", + color="basic", + label="Previous", + on_click=lambda: instance.previous(), + key=f"{key}:button-prev", + ) + + with button_right_column: + next_button_label = complete_label if instance.is_last_step() else "Next" + + testgen.button( + type_="stroked" if not instance.is_last_step() else "flat", + label=next_button_label, + on_click=lambda: set_complete_button_clicked(instance.next() or instance.is_last_step()), + key=f"{key}:button-next", + disabled=not current_step.is_valid, + ) + + +class Wizard: + def __init__( + self, + *, + key: str, + steps: list["WizardStep"], + on_complete: typing.Callable[..., bool] | None = None, + current_step: int = 0, + ) -> None: + self._key = key + self._steps = steps + self._current_step = current_step + self._on_complete = on_complete + + @property + def current_step(self) -> "WizardStep": + return self._steps[self._current_step] + + @property + def current_step_index(self) -> int: + return self._current_step + + def next(self) -> None: + next_step = 
self._current_step + 1 + if not self.is_last_step(): + st.session_state[self._key] = next_step + return + + def previous(self) -> None: + previous_step = self._current_step - 1 + if previous_step > -1: + st.session_state[self._key] = previous_step + + def is_first_step(self) -> bool: + return self._current_step == 0 + + def is_last_step(self) -> bool: + return self._current_step == len(self._steps) - 1 + + def complete(self, container: DeltaGenerator) -> None: + if self._on_complete: + signature = inspect.signature(self._on_complete) + accepted_params = [param.name for param in signature.parameters.values()] + kwargs: dict = { + key: step for idx, step in enumerate(self._steps) + if (key := f"step_{idx}") and key in accepted_params + } + if "container" in accepted_params: + kwargs["container"] = container + + do_rerun = self._on_complete(**kwargs) + self._reset() + if do_rerun: + st.rerun() + + def _reset(self) -> None: + del st.session_state[self._key] + for step_idx in range(len(self._steps)): + del st.session_state[f"{self._key}:{step_idx}"] + + def render(self) -> None: + step = self._steps[self._current_step] + + extra_args = {"current_step": step} + extra_args.update({f"step_{idx}": step for idx, step in enumerate(self._steps)}) + + signature = inspect.signature(step.body) + step_accepted_params = [param.name for param in signature.parameters.values() if param.name in extra_args] + extra_args = {key: value for key, value in extra_args.items() if key in step_accepted_params} + + try: + results, is_valid = step.body(**extra_args) + except TypeError as error: + logger.exception("Error on wizard step %s", self._current_step, exc_info=True, stack_info=True) + results, is_valid = None, True + + step.results = results + step.is_valid = is_valid + + st.session_state[f"{self._key}:{self._current_step}"] = step.results + + +@dataclasses.dataclass(kw_only=True, slots=True) +class WizardStep[ResultsType]: + body: typing.Callable[..., StepResults] + results: ResultsType = dataclasses.field(default=None) + title: str = dataclasses.field(default="") + key: str | None = dataclasses.field(default=None) + is_valid: bool = dataclasses.field(default=True) diff --git a/testgen/ui/forms.py b/testgen/ui/forms.py new file mode 100644 index 0000000..61a7120 --- /dev/null +++ b/testgen/ui/forms.py @@ -0,0 +1,117 @@ +import typing + +import streamlit as st +from pydantic import BaseModel, Field +from pydantic.json_schema import DEFAULT_REF_TEMPLATE, GenerateJsonSchema, JsonSchemaMode +from streamlit.delta_generator import DeltaGenerator +from streamlit_pydantic.ui_renderer import InputUI + + +class BaseForm(BaseModel): + def __init__(self, /, **data: typing.Any) -> None: + super().__init__(**data) + + @classmethod + def empty(cls) -> typing.Self: + non_validated_instance = cls.model_construct() + non_validated_instance.model_post_init(None) + + return non_validated_instance + + @property + def _disabled_fields(self) -> typing.Set[str]: + if not getattr(self, "_disabled_fields_set", None): + self._disabled_fields_set = set() + return self._disabled_fields_set + + def disable(self, field: str) -> None: + self._disabled_fields.add(field) + + def enable(self, field) -> None: + self._disabled_fields.remove(field) + + @classmethod + def model_json_schema( + self_or_cls, # type: ignore + by_alias: bool = True, + ref_template: str = DEFAULT_REF_TEMPLATE, + schema_generator: type[GenerateJsonSchema] = GenerateJsonSchema, + mode: JsonSchemaMode = 'validation', + ) -> dict[str, typing.Any]: + schema = 
super().model_json_schema( + by_alias=by_alias, + ref_template=ref_template, + schema_generator=schema_generator, + mode=mode, + ) + + schema_properties: dict[str, dict] = schema.get("properties", {}) + disabled_fields: set[str] = getattr(self_or_cls, "_disabled_fields_set", set()) + for property_name, property_schema in schema_properties.items(): + if property_name in disabled_fields and not property_schema.get("readOnly"): + property_schema["readOnly"] = True + + return schema + + @classmethod + def get_field_label(cls, field_name: str) -> str: + schema = cls.model_json_schema() + schema_properties = schema.get("properties", {}) + field_schema = schema_properties[field_name] + return field_schema.get("st_kwargs_label") or field_schema.get("title") + + +class ManualRender: + @property + def input_ui(self): + if not getattr(self, "_input_ui", None): + self._input_ui = InputUI( + self.form_key(), + self, # type: ignore + group_optional_fields="no", # type: ignore + lowercase_labels=False, + ignore_empty_values=False, + return_model=False, + ) + return self._input_ui + + def form_key(self): + raise NotImplementedError() + + def render_input_ui(self, container: DeltaGenerator, session_state: dict) -> typing.Self: + raise NotImplementedError() + + def render_field(self, field_name: str, container: DeltaGenerator | None = None) -> typing.Any: + streamlit_container = container or self.input_ui._streamlit_container + model_property = self.input_ui._schema_properties[field_name] + initial_value = getattr(self, field_name, None) or self.input_ui._get_value(field_name) + is_disabled = field_name in getattr(self, "_disabled_fields", set()) + + if is_disabled: + model_property["readOnly"] = True + + if model_property.get("type") != "boolean" and initial_value not in [None, ""]: + model_property["init_value"] = initial_value + + new_value = self.input_ui._render_property(streamlit_container, field_name, model_property) + self.update_field_value(field_name, new_value) + + return new_value + + def update_field_value(self, field_name: str, value: typing.Any) -> typing.Any: + self.input_ui._store_value(field_name, value) + setattr(self, field_name, value) + return value + + def get_field_value(self, field_name: str, latest: bool = False) -> typing.Any: + if latest: + return st.session_state.get(self.get_field_key(field_name)) + return self.input_ui._get_value(field_name) + + def reset_cache(self) -> None: + for field_name in typing.cast(type[BaseForm], type(self)).model_fields.keys(): + st.session_state.pop(self.get_field_key(field_name), None) + st.session_state.pop(self.form_key() + "-data", None) + + def get_field_key(self, field_name: str) -> typing.Any: + return str(self.input_ui._session_state.run_id) + "-" + str(self.input_ui._key) + "-" + field_name diff --git a/testgen/ui/queries/table_group_queries.py b/testgen/ui/queries/table_group_queries.py index 0663a6f..c13e62a 100644 --- a/testgen/ui/queries/table_group_queries.py +++ b/testgen/ui/queries/table_group_queries.py @@ -1,3 +1,5 @@ +import uuid + import streamlit as st import testgen.ui.services.database_service as db @@ -108,7 +110,8 @@ def edit(schema, table_group): st.cache_data.clear() -def add(schema, table_group): +def add(schema, table_group) -> str: + new_table_group_id = str(uuid.uuid4()) sql = f"""INSERT INTO {schema}.table_groups (id, project_code, @@ -132,7 +135,7 @@ def add(schema, table_group): source_process, stakeholder_group) SELECT - gen_random_uuid(), + '{new_table_group_id}', '{table_group["project_code"]}', 
'{table_group["connection_id"]}', '{table_group["table_groups_name"]}', @@ -155,6 +158,7 @@ def add(schema, table_group): ;""" db.execute_sql(sql) st.cache_data.clear() + return new_table_group_id def delete(schema, table_group_ids): diff --git a/testgen/ui/services/connection_service.py b/testgen/ui/services/connection_service.py index 394c82a..27ebf7e 100644 --- a/testgen/ui/services/connection_service.py +++ b/testgen/ui/services/connection_service.py @@ -207,7 +207,7 @@ def form_overwritten_connection_url(connection): "dbname": connection["project_db"], "url": None, "connect_by_url": None, - "connect_by_key": connection["connect_by_key"], + "connect_by_key": connection.get("connect_by_key"), "private_key": None, "private_key_passphrase": "", "dbschema": "", diff --git a/testgen/ui/services/table_group_service.py b/testgen/ui/services/table_group_service.py index 57ea6bd..f51d360 100644 --- a/testgen/ui/services/table_group_service.py +++ b/testgen/ui/services/table_group_service.py @@ -21,9 +21,9 @@ def edit(table_group): table_group_queries.edit(schema, table_group) -def add(table_group): +def add(table_group: dict) -> str: schema = st.session_state["dbschema"] - table_group_queries.add(schema, table_group) + return table_group_queries.add(schema, table_group) def cascade_delete(table_group_names, dry_run=False): diff --git a/testgen/ui/session.py b/testgen/ui/session.py index 0802132..bb198a8 100644 --- a/testgen/ui/session.py +++ b/testgen/ui/session.py @@ -1,16 +1,20 @@ -import typing +from typing import Any, Callable, Literal, TypeVar import streamlit as st from streamlit.runtime.state import SessionStateProxy from testgen.utils.singleton import Singleton +T = TypeVar("T") +TempValueGetter = Callable[..., T] +TempValueSetter = Callable[[T], None] + class TestgenSession(Singleton): cookies_ready: int logging_in: bool logging_out: bool - page_pending_cookies: st.Page + page_pending_cookies: st.Page # type: ignore page_pending_login: str page_pending_sidebar: str page_args_pending_router: dict @@ -23,7 +27,7 @@ class TestgenSession(Singleton): name: str username: str authentication_status: bool - auth_role: typing.Literal["admin", "edit", "read"] + auth_role: Literal["admin", "edit", "read"] project: str add_project: bool @@ -34,13 +38,13 @@ class TestgenSession(Singleton): def __init__(self, state: SessionStateProxy) -> None: super().__setattr__("_state", state) - def __getattr__(self, key: str) -> typing.Any: + def __getattr__(self, key: str) -> Any: state = object.__getattribute__(self, "_state") if key not in state: return None return state[key] - def __setattr__(self, key: str, value: typing.Any) -> None: + def __setattr__(self, key: str, value: Any) -> None: object.__getattribute__(self, "_state")[key] = value def __delattr__(self, key: str) -> None: @@ -49,4 +53,17 @@ def __delattr__(self, key: str) -> None: del state[key] +def temp_value(session_key: str, *, default: T | None = None) -> tuple[TempValueGetter[T | None], TempValueSetter[T]]: + scoped_session_key = f"tg-session:tmp-value:{session_key}" + + def getter() -> T | None: + if scoped_session_key not in st.session_state: + return default + return st.session_state.pop(scoped_session_key, None) + + def setter(value: T): + st.session_state[scoped_session_key] = value + + return getter, setter + session: TestgenSession = TestgenSession(st.session_state) diff --git a/testgen/ui/views/connections.py b/testgen/ui/views/connections.py deleted file mode 100644 index 33df711..0000000 --- a/testgen/ui/views/connections.py +++ 
/dev/null @@ -1,456 +0,0 @@ -import dataclasses -import logging -import os -import time -import typing - -import streamlit as st - -import testgen.ui.services.database_service as db -from testgen.commands.run_setup_profiling_tools import get_setup_profiling_tools_queries -from testgen.common.database.database_service import empty_cache -from testgen.ui.components import widgets as testgen -from testgen.ui.navigation.menu import MenuItem -from testgen.ui.navigation.page import Page -from testgen.ui.services import authentication_service, connection_service -from testgen.ui.session import session - -LOG = logging.getLogger("testgen") - - -class ConnectionsPage(Page): - path = "connections" - can_activate: typing.ClassVar = [ - lambda: session.authentication_status, - ] - menu_item = MenuItem(icon="database", label="Data Configuration", order=4) - - def render(self, project_code: str, **_kwargs) -> None: - dataframe = connection_service.get_connections(project_code) - connection = dataframe.iloc[0] - - testgen.page_header( - "Connection", - "https://docs.datakitchen.io/article/dataops-testgen-help/connect-your-database", - ) - - _, actions_column = st.columns([.1, .9]) - testgen.flex_row_end(actions_column) - - enable_table_groups = connection["project_host"] and connection["project_db"] and connection["project_qc_schema"] - - with st.container(border=True): - self.show_connection_form(connection, "edit", project_code) - - if actions_column.button( - "Configure QC Utility Schema", - help="Creates the required Utility schema and related functions in the target database", - ): - self.create_qc_schema_dialog(connection) - - if actions_column.button( - f":{'gray' if not enable_table_groups else 'green'}[Table Groups →]", - help="Create or edit Table Groups for the Connection", - ): - self.router.navigate( - "connections:table-groups", - {"connection_id": connection["connection_id"]}, - ) - - @st.dialog(title="Configure QC Utility Schema") - def create_qc_schema_dialog(self, selected_connection): - connection_id = selected_connection["connection_id"] - project_qc_schema = selected_connection["project_qc_schema"] - sql_flavor = selected_connection["sql_flavor"] - user = selected_connection["project_user"] - - create_qc_schema = st.toggle("Create QC Utility Schema", value=True) - grant_privileges = st.toggle("Grant access privileges to TestGen user", value=True) - - user_role = None - - # TODO ALEX: This textbox may be needed if we want to grant permissions to user role - # if sql_flavor == "snowflake": - # user_role_textbox_label = f"Primary role for database user {user}" - # user_role = st.text_input(label=user_role_textbox_label, max_chars=100) - - admin_credentials_expander = st.expander("Admin credential options", expanded=True) - with admin_credentials_expander: - admin_connection_option_index = 0 - admin_connection_options = ["Do not use admin credentials", "Use admin credentials with Password"] - if sql_flavor == "snowflake": - admin_connection_options.append("Use admin credentials with Key-Pair") - - admin_connection_option = st.radio( - "Admin credential options", - label_visibility="hidden", - options=admin_connection_options, - index=admin_connection_option_index, - horizontal=True, - ) - - st.markdown("

 
", unsafe_allow_html=True) - - db_user = None - db_password = None - admin_private_key_passphrase = None - admin_private_key = None - if admin_connection_option == admin_connection_options[0]: - st.markdown(":orange[User created in the connection dialog will be used.]") - else: - db_user = st.text_input(label="Admin db user", max_chars=40) - if admin_connection_option == admin_connection_options[1]: - db_password = st.text_input( - label="Admin db password", max_chars=40, type="password" - ) - st.markdown(":orange[Note: Admin credentials are not stored, are only used for this operation.]") - - if len(admin_connection_options) > 2 and admin_connection_option == admin_connection_options[2]: - admin_private_key_passphrase = st.text_input( - label="Private Key Passphrase", - key="create-qc-schema-private-key-password", - type="password", - max_chars=200, - help="Passphrase used while creating the private Key (leave empty if not applicable)", - ) - - admin_uploaded_file = st.file_uploader("Upload private key (rsa_key.p8)", key="admin-uploaded-file") - if admin_uploaded_file: - admin_private_key = admin_uploaded_file.getvalue().decode("utf-8") - - st.markdown(":orange[Note: Admin credentials are not stored, are only used for this operation.]") - - submit = st.button("Update Configuration") - - if submit: - empty_cache() - script_expander = st.expander("Script Details") - - operation_status = st.empty() - operation_status.info(f"Configuring QC Utility Schema '{project_qc_schema}'...") - - try: - skip_granting_privileges = not grant_privileges - queries = get_setup_profiling_tools_queries(sql_flavor, create_qc_schema, skip_granting_privileges, project_qc_schema, user, user_role) - with script_expander: - st.code( - os.linesep.join(queries), - language="sql", - line_numbers=True) - - connection_service.create_qc_schema( - connection_id, - create_qc_schema, - db_user if db_user else None, - db_password if db_password else None, - skip_granting_privileges, - admin_private_key_passphrase=admin_private_key_passphrase, - admin_private_key=admin_private_key, - user_role=user_role, - ) - operation_status.empty() - operation_status.success("Operation has finished successfully.") - - except Exception as e: - operation_status.empty() - operation_status.error("Error configuring QC Utility Schema.") - error_message = e.args[0] - st.text_area("Error Details", value=error_message) - - def show_connection_form(self, selected_connection, mode, project_code): - flavor_options = ["redshift", "snowflake", "mssql", "postgresql"] - connection_options = ["Connect by Password", "Connect by Key-Pair"] - - left_column, right_column = st.columns([0.75, 0.25]) - - mid_column = st.columns(1)[0] - url_override_toogle_container = st.container() - bottom_left_column, bottom_right_column = st.columns([0.25, 0.75]) - button_left_column, button_right_column = st.columns([0.20, 0.80]) - connection_status_wrapper = st.container() - - connection_id = selected_connection["connection_id"] if mode == "edit" else None - connection_name = selected_connection["connection_name"] if mode == "edit" else "" - sql_flavor_index = flavor_options.index(selected_connection["sql_flavor"]) if mode == "edit" else 0 - project_port = selected_connection["project_port"] if mode == "edit" else "" - project_host = selected_connection["project_host"] if mode == "edit" else "" - project_db = selected_connection["project_db"] if mode == "edit" else "" - project_user = selected_connection["project_user"] if mode == "edit" else "" - url = 
selected_connection["url"] if mode == "edit" else "" - project_qc_schema = selected_connection["project_qc_schema"] if mode == "edit" else "qc" - password = selected_connection["password"] if mode == "edit" else "" - max_threads = selected_connection["max_threads"] if mode == "edit" else 4 - max_query_chars = selected_connection["max_query_chars"] if mode == "edit" else 10000 - connect_by_url = selected_connection["connect_by_url"] if mode == "edit" else False - connect_by_key = selected_connection["connect_by_key"] if mode == "edit" else False - connection_option_index = 1 if connect_by_key else 0 - private_key = selected_connection["private_key"] if mode == "edit" else None - private_key_passphrase = selected_connection["private_key_passphrase"] if mode == "edit" else "" - - new_connection = { - "connection_id": connection_id, - "project_code": project_code, - "private_key": private_key, - "private_key_passphrase": private_key_passphrase, - "password": password, - "url": url, - "max_threads": right_column.number_input( - label="Max Threads (Advanced Tuning)", - min_value=1, - max_value=8, - value=max_threads, - help=( - "Maximum number of concurrent threads that run tests. Default values should be retained unless " - "test queries are failing." - ), - key=f"connections:form:max-threads:{connection_id or 0}", - ), - "max_query_chars": right_column.number_input( - label="Max Expression Length (Advanced Tuning)", - min_value=500, - max_value=14000, - value=max_query_chars, - help="Some tests are consolidated into queries for maximum performance. Default values should be retained unless test queries are failing.", - key=f"connections:form:max-length:{connection_id or 0}", - ), - "connection_name": left_column.text_input( - label="Connection Name", - max_chars=40, - value=connection_name, - help="Your name for this connection. Can be any text.", - key=f"connections:form:name:{connection_id or 0}", - ), - "sql_flavor": left_column.selectbox( - label="SQL Flavor", - options=flavor_options, - index=sql_flavor_index, - help="The type of database server that you will connect to. 
This determines TestGen's drivers and SQL dialect.", - key=f"connections:form:flavor:{connection_id or 0}", - ) - } - - st.session_state.disable_url_widgets = connect_by_url - - new_connection["project_port"] = right_column.text_input( - label="Port", - max_chars=5, - value=project_port, - disabled=st.session_state.disable_url_widgets, - key=f"connections:form:port:{connection_id or 0}", - ) - new_connection["project_host"] = left_column.text_input( - label="Host", - max_chars=250, - value=project_host, - disabled=st.session_state.disable_url_widgets, - key=f"connections:form:host:{connection_id or 0}", - ) - new_connection["project_db"] = left_column.text_input( - label="Database", - max_chars=100, - value=project_db, - help="The name of the database defined on your host where your schemas and tables is present.", - disabled=st.session_state.disable_url_widgets, - key=f"connections:form:database:{connection_id or 0}", - ) - - new_connection["project_user"] = left_column.text_input( - label="User", - max_chars=50, - value=project_user, - help="Username to connect to your database.", - key=f"connections:form:user:{connection_id or 0}", - ) - - new_connection["project_qc_schema"] = right_column.text_input( - label="QC Utility Schema", - max_chars=50, - value=project_qc_schema, - help="The name of the schema on your database that will contain TestGen's profiling functions.", - key=f"connections:form:qcschema:{connection_id or 0}", - ) - - if new_connection["sql_flavor"] == "snowflake": - mid_column.divider() - - connection_option = mid_column.radio( - "Connection options", - options=connection_options, - index=connection_option_index, - horizontal=True, - help="Connection strategy", - key=f"connections:form:type_options:{connection_id or 0}", - ) - - new_connection["connect_by_key"] = connection_option == "Connect by Key-Pair" - password_column = mid_column - else: - new_connection["connect_by_key"] = False - password_column = left_column - - uploaded_file = None - - if new_connection["connect_by_key"]: - new_connection["private_key_passphrase"] = mid_column.text_input( - label="Private Key Passphrase", - type="password", - max_chars=200, - value=private_key_passphrase, - help="Passphrase used while creating the private Key (leave empty if not applicable)", - key=f"connections:form:passphrase:{connection_id or 0}", - ) - - uploaded_file = mid_column.file_uploader("Upload private key (rsa_key.p8)") - else: - new_connection["password"] = password_column.text_input( - label="Password", - max_chars=50, - type="password", - value=password, - help="Password to connect to your database.", - key=f"connections:form:password:{connection_id or 0}", - ) - - mid_column.divider() - - url_override_help_text = "If this switch is set to on, the connection string will be driven by the field below. " - if new_connection["connect_by_key"]: - url_override_help_text += "Only user name will be passed per the relevant fields above." - else: - url_override_help_text += "Only user name and password will be passed per the relevant fields above." 
- - def on_connect_by_url_change(): - value = st.session_state.connect_by_url_toggle - st.session_state.disable_url_widgets = value - - new_connection["connect_by_url"] = url_override_toogle_container.toggle( - "URL override", - value=connect_by_url, - key="connect_by_url_toggle", - help=url_override_help_text, - on_change=on_connect_by_url_change, - ) - - if new_connection["connect_by_url"]: - connection_string = connection_service.form_overwritten_connection_url(new_connection) - connection_string_beginning, connection_string_end = connection_string.split("@", 1) - connection_string_header = connection_string_beginning + "@" - connection_string_header = connection_string_header.replace("%3E", ">") - connection_string_header = connection_string_header.replace("%3C", "<") - - if not url: - url = connection_string_end - - new_connection["url"] = bottom_right_column.text_input( - label="URL Suffix", - max_chars=200, - value=url, - help="Provide a connection string directly. This will override connection parameters if the 'Connect by URL' switch is set.", - ) - - bottom_left_column.text_input(label="URL Prefix", value=connection_string_header, disabled=True) - - bottom_left_column.markdown("

 
", unsafe_allow_html=True) - - testgen.flex_row_end(button_right_column) - submit = button_right_column.button( - "Save" if mode == "edit" else "Add Connection", - disabled=authentication_service.current_user_has_read_role(), - ) - - if submit: - if not new_connection["password"] and not new_connection["connect_by_key"]: - st.error("Enter a valid password.") - else: - if uploaded_file: - new_connection["private_key"] = uploaded_file.getvalue().decode("utf-8") - - if mode == "edit": - connection_service.edit_connection(new_connection) - else: - connection_service.add_connection(new_connection) - success_message = ( - "Changes have been saved successfully. " - if mode == "edit" - else "New connection added successfully. " - ) - st.success(success_message) - time.sleep(1) - st.rerun() - - test_connection = button_left_column.button("Test Connection") - - if test_connection: - single_element_container = connection_status_wrapper.empty() - single_element_container.info("Connecting ...") - connection_status = self.test_connection(new_connection) - - with single_element_container.container(): - renderer = { - True: st.success, - False: st.error, - }[connection_status.successful] - - renderer(connection_status.message) - if not connection_status.successful and connection_status.details: - st.caption("Connection Error Details") - - with st.container(border=True): - st.markdown(connection_status.details) - else: - # This is needed to fix a strange bug in Streamlit when using dialog + input fields + button - # If an input field is changed and the button is clicked immediately (without unfocusing the input first), - # two fragment reruns happen successively, one for unfocusing the input and the other for clicking the button - # Some or all (it seems random) of the input fields disappear when this happens - time.sleep(0.1) - - def test_connection(self, connection: dict) -> "ConnectionStatus": - if connection["connect_by_key"] and connection["connection_id"] is None: - return ConnectionStatus( - message="Please add the connection before testing it (so that we can get your private key file).", - successful=False, - ) - - empty_cache() - try: - sql_query = "select 1;" - results = db.retrieve_target_db_data( - connection["sql_flavor"], - connection["project_host"], - connection["project_port"], - connection["project_db"], - connection["project_user"], - connection["password"], - connection["url"], - connection["connect_by_url"], - connection["connect_by_key"], - connection["private_key"], - connection["private_key_passphrase"], - sql_query, - ) - connection_successful = len(results) == 1 and results[0][0] == 1 - - if not connection_successful: - return ConnectionStatus(message="Error completing a query to the database server.", successful=False) - - qc_error_message = "The connection was successful, but there is an issue with the QC Utility Schema" - try: - qc_results = connection_service.test_qc_connection(connection["project_code"], connection) - if not all(qc_results): - return ConnectionStatus( - message=qc_error_message, - details=f"QC Utility Schema confirmation failed. 
details: {qc_results}", - successful=False, - ) - return ConnectionStatus(message="The connection was successful.", successful=True) - except Exception as error: - return ConnectionStatus(message=qc_error_message, details=error.args[0], successful=False) - except Exception as error: - return ConnectionStatus(message="Error attempting the Connection.", details=error.args[0], successful=False) - - -@dataclasses.dataclass(frozen=True, slots=True) -class ConnectionStatus: - message: str - successful: bool - details: str | None = dataclasses.field(default=None) diff --git a/testgen/ui/views/connections/__init__.py b/testgen/ui/views/connections/__init__.py new file mode 100644 index 0000000..76f8c37 --- /dev/null +++ b/testgen/ui/views/connections/__init__.py @@ -0,0 +1,3 @@ +from testgen.ui.views.connections.page import ConnectionsPage +from testgen.ui.views.connections.models import ConnectionStatus +from testgen.ui.views.connections.forms import BaseConnectionForm, PasswordConnectionForm, KeyPairConnectionForm diff --git a/testgen/ui/views/connections/forms.py b/testgen/ui/views/connections/forms.py new file mode 100644 index 0000000..942c42a --- /dev/null +++ b/testgen/ui/views/connections/forms.py @@ -0,0 +1,250 @@ +# type: ignore +import base64 +import typing + +from pydantic import computed_field +import streamlit as st +from streamlit.delta_generator import DeltaGenerator + +from testgen.ui.components import widgets as testgen +from testgen.ui.forms import BaseForm, Field, ManualRender +from testgen.ui.services import connection_service + +SQL_FLAVORS = ["redshift", "snowflake", "mssql", "postgresql"] +SQLFlavor = typing.Literal[*SQL_FLAVORS] + + +class BaseConnectionForm(BaseForm, ManualRender): + connection_name: str = Field( + default="", + min_length=3, + max_length=40, + st_kwargs_max_chars=40, + st_kwargs_label="Connection Name", + st_kwargs_help="Your name for this connection. Can be any text.", + ) + project_host: str = Field( + default="", + max_length=250, + st_kwargs_max_chars=250, + st_kwargs_label="Host", + ) + project_port: str = Field(default="", max_length=5, st_kwargs_max_chars=5, st_kwargs_label="Port") + project_db: str = Field( + default="", + max_length=100, + st_kwargs_max_chars=100, + st_kwargs_label="Database", + st_kwargs_help="The name of the database defined on your host where your schemas and tables is present.", + ) + project_user: str = Field( + default="", + max_length=50, + st_kwargs_max_chars=50, + st_kwargs_label="User", + st_kwargs_help="Username to connect to your database.", + ) + connect_by_url: bool = Field( + default=False, + st_kwargs_label="URL override", + st_kwargs_help=( + "If this switch is set to on, the connection string will be driven by the field below. " + "Only user name and password will be passed per the relevant fields above." + ), + ) + url_prefix: str = Field( + default="", + readOnly=True, + st_kwargs_label="URL Prefix", + ) + url: str = Field( + default="", + max_length=200, + st_kwargs_label="URL Suffix", + st_kwargs_max_chars=200, + st_kwargs_help=( + "Provide a connection string directly. This will override connection parameters if " + "the 'Connect by URL' switch is set." + ), + ) + max_threads: int = Field( + default=4, + ge=1, + le=8, + st_kwargs_min_value=1, + st_kwargs_max_value=8, + st_kwargs_label="Max Threads (Advanced Tuning)", + st_kwargs_help=( + "Maximum number of concurrent threads that run tests. Default values should be retained unless " + "test queries are failing." 
+ ), + ) + max_query_chars: int = Field( + default=10000, + ge=500, + le=14000, + st_kwargs_label="Max Expression Length (Advanced Tuning)", + st_kwargs_min_value=500, + st_kwargs_max_value=14000, + st_kwargs_help=( + "Some tests are consolidated into queries for maximum performance. Default values should be retained " + "unless test queries are failing." + ), + ) + project_qc_schema: str = Field( + default="qc", + max_length=50, + st_kwargs_label="QC Utility Schema", + st_kwargs_max_chars=50, + st_kwargs_help="The name of the schema on your database that will contain TestGen's profiling functions.", + ) + + connection_id: int | None = Field(default=None) + + sql_flavor: SQLFlavor = Field( + ..., + st_kwargs_label="SQL Flavor", + st_kwargs_options=SQL_FLAVORS, + st_kwargs_help=( + "The type of database server that you will connect to. This determines TestGen's drivers and SQL dialect." + ), + ) + + def form_key(self): + return f"connection_form:{self.connection_id or 'new'}" + + def render_input_ui(self, container: DeltaGenerator, data: dict) -> typing.Self: + main_fields_container, optional_fields_container = container.columns([0.7, 0.3]) + + if self.get_field_value("connect_by_url", latest=True): + self.disable("project_host") + self.disable("project_port") + self.disable("project_db") + + self.render_field("sql_flavor", container=main_fields_container) + self.render_field("connection_name", container=main_fields_container) + host_field_container, port_field_container = main_fields_container.columns([0.6, 0.4]) + self.render_field("project_host", container=host_field_container) + self.render_field("project_port", container=port_field_container) + + self.render_field("project_db", container=main_fields_container) + self.render_field("project_user", container=main_fields_container) + self.render_field("project_qc_schema", container=optional_fields_container) + self.render_field("max_threads", container=optional_fields_container) + self.render_field("max_query_chars", container=optional_fields_container) + + self.render_extra(container, main_fields_container, optional_fields_container, data) + + testgen.divider(margin_top=8, margin_bottom=8, container=container) + + self.url_prefix = data.get("url_prefix", "") + self.render_field("connect_by_url") + if self.connect_by_url: + connection_string = connection_service.form_overwritten_connection_url(data) + connection_string_beginning, connection_string_end = connection_string.split("@", 1) + + self.update_field_value( + "url_prefix", + f"{connection_string_beginning}@".replace("%3E", ">").replace("%3C", "<"), + ) + if not data.get("url", ""): + self.update_field_value("url", connection_string_end) + + url_override_left_column, url_override_right_column = st.columns([0.25, 0.75]) + self.render_field("url_prefix", container=url_override_left_column) + self.render_field("url", container=url_override_right_column) + + return self + + def render_extra( + self, + container: DeltaGenerator, + left_fields_container: DeltaGenerator, + right_fields_container: DeltaGenerator, + data: dict, + ) -> None: + ... 
+ + @staticmethod + def for_flavor(flavor: SQLFlavor) -> type["BaseConnectionForm"]: + return { + "redshift": PasswordConnectionForm, + "snowflake": KeyPairConnectionForm, + "mssql": PasswordConnectionForm, + "postgresql": PasswordConnectionForm, + }[flavor] + + +class PasswordConnectionForm(BaseConnectionForm): + password: str = Field( + default="", + max_length=50, + writeOnly=True, + st_kwargs_label="Password", + st_kwargs_max_chars=50, + st_kwargs_help="Password to connect to your database.", + ) + + def render_extra( + self, + container: DeltaGenerator, + left_fields_container: DeltaGenerator, + right_fields_container: DeltaGenerator, + data: dict, + ) -> None: + self.render_field("password", left_fields_container) + + +class KeyPairConnectionForm(PasswordConnectionForm): + connect_by_key: bool = Field(default=None) + private_key_passphrase: str = Field( + default="", + max_length=200, + writeOnly=True, + st_kwargs_max_chars=200, + st_kwargs_help=( + "Passphrase used while creating the private Key (leave empty if not applicable)" + ), + st_kwargs_label="Private Key Passphrase", + ) + private_key_inner: str = Field( + default="", + format="base64", + st_kwargs_label="Upload private key (rsa_key.p8)", + ) + + @computed_field + @property + def private_key(self) -> str: + if not self.private_key_inner: + return "" + return base64.b64decode(self.private_key_inner).decode("utf-8") + + def render_extra( + self, + container: DeltaGenerator, + left_fields_container: DeltaGenerator, + right_fields_container: DeltaGenerator, + data: dict, + ) -> None: + testgen.divider(margin_top=8, margin_bottom=8, container=container) + + connect_by_key = self.connect_by_key + if connect_by_key is None: + connect_by_key = self.get_field_value("connect_by_key") + + connection_option: typing.Literal["Connect by Password", "Connect by Key-Pair"] = container.radio( + "Connection options", + options=["Connect by Password", "Connect by Key-Pair"], + index=1 if connect_by_key else 0, + horizontal=True, + help="Connection strategy", + key=self.get_field_key("connection_option"), + ) + self.update_field_value("connect_by_key", connection_option == "Connect by Key-Pair") + + if connection_option == "Connect by Password": + self.render_field("password", container) + else: + self.render_field("private_key_passphrase", container) + self.render_field("private_key_inner", container) diff --git a/testgen/ui/views/connections/models.py b/testgen/ui/views/connections/models.py new file mode 100644 index 0000000..90f16ca --- /dev/null +++ b/testgen/ui/views/connections/models.py @@ -0,0 +1,8 @@ +import dataclasses + + +@dataclasses.dataclass(frozen=True, slots=True) +class ConnectionStatus: + message: str + successful: bool + details: str | None = dataclasses.field(default=None) diff --git a/testgen/ui/views/connections/page.py b/testgen/ui/views/connections/page.py new file mode 100644 index 0000000..770b764 --- /dev/null +++ b/testgen/ui/views/connections/page.py @@ -0,0 +1,444 @@ +from functools import partial +import logging +import os +import time +import typing + +from pydantic import ValidationError +import streamlit as st +from streamlit.delta_generator import DeltaGenerator +import streamlit_pydantic as sp + +import testgen.ui.services.database_service as db +from testgen.ui.services import table_group_service +from testgen.commands.run_setup_profiling_tools import get_setup_profiling_tools_queries +from testgen.commands.run_profiling_bridge import run_profiling_in_background +from 
testgen.common.database.database_service import empty_cache +from testgen.ui.components import widgets as testgen +from testgen.ui.views.connections.forms import BaseConnectionForm +from testgen.ui.views.table_groups.forms import TableGroupForm +from testgen.ui.navigation.menu import MenuItem +from testgen.ui.navigation.page import Page +from testgen.ui.services import connection_service +from testgen.ui.session import session, temp_value +from testgen.ui.views.connections.models import ConnectionStatus + +LOG = logging.getLogger("testgen") + + +class ConnectionsPage(Page): + path = "connections" + can_activate: typing.ClassVar = [ + lambda: session.authentication_status, + ] + menu_item = MenuItem(icon="database", label="Data Configuration", order=4) + + def render(self, project_code: str, **_kwargs) -> None: + dataframe = connection_service.get_connections(project_code) + connection = dataframe.iloc[1] + has_table_groups = ( + len(connection_service.get_table_group_names_by_connection([connection["connection_id"]]) or []) > 0 + ) + + testgen.page_header( + "Connection", + "https://docs.datakitchen.io/article/dataops-testgen-help/connect-your-database", + ) + + _, actions_column = st.columns([.1, .9]) + testgen.flex_row_end(actions_column) + + with st.container(border=True): + self.show_connection_form(connection.to_dict(), "edit", project_code) + + if has_table_groups: + with actions_column: + testgen.link( + href="connections:table-groups", + params={"connection_id": str(connection["connection_id"])}, + label="Table Groups", + right_icon="chevron_right", + style="margin-left: auto;", + ) + else: + with actions_column: + testgen.button( + type_="stroked", + color="basic", + label="Setup Table Groups", + style="background: white;", + width=200, + on_click=lambda: self.setup_data_configuration(project_code, connection.to_dict()), + ) + + def show_connection_form(self, selected_connection: dict, mode, project_code) -> None: + connection = selected_connection or {} + connection_id = connection.get("connection_id", None) + sql_flavor = connection.get("sql_flavor", "postgresql") + data = {} + + try: + form = BaseConnectionForm.for_flavor(sql_flavor).model_construct(sql_flavor=sql_flavor) + if connection: + connection["password"] = connection["password"] or "" + form = BaseConnectionForm.for_flavor(sql_flavor)(**connection) + + sql_flavor = form.get_field_value("sql_flavor", latest=True) or sql_flavor + if form.sql_flavor != sql_flavor: + form = BaseConnectionForm.for_flavor(sql_flavor)(sql_flavor=sql_flavor) + + form_errors_container = st.empty() + data = sp.pydantic_input( + key=f"connection_form:{connection_id or 'new'}", + model=form, # type: ignore + ) + data.update({ + "project_code": project_code, + }) + if "private_key" not in data: + data.update({ + "connect_by_key": False, + "private_key_passphrase": None, + "private_key": None, + }) + + try: + BaseConnectionForm.for_flavor(sql_flavor).model_validate(data) + except ValidationError as error: + form_errors_container.warning("\n".join([ + f"- {field_label}: {err['msg']}" for err in error.errors() + if (field_label := TableGroupForm.get_field_label(str(err['loc'][0]))) + ])) + except Exception: + LOG.exception("unexpected form validation error") + st.error("Unexpected error displaying the form. 
Try again") + + test_button_column, config_qc_column, _, save_button_column = st.columns([.2, .2, .4, .2]) + is_submitted, set_submitted = temp_value(f"connection_form-{connection_id or 'new'}:submit") + get_connection_status, set_connection_status = temp_value( + f"connection_form-{connection_id or 'new'}:test_conn" + ) + + with save_button_column: + testgen.button( + type_="flat", + label="Save", + key=f"connection_form:{connection_id or 'new'}:submit", + on_click=lambda: set_submitted(True), + ) + + with test_button_column: + testgen.button( + type_="stroked", + color="basic", + label="Test Connection", + key=f"connection_form:{connection_id or 'new'}:test", + on_click=lambda: set_connection_status(self.test_connection(data)), + ) + + with config_qc_column: + testgen.button( + type_="stroked", + color="basic", + label="Configure QC Utility Schema", + key=f"connection_form:{connection_id or 'new'}:config-qc-schema", + tooltip="Creates the required Utility schema and related functions in the target database", + on_click=lambda: self.create_qc_schema_dialog(connection) + ) + + if (connection_status := get_connection_status()): + single_element_container = st.empty() + single_element_container.info("Connecting ...") + + with single_element_container.container(): + renderer = { + True: st.success, + False: st.error, + }[connection_status.successful] + + renderer(connection_status.message) + if not connection_status.successful and connection_status.details: + st.caption("Connection Error Details") + + with st.container(border=True): + st.markdown(connection_status.details) + + connection_status = None + else: + # This is needed to fix a strange bug in Streamlit when using dialog + input fields + button + # If an input field is changed and the button is clicked immediately (without unfocusing the input first), + # two fragment reruns happen successively, one for unfocusing the input and the other for clicking the button + # Some or all (it seems random) of the input fields disappear when this happens + time.sleep(0.1) + + if is_submitted(): + if not data.get("password") and not data.get("connect_by_key"): + st.error("Enter a valid password.") + else: + if data.get("private_key"): + data["private_key"] = data["private_key"].getvalue().decode("utf-8") + + connection_service.edit_connection(data) + st.success("Changes have been saved successfully.") + time.sleep(1) + st.rerun() + + def test_connection(self, connection: dict) -> "ConnectionStatus": + if connection["connect_by_key"] and connection["connection_id"] is None: + return ConnectionStatus( + message="Please add the connection before testing it (so that we can get your private key file).", + successful=False, + ) + + empty_cache() + try: + sql_query = "select 1;" + results = db.retrieve_target_db_data( + connection["sql_flavor"], + connection["project_host"], + connection["project_port"], + connection["project_db"], + connection["project_user"], + connection["password"], + connection["url"], + connection["connect_by_url"], + connection["connect_by_key"], + connection["private_key"], + connection["private_key_passphrase"], + sql_query, + ) + connection_successful = len(results) == 1 and results[0][0] == 1 + + if not connection_successful: + return ConnectionStatus(message="Error completing a query to the database server.", successful=False) + + qc_error_message = "The connection was successful, but there is an issue with the QC Utility Schema" + try: + qc_results = connection_service.test_qc_connection(connection["project_code"], 
connection) + if not all(qc_results): + return ConnectionStatus( + message=qc_error_message, + details=f"QC Utility Schema confirmation failed. details: {qc_results}", + successful=False, + ) + return ConnectionStatus(message="The connection was successful.", successful=True) + except Exception as error: + return ConnectionStatus(message=qc_error_message, details=error.args[0], successful=False) + except Exception as error: + return ConnectionStatus(message="Error attempting the Connection.", details=error.args[0], successful=False) + + @st.dialog(title="Configure QC Utility Schema") + def create_qc_schema_dialog(self, selected_connection): + connection_id = selected_connection["connection_id"] + project_qc_schema = selected_connection["project_qc_schema"] + sql_flavor = selected_connection["sql_flavor"] + user = selected_connection["project_user"] + + create_qc_schema = st.toggle("Create QC Utility Schema", value=True) + grant_privileges = st.toggle("Grant access privileges to TestGen user", value=True) + + user_role = None + + # TODO ALEX: This textbox may be needed if we want to grant permissions to user role + # if sql_flavor == "snowflake": + # user_role_textbox_label = f"Primary role for database user {user}" + # user_role = st.text_input(label=user_role_textbox_label, max_chars=100) + + admin_credentials_expander = st.expander("Admin credential options", expanded=True) + with admin_credentials_expander: + admin_connection_option_index = 0 + admin_connection_options = ["Do not use admin credentials", "Use admin credentials with Password"] + if sql_flavor == "snowflake": + admin_connection_options.append("Use admin credentials with Key-Pair") + + admin_connection_option = st.radio( + "Admin credential options", + label_visibility="hidden", + options=admin_connection_options, + index=admin_connection_option_index, + horizontal=True, + ) + + st.markdown("

 
", unsafe_allow_html=True) + + db_user = None + db_password = None + admin_private_key_passphrase = None + admin_private_key = None + if admin_connection_option == admin_connection_options[0]: + st.markdown(":orange[User created in the connection dialog will be used.]") + else: + db_user = st.text_input(label="Admin db user", max_chars=40) + if admin_connection_option == admin_connection_options[1]: + db_password = st.text_input( + label="Admin db password", max_chars=40, type="password" + ) + st.markdown(":orange[Note: Admin credentials are not stored, are only used for this operation.]") + + if len(admin_connection_options) > 2 and admin_connection_option == admin_connection_options[2]: + admin_private_key_passphrase = st.text_input( + label="Private Key Passphrase", + key="create-qc-schema-private-key-password", + type="password", + max_chars=200, + help="Passphrase used while creating the private Key (leave empty if not applicable)", + ) + + admin_uploaded_file = st.file_uploader("Upload private key (rsa_key.p8)", key="admin-uploaded-file") + if admin_uploaded_file: + admin_private_key = admin_uploaded_file.getvalue().decode("utf-8") + + st.markdown(":orange[Note: Admin credentials are not stored, are only used for this operation.]") + + submit = st.button("Update Configuration") + + if submit: + empty_cache() + script_expander = st.expander("Script Details") + + operation_status = st.empty() + operation_status.info(f"Configuring QC Utility Schema '{project_qc_schema}'...") + + try: + skip_granting_privileges = not grant_privileges + queries = get_setup_profiling_tools_queries(sql_flavor, create_qc_schema, skip_granting_privileges, project_qc_schema, user, user_role) + with script_expander: + st.code( + os.linesep.join(queries), + language="sql", + line_numbers=True) + + connection_service.create_qc_schema( + connection_id, + create_qc_schema, + db_user if db_user else None, + db_password if db_password else None, + skip_granting_privileges, + admin_private_key_passphrase=admin_private_key_passphrase, + admin_private_key=admin_private_key, + user_role=user_role, + ) + operation_status.empty() + operation_status.success("Operation has finished successfully.") + + except Exception as e: + operation_status.empty() + operation_status.error("Error configuring QC Utility Schema.") + error_message = e.args[0] + st.text_area("Error Details", value=error_message) + + @st.dialog(title="Data Configuration Setup") + def setup_data_configuration(self, project_code: str, connection: dict) -> None: + will_run_profiling = st.session_state.get("connection_form-new:run-profiling-toggle", True) + testgen.wizard( + key="connections:setup-wizard", + steps=[ + testgen.WizardStep( + title="Create a Table Group", + body=partial(self.create_table_group_step, project_code, connection), + ), + testgen.WizardStep( + title="Run Profiling", + body=self.run_data_profiling_step, + ), + ], + on_complete=self.execute_setup, + complete_label="Save & Run Profiling" if will_run_profiling else "Finish Setup", + navigate_to=st.session_state.pop("setup_data_config:navigate-to", None), + navigate_to_args=st.session_state.pop("setup_data_config:navigate-to-args", {}), + ) + + def create_table_group_step(self, project_code: str, connection: dict) -> tuple[dict | None, bool]: + is_valid: bool = True + data: dict = {} + + try: + form = TableGroupForm.model_construct() + form_errors_container = st.empty() + data = sp.pydantic_input(key="table_form:new", model=form) # type: ignore + + try: + TableGroupForm.model_validate(data) + 
form_errors_container.empty() + data.update({"project_code": project_code, "connection_id": connection["connection_id"]}) + except ValidationError as error: + form_errors_container.warning("\n".join([ + f"- {field_label}: {err['msg']}" for err in error.errors() + if (field_label := TableGroupForm.get_field_label(str(err['loc'][0]))) + ])) + is_valid = False + except Exception: + LOG.exception("unexpected form validation error") + st.error("Unexpected error displaying the form. Try again") + is_valid = False + + return data, is_valid + + def run_data_profiling_step(self, step_0: testgen.WizardStep | None = None) -> tuple[bool, bool]: + if not step_0 or not step_0.results: + st.error("A table group is required to complete this step.") + return False, False + + run_profiling = True + profiling_message = "Profiling will be performed in a background process." + table_group = step_0.results + + with st.container(): + run_profiling = st.checkbox( + label=f"Execute profiling for the table group **{table_group['table_groups_name']}**?", + key="connection_form-new:run-profiling-toggle", + value=True, + ) + if not run_profiling: + profiling_message = ( + "Profiling will be skipped. You can run this step later from the Profiling Runs page." + ) + st.markdown(f":material/info: _{profiling_message}_") + + return run_profiling, True + + def execute_setup( + self, + container: DeltaGenerator, + step_0: testgen.WizardStep[dict], + step_1: testgen.WizardStep[bool], + ) -> bool: + table_group = step_0.results + table_group_name: str = table_group["table_groups_name"] + should_run_profiling: bool = step_1.results + + with container.container(): + status_container = st.empty() + + try: + status_container.info(f"Creating table group **{table_group_name.strip()}**.") + table_group_id = table_group_service.add(table_group) + TableGroupForm.model_construct().reset_cache() + except Exception as err: + status_container.error(f"Error creating table group: {err!s}.") + + if should_run_profiling: + try: + status_container.info("Starting profiling run ...") + run_profiling_in_background(table_group_id) + status_container.success(f"Profiling run started for table group **{table_group_name.strip()}**.") + except Exception as err: + status_container.error(f"Profiling run encountered errors: {err!s}.") + + _, link_column = st.columns([.7, .3]) + with link_column: + testgen.button( + type_="stroked", + color="primary", + label="Go to Profiling Runs", + icon="chevron_right", + key="setup_data_config:keys:go-to-runs", + on_click=lambda: ( + st.session_state.__setattr__("setup_data_config:navigate-to", "profiling-runs") + or st.session_state.__setattr__("setup_data_config:navigate-to-args", { + "table_group": table_group_id + }) + ), + ) + + return not should_run_profiling diff --git a/testgen/ui/views/table_groups/__init__.py b/testgen/ui/views/table_groups/__init__.py new file mode 100644 index 0000000..99df82c --- /dev/null +++ b/testgen/ui/views/table_groups/__init__.py @@ -0,0 +1,2 @@ +from testgen.ui.views.table_groups.page import TableGroupsPage +# from testgen.ui.views.table_groups.forms import ... 
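For context on the wizard step bodies introduced above: `create_table_group_step` renders the pydantic form with `streamlit_pydantic` and turns validation errors into a warning instead of blocking the rerun, returning `(data, is_valid)` so the wizard can track whether the step is complete. Below is a minimal sketch of that render-then-validate pattern, not part of the patch; `ExampleTableGroupForm` is a hypothetical stand-in for `TableGroupForm`, using a plain pydantic `BaseModel` with a single field for brevity.

    import streamlit as st
    import streamlit_pydantic as sp
    from pydantic import BaseModel, Field, ValidationError

    class ExampleTableGroupForm(BaseModel):
        # Hypothetical stand-in for TableGroupForm: starts empty, so it is
        # invalid until the user types a name (min_length=1).
        table_groups_name: str = Field(default="", min_length=1, max_length=40)

    # Placeholder that is filled with a warning while the form is invalid
    # and cleared once validation passes.
    form_errors_container = st.empty()

    # Render the model's fields as Streamlit inputs; returns the current
    # values as a plain dict on every rerun.
    data = sp.pydantic_input(key="example_table_group_form", model=ExampleTableGroupForm)

    try:
        ExampleTableGroupForm.model_validate(data)
        form_errors_container.empty()
    except ValidationError as error:
        # Surface field-level errors without stopping the script, mirroring
        # how the patch reports them per field label.
        form_errors_container.warning(
            "\n".join(f"- {err['loc'][0]}: {err['msg']}" for err in error.errors())
        )

The real step additionally merges `project_code` and `connection_id` into the validated payload and maps error locations to field labels via `get_field_label`.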
diff --git a/testgen/ui/views/table_groups/forms.py b/testgen/ui/views/table_groups/forms.py new file mode 100644 index 0000000..9087307 --- /dev/null +++ b/testgen/ui/views/table_groups/forms.py @@ -0,0 +1,170 @@ +# type: ignore +import typing + +from streamlit.delta_generator import DeltaGenerator + +from testgen.ui.components import widgets as testgen +from testgen.ui.forms import BaseForm, Field, ManualRender + +SQLFlavor = typing.Literal["redshift", "snowflake", "mssql", "postgresql"] + + +class TableGroupForm(BaseForm, ManualRender): + table_groups_name: str = Field( + default="", + min_length=1, + max_length=40, + st_kwargs_label="Name", + st_kwargs_max_chars=40, + st_kwargs_help="A unique name to describe the table group", + ) + profiling_include_mask: str = Field( + default="%", + max_length=40, + st_kwargs_label="Tables to Include Mask", + st_kwargs_max_chars=40, + st_kwargs_help="A SQL filter supported by your database's LIKE operator for table names to include", + ) + profiling_exclude_mask: str = Field( + default="tmp%", + st_kwargs_label="Tables to Exclude Mask", + st_kwargs_max_chars=40, + st_kwargs_help="A SQL filter supported by your database's LIKE operator for table names to exclude", + ) + profiling_table_set: str = Field( + default="", + st_kwargs_label="Explicit Table List", + st_kwargs_max_chars=2000, + st_kwargs_help="A list of specific table names to include, separated by commas", + ) + table_group_schema: str = Field( + default="", + min_length=1, + max_length=40, + st_kwargs_label="Schema", + st_kwargs_max_chars=40, + st_kwargs_help="The database schema containing the tables in the Table Group", + ) + profile_id_column_mask: str = Field( + default="%_id", + st_kwargs_label="Profiling ID column mask", + st_kwargs_max_chars=40, + st_kwargs_help="A SQL filter supported by your database's LIKE operator representing ID columns (optional)", + ) + profile_sk_column_mask: str = Field( + default="%_sk", + st_kwargs_label="Profiling Surrogate Key column mask", + st_kwargs_max_chars=40, + st_kwargs_help="A SQL filter supported by your database's LIKE operator representing surrogate key columns (optional)", + ) + profiling_delay_days: int = Field( + default=0, + st_kwargs_label="Min Profiling Age, Days", + st_kwargs_min_value=0, + st_kwargs_max_value=999, + st_kwargs_help="The number of days to wait before new profiling will be available to generate tests", + ) + profile_use_sampling: bool = Field( + default=True, + st_kwargs_label="Use profile sampling", + st_kwargs_help="Toggle on to base profiling on a sample of records instead of the full table", + ) + profile_sample_percent: int = Field( + default=30, + st_kwargs_label="Sample percent", + st_kwargs_min_value=1, + st_kwargs_max_value=100, + st_kwargs_help="Percent of records to include in the sample, unless the calculated count falls below the specified minimum.", + ) + profile_sample_min_count: int = Field( + default=15000, + st_kwargs_label="Min Sample Record Count", + st_kwargs_min_value=1, + st_kwargs_max_value=1000000, + st_kwargs_help="The minimum number of records to be included in any sample (if available)", + ) + data_source: str = Field( + default="", + st_kwargs_label="Data Source", + st_kwargs_max_chars=40, + st_kwargs_help="Original source of all tables in this dataset. This can be overridden at the table level. (Optional)", + ) + source_system: str = Field( + default="", + st_kwargs_label="System of Origin", + st_kwargs_max_chars=40, + st_kwargs_help="Enterprise system source for all tables in this dataset. 
" + "This can be overridden at the table level. (Optional)", + ) + business_domain: str = Field( + default="", + st_kwargs_label="Business Domain", + st_kwargs_max_chars=40, + st_kwargs_help="Business division responsible for all tables in this dataset. " + "e.g. Finance, Sales, Manufacturing. (Optional)", + ) + data_location: str = Field( + default="", + st_kwargs_label="Location", + st_kwargs_max_chars=40, + st_kwargs_help="Physical or virtual location of all tables in this dataset. " + "e.g. Headquarters, Cloud, etc. (Optional)", + ) + transform_level: str = Field( + default="", + st_kwargs_label="Transform Level", + st_kwargs_max_chars=40, + st_kwargs_help="Data warehouse processing layer. " + "Indicates the processing stage: e.g. Raw, Conformed, Processed, Reporting. (Optional)", + ) + source_process: str = Field( + default="", + st_kwargs_label="Source Process", + st_kwargs_max_chars=40, + st_kwargs_help="The process, program or data flow that produced this data. (Optional)", + ) + stakeholder_group: str = Field( + default="", + st_kwargs_label="Stakeholder Group", + st_kwargs_max_chars=40, + st_kwargs_help="Designator for data owners or stakeholders who are responsible for this data. (Optional)", + ) + table_group_id: int | None = Field(default=None) + + def form_key(self): + return f"table_group_form:{self.table_group_id or 'new'}" + + def render_input_ui(self, container: DeltaGenerator, data: dict) -> typing.Self: + left_column, right_column = container.columns([.5, .5]) + + self.render_field("table_groups_name", left_column) + self.render_field("profiling_include_mask", left_column) + self.render_field("profiling_exclude_mask", left_column) + self.render_field("profiling_table_set", left_column) + + self.render_field("table_group_schema", right_column) + self.render_field("profile_id_column_mask", right_column) + self.render_field("profile_sk_column_mask", right_column) + self.render_field("profiling_delay_days", right_column) + + self.render_field("profile_use_sampling", container) + profile_sampling_expander = container.expander("Sampling Parameters", expanded=False) + with profile_sampling_expander: + expander_left_column, expander_right_column = profile_sampling_expander.columns([0.50, 0.50]) + self.render_field("profile_sample_percent", expander_left_column) + self.render_field("profile_sample_min_count", expander_right_column) + + provenance_expander = container.expander("Data Provenance (Optional)", expanded=False) + with provenance_expander: + provenance_left_column, provenance_right_column = provenance_expander.columns([0.50, 0.50]) + + self.render_field("data_source", provenance_left_column) + self.render_field("source_system", provenance_left_column) + self.render_field("business_domain", provenance_left_column) + self.render_field("data_location", provenance_left_column) + + self.render_field("transform_level", provenance_right_column) + self.render_field("source_process", provenance_right_column) + self.render_field("stakeholder_group", provenance_right_column) + + return self diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups/page.py similarity index 99% rename from testgen/ui/views/table_groups.py rename to testgen/ui/views/table_groups/page.py index e62787c..7b9e8a9 100644 --- a/testgen/ui/views/table_groups.py +++ b/testgen/ui/views/table_groups/page.py @@ -29,7 +29,7 @@ class TableGroupsPage(Page): def render(self, connection_id: str, **_kwargs) -> None: connection = connection_service.get_by_id(connection_id, hide_passwords=False) if 
not connection: - self.router.navigate_with_warning( + return self.router.navigate_with_warning( f"Connection with ID '{connection_id}' does not exist. Redirecting to list of Connections ...", "connections", ) @@ -40,7 +40,7 @@ def render(self, connection_id: str, **_kwargs) -> None: testgen.page_header( "Table Groups", "https://docs.datakitchen.io/article/dataops-testgen-help/create-a-table-group", - breadcrumbs=[ + breadcrumbs=[ # type: ignore { "label": "Connections", "path": "connections", "params": { "project_code": project_code } }, { "label": connection["connection_name"] }, ], diff --git a/testgen/utils/singleton.py b/testgen/utils/singleton.py index 0c87de3..722f7f2 100644 --- a/testgen/utils/singleton.py +++ b/testgen/utils/singleton.py @@ -2,9 +2,9 @@ class SingletonType(type): - _instances: typing.ClassVar[dict[type, object]] = {} + _instances: typing.ClassVar[dict[type, typing.Any]] = {} - def __call__(cls, *args, **kwargs) -> typing.Any: + def __call__(cls, *args, **kwargs): if cls not in cls._instances: cls._instances[cls] = super().__call__(*args, **kwargs) return cls._instances[cls] From 4d11dc1bb505bd7f5209c6cbd97483cd57e9368c Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Tue, 22 Oct 2024 10:27:36 -0400 Subject: [PATCH 33/91] feat(ui): add a database flavor selector component --- testgen/ui/assets.py | 19 +++ testgen/ui/assets/flavors/mssql.svg | 123 +++++++++++++++ testgen/ui/assets/flavors/postgresql.svg | 100 ++++++++++++ testgen/ui/assets/flavors/redshift.svg | 75 +++++++++ testgen/ui/assets/flavors/snowflake.svg | 97 ++++++++++++ .../frontend/js/components/flavor_selector.js | 145 ++++++++++++++++++ testgen/ui/components/frontend/js/main.js | 2 + testgen/ui/components/widgets/__init__.py | 2 +- testgen/ui/components/widgets/button.py | 2 +- testgen/ui/components/widgets/wizard.py | 4 +- testgen/ui/forms.py | 10 +- testgen/ui/queries/connection_queries.py | 21 ++- testgen/ui/services/connection_service.py | 10 +- testgen/ui/services/database_service.py | 5 +- testgen/ui/session.py | 3 +- testgen/ui/views/connections/__init__.py | 6 +- testgen/ui/views/connections/forms.py | 22 +-- testgen/ui/views/connections/page.py | 23 ++- testgen/ui/views/table_groups/__init__.py | 4 +- testgen/ui/views/table_groups/forms.py | 3 +- 20 files changed, 629 insertions(+), 47 deletions(-) create mode 100644 testgen/ui/assets.py create mode 100644 testgen/ui/assets/flavors/mssql.svg create mode 100644 testgen/ui/assets/flavors/postgresql.svg create mode 100644 testgen/ui/assets/flavors/redshift.svg create mode 100644 testgen/ui/assets/flavors/snowflake.svg create mode 100644 testgen/ui/components/frontend/js/components/flavor_selector.js diff --git a/testgen/ui/assets.py b/testgen/ui/assets.py new file mode 100644 index 0000000..9ea10f1 --- /dev/null +++ b/testgen/ui/assets.py @@ -0,0 +1,19 @@ +import pathlib + +from streamlit.elements.image import WidthBehaviour, image_to_url + + +def get_asset_path(path: str) -> str: + return (pathlib.Path(__file__).parent / "assets" / path).as_posix() + + +def get_asset_data_url(path: str) -> str: + absolute_path = get_asset_path(path) + return image_to_url( + absolute_path, + int(WidthBehaviour.ORIGINAL), + clamp=False, + channels="RGB", + output_format="auto", + image_id=path, + ) diff --git a/testgen/ui/assets/flavors/mssql.svg b/testgen/ui/assets/flavors/mssql.svg new file mode 100644 index 0000000..c6333d9 --- /dev/null +++ b/testgen/ui/assets/flavors/mssql.svg @@ -0,0 +1,123 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + 
+ + + + + + + + diff --git a/testgen/ui/assets/flavors/postgresql.svg b/testgen/ui/assets/flavors/postgresql.svg new file mode 100644 index 0000000..7db671a --- /dev/null +++ b/testgen/ui/assets/flavors/postgresql.svg @@ -0,0 +1,100 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + diff --git a/testgen/ui/assets/flavors/redshift.svg b/testgen/ui/assets/flavors/redshift.svg new file mode 100644 index 0000000..26bcc27 --- /dev/null +++ b/testgen/ui/assets/flavors/redshift.svg @@ -0,0 +1,75 @@ + + + + + + image/svg+xml + + Icon-Architecture/32/Arch_Amazon-Redshift_32 + + + + + + Icon-Architecture/32/Arch_Amazon-Redshift_32 + + + + + + + diff --git a/testgen/ui/assets/flavors/snowflake.svg b/testgen/ui/assets/flavors/snowflake.svg new file mode 100644 index 0000000..955c3d2 --- /dev/null +++ b/testgen/ui/assets/flavors/snowflake.svg @@ -0,0 +1,97 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/testgen/ui/components/frontend/js/components/flavor_selector.js b/testgen/ui/components/frontend/js/components/flavor_selector.js new file mode 100644 index 0000000..72e06ec --- /dev/null +++ b/testgen/ui/components/frontend/js/components/flavor_selector.js @@ -0,0 +1,145 @@ +/** + * @typedef Falvor + * @type {object} + * @property {string} label + * @property {string} value + * @property {string} icon + * @property {(boolean|null)} selected + * + * @typedef Properties + * @type {object} + * @property {Array.} flavors + * @property {string} selected + * @property {(number|null)} columns + */ + +import van from '../van.min.js'; +import { Streamlit } from '../streamlit.js'; + +const headerHeight = 35; +const rowGap = 16; +const rowHeight = 64; +const columnSize = '200px'; +const { div, span, img, h3 } = van.tags; + +const DatabaseFlavorSelector = (/** @type Properties */props) => { + const flavors = van.val(props.flavors); + const numberOfColumns = van.val(props.columns) ?? 3; + const numberOfRows = Math.ceil(flavors.length / numberOfColumns); + const selectedFlavor = van.state(van.val(props.selected)); + + window.testgen.isPage = true; + Streamlit.setFrameHeight( + headerHeight + + rowHeight * numberOfRows + + rowGap * (numberOfRows / 2) + ); + + if (!window.testgen.loadedStylesheets.databaseFlavorSelector) { + document.adoptedStyleSheets.push(stylesheet); + window.testgen.loadedStylesheets.databaseFlavorSelector = true; + } + + return div( + {class: 'tg-flavor-selector-page'}, + h3( + {class: 'tg-flavor-selector-header'}, + 'Select a database flavor' + ), + () => { + return div( + { + class: 'tg-flavor-selector', + style: `grid-template-columns: ${Array(numberOfColumns).fill(columnSize).join(' ')}; row-gap: ${rowGap}px;` + }, + flavors.map(flavor => + DatabaseFlavor( + { + label: van.state(flavor.label), + value: van.state(flavor.value), + icon: van.state(flavor.icon), + selected: van.derive(() => selectedFlavor.val === flavor.value), + }, + () => { + selectedFlavor.val = flavor.value; + Streamlit.sendData(flavor.value); + }, + ) + ), + ); + }, + ); +}; + +const DatabaseFlavor = ( + /** @type Falvor */ props, + /** @type Function */ onClick, +) => { + return div( + { + class: `tg-flavor ${props.selected.val ? 
'selected' : ''}`, + onclick: onClick, + }, + span({class: 'tg-flavor-focus-state-indicator'}, ''), + img( + {class: 'tg-flavor--icon', src: props.icon}, + ), + span( + {class: 'tg-flavor--label'}, + props.label + ), + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` + .tg-flavor-selector-header { + margin: unset; + margin-bottom: 16px; + font-weight: 400; + } + + .tg-flavor-selector { + display: grid; + grid-template-rows: auto; + column-gap: 32px; + } + + .tg-flavor { + display: flex; + align-items: center; + padding: 16px; + border: 1px solid var(--border-color); + border-radius: 4px; + cursor: pointer; + position: relative; + } + + .tg-flavor .tg-flavor-focus-state-indicator::before { + content: ""; + opacity: 0; + top: 0; + left: 0; + right: 0; + bottom: 0; + position: absolute; + pointer-events: none; + border-radius: inherit; + background: var(--button-primary-hover-state-background); + } + + .tg-flavor:hover .tg-flavor-focus-state-indicator::before, + .tg-flavor.selected .tg-flavor-focus-state-indicator::before { + opacity: var(--button-hover-state-opacity); + } + + .tg-flavor--icon { + margin-right: 16px; + } + + .tg-flavor--label { + font-weight: 500; + } +`); + +export { DatabaseFlavorSelector }; diff --git a/testgen/ui/components/frontend/js/main.js b/testgen/ui/components/frontend/js/main.js index 9c42de7..3dc7f62 100644 --- a/testgen/ui/components/frontend/js/main.js +++ b/testgen/ui/components/frontend/js/main.js @@ -16,6 +16,7 @@ import { Select } from './components/select.js' import { SortingSelector } from './components/sorting_selector.js'; import { TestRuns } from './pages/test_runs.js'; import { ProfilingRuns } from './pages/profiling_runs.js'; +import { DatabaseFlavorSelector } from './components/flavor_selector.js'; let currentWindowVan = van; let topWindowVan = window.top.van; @@ -32,6 +33,7 @@ const TestGenComponent = (/** @type {string} */ id, /** @type {object} */ props) sidebar: window.top.testgen.components.Sidebar, test_runs: TestRuns, profiling_runs: ProfilingRuns, + database_flavor_selector: DatabaseFlavorSelector, }; if (Object.keys(componentById).includes(id)) { diff --git a/testgen/ui/components/widgets/__init__.py b/testgen/ui/components/widgets/__init__.py index d58047e..c2d490c 100644 --- a/testgen/ui/components/widgets/__init__.py +++ b/testgen/ui/components/widgets/__init__.py @@ -24,4 +24,4 @@ from testgen.ui.components.widgets.sorting_selector import sorting_selector from testgen.ui.components.widgets.summary_bar import summary_bar from testgen.ui.components.widgets.testgen_component import testgen_component -from testgen.ui.components.widgets.wizard import wizard, WizardStep +from testgen.ui.components.widgets.wizard import WizardStep, wizard diff --git a/testgen/ui/components/widgets/button.py b/testgen/ui/components/widgets/button.py index 3c32630..d96b588 100644 --- a/testgen/ui/components/widgets/button.py +++ b/testgen/ui/components/widgets/button.py @@ -43,7 +43,7 @@ def button( if width: props.update({"width": width}) - if isinstance(width, (int, float,)): + if isinstance(width, int | float): props.update({"width": f"{width}px"}) if style: diff --git a/testgen/ui/components/widgets/wizard.py b/testgen/ui/components/widgets/wizard.py index 8a055f2..1b87da1 100644 --- a/testgen/ui/components/widgets/wizard.py +++ b/testgen/ui/components/widgets/wizard.py @@ -1,6 +1,6 @@ import dataclasses -import logging import inspect +import logging import typing import streamlit as st @@ -205,7 +205,7 @@ def render(self) -> None: 
@dataclasses.dataclass(kw_only=True, slots=True) -class WizardStep[ResultsType]: +class WizardStep(typing.Generic[ResultsType]): body: typing.Callable[..., StepResults] results: ResultsType = dataclasses.field(default=None) title: str = dataclasses.field(default="") diff --git a/testgen/ui/forms.py b/testgen/ui/forms.py index 61a7120..6a5bc0a 100644 --- a/testgen/ui/forms.py +++ b/testgen/ui/forms.py @@ -1,7 +1,7 @@ import typing import streamlit as st -from pydantic import BaseModel, Field +from pydantic import BaseModel from pydantic.json_schema import DEFAULT_REF_TEMPLATE, GenerateJsonSchema, JsonSchemaMode from streamlit.delta_generator import DeltaGenerator from streamlit_pydantic.ui_renderer import InputUI @@ -19,7 +19,7 @@ def empty(cls) -> typing.Self: return non_validated_instance @property - def _disabled_fields(self) -> typing.Set[str]: + def _disabled_fields(self) -> set[str]: if not getattr(self, "_disabled_fields_set", None): self._disabled_fields_set = set() return self._disabled_fields_set @@ -36,7 +36,7 @@ def model_json_schema( by_alias: bool = True, ref_template: str = DEFAULT_REF_TEMPLATE, schema_generator: type[GenerateJsonSchema] = GenerateJsonSchema, - mode: JsonSchemaMode = 'validation', + mode: JsonSchemaMode = "validation", ) -> dict[str, typing.Any]: schema = super().model_json_schema( by_alias=by_alias, @@ -76,10 +76,10 @@ def input_ui(self): return self._input_ui def form_key(self): - raise NotImplementedError() + raise NotImplementedError def render_input_ui(self, container: DeltaGenerator, session_state: dict) -> typing.Self: - raise NotImplementedError() + raise NotImplementedError def render_field(self, field_name: str, container: DeltaGenerator | None = None) -> typing.Any: streamlit_container = container or self.input_ui._streamlit_container diff --git a/testgen/ui/queries/connection_queries.py b/testgen/ui/queries/connection_queries.py index dc10bed..e03dfcc 100644 --- a/testgen/ui/queries/connection_queries.py +++ b/testgen/ui/queries/connection_queries.py @@ -1,3 +1,5 @@ +from typing import cast + import pandas as pd import streamlit as st @@ -68,8 +70,13 @@ def edit_connection(schema, connection, encrypted_password, encrypted_private_ke st.cache_data.clear() -def add_connection(schema, connection, encrypted_password, encrypted_private_key, encrypted_private_key_passphrase): - +def add_connection( + schema: str, + connection: dict, + encrypted_password: str | None, + encrypted_private_key: str | None, + encrypted_private_key_passphrase: str | None, +) -> int: sql_header = f"""INSERT INTO {schema}.connections (project_code, sql_flavor, url, connect_by_url, connect_by_key, project_host, project_port, project_user, project_db, project_qc_schema, @@ -103,12 +110,16 @@ def add_connection(schema, connection, encrypted_password, encrypted_private_key sql_header += """max_threads, max_query_chars) """ sql_footer += f""" '{connection["max_threads"]}' as max_threads, - '{connection["max_query_chars"]}' as max_query_chars;""" + '{connection["max_query_chars"]}' as max_query_chars""" - sql = sql_header + sql_footer + sql = sql_header + sql_footer + " RETURNING connection_id" - db.execute_sql(sql) + cursor = db.execute_sql(sql) st.cache_data.clear() + if cursor and (primary_key := cast(tuple, cursor.fetchone())): + return primary_key[0] + + return 0 def delete_connections(schema, connection_ids): diff --git a/testgen/ui/services/connection_service.py b/testgen/ui/services/connection_service.py index 27ebf7e..3fe3ecd 100644 --- 
a/testgen/ui/services/connection_service.py +++ b/testgen/ui/services/connection_service.py @@ -58,12 +58,18 @@ def edit_connection(connection): connection_queries.edit_connection(schema, connection, encrypted_password, encrypted_private_key, encrypted_private_key_passphrase) -def add_connection(connection): +def add_connection(connection) -> int: empty_cache() schema = st.session_state["dbschema"] connection = pre_save_connection_process(connection) encrypted_password, encrypted_private_key, encrypted_private_key_passphrase = encrypt_credentials(connection) - connection_queries.add_connection(schema, connection, encrypted_password, encrypted_private_key, encrypted_private_key_passphrase) + return connection_queries.add_connection( + schema, + connection, + encrypted_password, + encrypted_private_key, + encrypted_private_key_passphrase, + ) def pre_save_connection_process(connection): diff --git a/testgen/ui/services/database_service.py b/testgen/ui/services/database_service.py index fd2fac9..e5030cd 100644 --- a/testgen/ui/services/database_service.py +++ b/testgen/ui/services/database_service.py @@ -2,6 +2,7 @@ import pandas as pd from sqlalchemy import create_engine, text +from sqlalchemy.engine.cursor import CursorResult from testgen.common.credentials import ( get_tg_db, @@ -74,10 +75,10 @@ def retrieve_single_result(str_sql): return lstResult[0] -def execute_sql(str_sql): +def execute_sql(str_sql) -> CursorResult | None: if str_sql > "": tg_engine = _start_engine() - tg_engine.execute(text(str_sql)) + return tg_engine.execute(text(str_sql)) def execute_sql_raw(str_sql): diff --git a/testgen/ui/session.py b/testgen/ui/session.py index bb198a8..0e5ef49 100644 --- a/testgen/ui/session.py +++ b/testgen/ui/session.py @@ -1,4 +1,5 @@ -from typing import Any, Callable, Literal, TypeVar +from collections.abc import Callable +from typing import Any, Literal, TypeVar import streamlit as st from streamlit.runtime.state import SessionStateProxy diff --git a/testgen/ui/views/connections/__init__.py b/testgen/ui/views/connections/__init__.py index 76f8c37..cc9b67f 100644 --- a/testgen/ui/views/connections/__init__.py +++ b/testgen/ui/views/connections/__init__.py @@ -1,3 +1,5 @@ -from testgen.ui.views.connections.page import ConnectionsPage +# ruff: noqa: F401 + +from testgen.ui.views.connections.forms import BaseConnectionForm, KeyPairConnectionForm, PasswordConnectionForm from testgen.ui.views.connections.models import ConnectionStatus -from testgen.ui.views.connections.forms import BaseConnectionForm, PasswordConnectionForm, KeyPairConnectionForm +from testgen.ui.views.connections.page import ConnectionsPage diff --git a/testgen/ui/views/connections/forms.py b/testgen/ui/views/connections/forms.py index 942c42a..7bd5b07 100644 --- a/testgen/ui/views/connections/forms.py +++ b/testgen/ui/views/connections/forms.py @@ -2,8 +2,8 @@ import base64 import typing -from pydantic import computed_field import streamlit as st +from pydantic import computed_field from streamlit.delta_generator import DeltaGenerator from testgen.ui.components import widgets as testgen @@ -158,10 +158,10 @@ def render_input_ui(self, container: DeltaGenerator, data: dict) -> typing.Self: def render_extra( self, - container: DeltaGenerator, - left_fields_container: DeltaGenerator, - right_fields_container: DeltaGenerator, - data: dict, + _container: DeltaGenerator, + _left_fields_container: DeltaGenerator, + _right_fields_container: DeltaGenerator, + _data: dict, ) -> None: ... 
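For context on the `execute_sql` and `add_connection` hunks above: returning the cursor is what lets an `INSERT ... RETURNING` statement hand back the generated key without a second query. A minimal sketch, assuming an illustrative two-column insert (the real statement carries many more columns):

    import testgen.ui.services.database_service as db

    def insert_connection_returning_id(schema: str, project_code: str) -> int:
        # Sketch only: execute_sql() now returns the SQLAlchemy CursorResult,
        # so the row produced by the RETURNING clause can be read directly.
        cursor = db.execute_sql(
            f"INSERT INTO {schema}.connections (project_code, sql_flavor) "
            f"VALUES ('{project_code}', 'postgresql') RETURNING connection_id"
        )
        row = cursor.fetchone() if cursor else None
        return row[0] if row else 0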
@@ -187,10 +187,10 @@ class PasswordConnectionForm(BaseConnectionForm): def render_extra( self, - container: DeltaGenerator, + _container: DeltaGenerator, left_fields_container: DeltaGenerator, - right_fields_container: DeltaGenerator, - data: dict, + _right_fields_container: DeltaGenerator, + _data: dict, ) -> None: self.render_field("password", left_fields_container) @@ -223,9 +223,9 @@ def private_key(self) -> str: def render_extra( self, container: DeltaGenerator, - left_fields_container: DeltaGenerator, - right_fields_container: DeltaGenerator, - data: dict, + _left_fields_container: DeltaGenerator, + _right_fields_container: DeltaGenerator, + _data: dict, ) -> None: testgen.divider(margin_top=8, margin_bottom=8, container=container) diff --git a/testgen/ui/views/connections/page.py b/testgen/ui/views/connections/page.py index 770b764..fc2d661 100644 --- a/testgen/ui/views/connections/page.py +++ b/testgen/ui/views/connections/page.py @@ -1,27 +1,26 @@ -from functools import partial import logging import os import time import typing +from functools import partial -from pydantic import ValidationError import streamlit as st -from streamlit.delta_generator import DeltaGenerator import streamlit_pydantic as sp +from pydantic import ValidationError +from streamlit.delta_generator import DeltaGenerator import testgen.ui.services.database_service as db -from testgen.ui.services import table_group_service -from testgen.commands.run_setup_profiling_tools import get_setup_profiling_tools_queries from testgen.commands.run_profiling_bridge import run_profiling_in_background +from testgen.commands.run_setup_profiling_tools import get_setup_profiling_tools_queries from testgen.common.database.database_service import empty_cache from testgen.ui.components import widgets as testgen -from testgen.ui.views.connections.forms import BaseConnectionForm -from testgen.ui.views.table_groups.forms import TableGroupForm from testgen.ui.navigation.menu import MenuItem from testgen.ui.navigation.page import Page -from testgen.ui.services import connection_service +from testgen.ui.services import connection_service, table_group_service from testgen.ui.session import session, temp_value +from testgen.ui.views.connections.forms import BaseConnectionForm from testgen.ui.views.connections.models import ConnectionStatus +from testgen.ui.views.table_groups import TableGroupForm LOG = logging.getLogger("testgen") @@ -35,7 +34,7 @@ class ConnectionsPage(Page): def render(self, project_code: str, **_kwargs) -> None: dataframe = connection_service.get_connections(project_code) - connection = dataframe.iloc[1] + connection = dataframe.iloc[0] has_table_groups = ( len(connection_service.get_table_group_names_by_connection([connection["connection_id"]]) or []) > 0 ) @@ -71,7 +70,7 @@ def render(self, project_code: str, **_kwargs) -> None: on_click=lambda: self.setup_data_configuration(project_code, connection.to_dict()), ) - def show_connection_form(self, selected_connection: dict, mode, project_code) -> None: + def show_connection_form(self, selected_connection: dict, _mode: str, project_code) -> None: connection = selected_connection or {} connection_id = connection.get("connection_id", None) sql_flavor = connection.get("sql_flavor", "postgresql") @@ -107,7 +106,7 @@ def show_connection_form(self, selected_connection: dict, mode, project_code) -> except ValidationError as error: form_errors_container.warning("\n".join([ f"- {field_label}: {err['msg']}" for err in error.errors() - if (field_label := 
TableGroupForm.get_field_label(str(err['loc'][0]))) + if (field_label := TableGroupForm.get_field_label(str(err["loc"][0]))) ])) except Exception: LOG.exception("unexpected form validation error") @@ -364,7 +363,7 @@ def create_table_group_step(self, project_code: str, connection: dict) -> tuple[ except ValidationError as error: form_errors_container.warning("\n".join([ f"- {field_label}: {err['msg']}" for err in error.errors() - if (field_label := TableGroupForm.get_field_label(str(err['loc'][0]))) + if (field_label := TableGroupForm.get_field_label(str(err["loc"][0]))) ])) is_valid = False except Exception: diff --git a/testgen/ui/views/table_groups/__init__.py b/testgen/ui/views/table_groups/__init__.py index 99df82c..77b5027 100644 --- a/testgen/ui/views/table_groups/__init__.py +++ b/testgen/ui/views/table_groups/__init__.py @@ -1,2 +1,4 @@ +# ruff: noqa: F401 + +from testgen.ui.views.table_groups.forms import TableGroupForm from testgen.ui.views.table_groups.page import TableGroupsPage -# from testgen.ui.views.table_groups.forms import ... diff --git a/testgen/ui/views/table_groups/forms.py b/testgen/ui/views/table_groups/forms.py index 9087307..bd559e2 100644 --- a/testgen/ui/views/table_groups/forms.py +++ b/testgen/ui/views/table_groups/forms.py @@ -3,7 +3,6 @@ from streamlit.delta_generator import DeltaGenerator -from testgen.ui.components import widgets as testgen from testgen.ui.forms import BaseForm, Field, ManualRender SQLFlavor = typing.Literal["redshift", "snowflake", "mssql", "postgresql"] @@ -134,7 +133,7 @@ class TableGroupForm(BaseForm, ManualRender): def form_key(self): return f"table_group_form:{self.table_group_id or 'new'}" - def render_input_ui(self, container: DeltaGenerator, data: dict) -> typing.Self: + def render_input_ui(self, container: DeltaGenerator, _: dict) -> typing.Self: left_column, right_column = container.columns([.5, .5]) self.render_field("table_groups_name", left_column) From 25ac0f5ac080c4c093586da5ec7e450705e4bb96 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Thu, 24 Oct 2024 10:34:27 -0400 Subject: [PATCH 34/91] fix(ui): Add streamlit-pydantic to list of dependencies --- pyproject.toml | 1 + testgen/ui/views/connections/forms.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c848c77..cc41773 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ dependencies = [ "cryptography==42.0.8", "validators==0.33.0", "reportlab==4.2.2", + "streamlit-pydantic @ git+https://github.com/LukasMasuch/streamlit-pydantic.git@9f84145b6b6e74cdff3a7815ab75b0464c4d4f24", ] [project.optional-dependencies] diff --git a/testgen/ui/views/connections/forms.py b/testgen/ui/views/connections/forms.py index 7bd5b07..61a52ef 100644 --- a/testgen/ui/views/connections/forms.py +++ b/testgen/ui/views/connections/forms.py @@ -11,7 +11,7 @@ from testgen.ui.services import connection_service SQL_FLAVORS = ["redshift", "snowflake", "mssql", "postgresql"] -SQLFlavor = typing.Literal[*SQL_FLAVORS] +SQLFlavor = typing.Literal["redshift", "snowflake", "mssql", "postgresql"] class BaseConnectionForm(BaseForm, ManualRender): From 9ee6bd25b5df3b072fe59f3550f1cef3c1199a08 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Thu, 24 Oct 2024 13:48:10 -0400 Subject: [PATCH 35/91] fix: python 3.10 compatibility and missing import --- testgen/ui/forms.py | 6 +++--- testgen/ui/views/connections/forms.py | 2 +- testgen/ui/views/table_groups/forms.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/testgen/ui/forms.py b/testgen/ui/forms.py index 6a5bc0a..ff3e679 100644 --- a/testgen/ui/forms.py +++ b/testgen/ui/forms.py @@ -1,7 +1,7 @@ import typing import streamlit as st -from pydantic import BaseModel +from pydantic import BaseModel, Field # noqa: F401 from pydantic.json_schema import DEFAULT_REF_TEMPLATE, GenerateJsonSchema, JsonSchemaMode from streamlit.delta_generator import DeltaGenerator from streamlit_pydantic.ui_renderer import InputUI @@ -12,7 +12,7 @@ def __init__(self, /, **data: typing.Any) -> None: super().__init__(**data) @classmethod - def empty(cls) -> typing.Self: + def empty(cls) -> "BaseForm": non_validated_instance = cls.model_construct() non_validated_instance.model_post_init(None) @@ -78,7 +78,7 @@ def input_ui(self): def form_key(self): raise NotImplementedError - def render_input_ui(self, container: DeltaGenerator, session_state: dict) -> typing.Self: + def render_input_ui(self, container: DeltaGenerator, session_state: dict) -> "BaseForm": raise NotImplementedError def render_field(self, field_name: str, container: DeltaGenerator | None = None) -> typing.Any: diff --git a/testgen/ui/views/connections/forms.py b/testgen/ui/views/connections/forms.py index 61a52ef..1990d98 100644 --- a/testgen/ui/views/connections/forms.py +++ b/testgen/ui/views/connections/forms.py @@ -113,7 +113,7 @@ class BaseConnectionForm(BaseForm, ManualRender): def form_key(self): return f"connection_form:{self.connection_id or 'new'}" - def render_input_ui(self, container: DeltaGenerator, data: dict) -> typing.Self: + def render_input_ui(self, container: DeltaGenerator, data: dict) -> "BaseConnectionForm": main_fields_container, optional_fields_container = container.columns([0.7, 0.3]) if self.get_field_value("connect_by_url", latest=True): diff --git a/testgen/ui/views/table_groups/forms.py b/testgen/ui/views/table_groups/forms.py index bd559e2..7f60e32 100644 --- a/testgen/ui/views/table_groups/forms.py +++ b/testgen/ui/views/table_groups/forms.py @@ -133,7 +133,7 @@ class TableGroupForm(BaseForm, ManualRender): def form_key(self): return f"table_group_form:{self.table_group_id or 'new'}" - def render_input_ui(self, container: DeltaGenerator, _: dict) -> typing.Self: + def render_input_ui(self, container: DeltaGenerator, _: dict) -> "TableGroupForm": left_column, right_column = container.columns([.5, .5]) self.render_field("table_groups_name", left_column) From 3b84e6aa5338d324f977dfe2de34cd439b913336 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Mon, 28 Oct 2024 18:32:50 -0400 Subject: [PATCH 36/91] fix(connections): use the flavor form when parsing validation errors --- testgen/ui/components/frontend/css/shared.css | 8 +++---- .../frontend/js/components/flavor_selector.js | 20 ++++++++-------- testgen/ui/components/widgets/button.py | 7 ++++-- testgen/ui/views/connections/forms.py | 2 +- testgen/ui/views/connections/page.py | 23 +++++++++++++------ 5 files changed, 37 insertions(+), 23 deletions(-) diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index 04aab9a..bcbe89c 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -38,13 +38,13 @@ body { --button-hover-state-opacity: 0.12; --button-basic-background: transparent; - --button-basic-text-color: rgba(0, 0, 0, .54); + --button-basic-text-color: rgba(0, 0, 0, .87); --button-basic-hover-state-background: rgba(0, 0, 0, .54); --button-basic-flat-text-color: rgba(0, 0, 0); - --button-basic-flat-background: 
rgba(0, 0, 0, .54); + --button-basic-flat-background: rgba(0, 0, 0, .87); - --button-basic-stroked-text-color: rgba(0, 0, 0, .54); + --button-basic-stroked-text-color: rgba(0, 0, 0, .87); --button-basic-stroked-background: transparent; --button-primary-background: transparent; @@ -81,7 +81,7 @@ body { --button-basic-flat-text-color: rgba(255, 255, 255); --button-basic-flat-background: rgba(255, 255, 255, .54); - --button-basic-stroked-text-color: rgba(255, 255, 255, .85); + --button-basic-stroked-text-color: rgba(255, 255, 255, .87); --button-basic-stroked-background: transparent; --button-stroked-border: 1px solid var(--border-color); diff --git a/testgen/ui/components/frontend/js/components/flavor_selector.js b/testgen/ui/components/frontend/js/components/flavor_selector.js index 72e06ec..d2d7523 100644 --- a/testgen/ui/components/frontend/js/components/flavor_selector.js +++ b/testgen/ui/components/frontend/js/components/flavor_selector.js @@ -15,6 +15,7 @@ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; +import { loadStylesheet } from '../utils.js'; const headerHeight = 35; const rowGap = 16; @@ -23,10 +24,12 @@ const columnSize = '200px'; const { div, span, img, h3 } = van.tags; const DatabaseFlavorSelector = (/** @type Properties */props) => { - const flavors = van.val(props.flavors); - const numberOfColumns = van.val(props.columns) ?? 3; + loadStylesheet('databaseFlavorSelector', stylesheet); + + const flavors = props.flavors?.val ?? props.flavors; + const numberOfColumns = props.columns?.val ?? props.columns ?? 3; const numberOfRows = Math.ceil(flavors.length / numberOfColumns); - const selectedFlavor = van.state(van.val(props.selected)); + const selectedFlavor = van.state(props.selected?.val ?? props.selected); window.testgen.isPage = true; Streamlit.setFrameHeight( @@ -35,16 +38,11 @@ const DatabaseFlavorSelector = (/** @type Properties */props) => { + rowGap * (numberOfRows / 2) ); - if (!window.testgen.loadedStylesheets.databaseFlavorSelector) { - document.adoptedStyleSheets.push(stylesheet); - window.testgen.loadedStylesheets.databaseFlavorSelector = true; - } - return div( {class: 'tg-flavor-selector-page'}, h3( {class: 'tg-flavor-selector-header'}, - 'Select a database flavor' + 'Select your database type' ), () => { return div( @@ -128,6 +126,10 @@ stylesheet.replace(` background: var(--button-primary-hover-state-background); } + .tg-flavor.selected { + border-color: var(--primary-color); + } + .tg-flavor:hover .tg-flavor-focus-state-indicator::before, .tg-flavor.selected .tg-flavor-focus-state-indicator::before { opacity: var(--button-hover-state-opacity); diff --git a/testgen/ui/components/widgets/button.py b/testgen/ui/components/widgets/button.py index d96b588..9b30cdb 100644 --- a/testgen/ui/components/widgets/button.py +++ b/testgen/ui/components/widgets/button.py @@ -9,7 +9,7 @@ def button( type_: ButtonType = "basic", - color: ButtonColor = "primary", + color: ButtonColor | None = None, label: str | None = None, icon: str | None = None, tooltip: str | None = None, @@ -28,8 +28,11 @@ def button( :param icon: icon name of material rounded icon fonts :param on_click: click handler for this button """ + color_ = color or "primary" + if not color and type_ == "icon": + color_ = "basic" - props = {"type": type_, "disabled": disabled, "color": color} + props = {"type": type_, "disabled": disabled, "color": color_} if type_ != "icon": if not label: raise ValueError(f"A label is required for {type_} buttons") diff --git 
a/testgen/ui/views/connections/forms.py b/testgen/ui/views/connections/forms.py index 1990d98..d054c83 100644 --- a/testgen/ui/views/connections/forms.py +++ b/testgen/ui/views/connections/forms.py @@ -123,7 +123,7 @@ def render_input_ui(self, container: DeltaGenerator, data: dict) -> "BaseConnect self.render_field("sql_flavor", container=main_fields_container) self.render_field("connection_name", container=main_fields_container) - host_field_container, port_field_container = main_fields_container.columns([0.6, 0.4]) + host_field_container, port_field_container = main_fields_container.columns([0.8, 0.2]) self.render_field("project_host", container=host_field_container) self.render_field("project_port", container=port_field_container) diff --git a/testgen/ui/views/connections/page.py b/testgen/ui/views/connections/page.py index fc2d661..1502747 100644 --- a/testgen/ui/views/connections/page.py +++ b/testgen/ui/views/connections/page.py @@ -44,7 +44,9 @@ def render(self, project_code: str, **_kwargs) -> None: "https://docs.datakitchen.io/article/dataops-testgen-help/connect-your-database", ) + testgen.whitespace(0.3) _, actions_column = st.columns([.1, .9]) + testgen.whitespace(0.3) testgen.flex_row_end(actions_column) with st.container(border=True): @@ -53,17 +55,21 @@ def render(self, project_code: str, **_kwargs) -> None: if has_table_groups: with actions_column: testgen.link( + label="Manage Table Groups", href="connections:table-groups", params={"connection_id": str(connection["connection_id"])}, - label="Table Groups", right_icon="chevron_right", - style="margin-left: auto;", + underline=False, + height=40, + style="margin-left: auto; border-radius: 4px;" + " border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)", ) else: with actions_column: testgen.button( type_="stroked", - color="basic", + color="primary", + icon="table_view", label="Setup Table Groups", style="background: white;", width=200, @@ -77,10 +83,13 @@ def show_connection_form(self, selected_connection: dict, _mode: str, project_co data = {} try: - form = BaseConnectionForm.for_flavor(sql_flavor).model_construct(sql_flavor=sql_flavor) + FlavorForm = BaseConnectionForm.for_flavor(sql_flavor) if connection: connection["password"] = connection["password"] or "" - form = BaseConnectionForm.for_flavor(sql_flavor)(**connection) + FlavorForm = BaseConnectionForm.for_flavor(sql_flavor) + + form_kwargs = connection or {"sql_flavor": sql_flavor} + form = FlavorForm(**form_kwargs) sql_flavor = form.get_field_value("sql_flavor", latest=True) or sql_flavor if form.sql_flavor != sql_flavor: @@ -102,11 +111,11 @@ def show_connection_form(self, selected_connection: dict, _mode: str, project_co }) try: - BaseConnectionForm.for_flavor(sql_flavor).model_validate(data) + FlavorForm.model_validate(data) except ValidationError as error: form_errors_container.warning("\n".join([ f"- {field_label}: {err['msg']}" for err in error.errors() - if (field_label := TableGroupForm.get_field_label(str(err["loc"][0]))) + if (field_label := FlavorForm.get_field_label(str(err["loc"][0]))) ])) except Exception: LOG.exception("unexpected form validation error") From 39aa35ce670c61bfd8f1b16c6d0742ac971f6df3 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Tue, 29 Oct 2024 09:32:39 -0400 Subject: [PATCH 37/91] misc(ui): add white background to custom link --- testgen/ui/views/connections/page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/ui/views/connections/page.py 
b/testgen/ui/views/connections/page.py
index 1502747..7dfa6a7 100644
--- a/testgen/ui/views/connections/page.py
+++ b/testgen/ui/views/connections/page.py
@@ -61,7 +61,7 @@ def render(self, project_code: str, **_kwargs) -> None:
                     right_icon="chevron_right",
                     underline=False,
                     height=40,
-                    style="margin-left: auto; border-radius: 4px;"
+                    style="margin-left: auto; border-radius: 4px; background: white;"
                         " border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)",
                 )
             else:

From 5e0ec9a793293498d06792112ff671230aaf3b18 Mon Sep 17 00:00:00 2001
From: Luis Trinidad
Date: Tue, 29 Oct 2024 09:43:53 -0400
Subject: [PATCH 38/91] misc(ui): add db flavor icons for azure sql and synapse

---
 testgen/ui/assets/flavors/azure_sql.svg       | 135 ++++++++++++++++
 .../ui/assets/flavors/azure_synapse_table.svg | 145 ++++++++++++++++++
 .../frontend/js/components/flavor_selector.js |   4 +-
 3 files changed, 282 insertions(+), 2 deletions(-)
 create mode 100644 testgen/ui/assets/flavors/azure_sql.svg
 create mode 100644 testgen/ui/assets/flavors/azure_synapse_table.svg

diff --git a/testgen/ui/assets/flavors/azure_sql.svg b/testgen/ui/assets/flavors/azure_sql.svg
new file mode 100644
index 0000000..7329ae2
--- /dev/null
+++ b/testgen/ui/assets/flavors/azure_sql.svg
@@ -0,0 +1,135 @@
[135 lines of SVG markup not recoverable from extraction; recoverable title: "Icon-databases-130"]
diff --git a/testgen/ui/assets/flavors/azure_synapse_table.svg b/testgen/ui/assets/flavors/azure_synapse_table.svg
new file mode 100644
index 0000000..9d908fa
--- /dev/null
+++ b/testgen/ui/assets/flavors/azure_synapse_table.svg
@@ -0,0 +1,145 @@
[145 lines of SVG markup not recoverable from extraction]
diff --git a/testgen/ui/components/frontend/js/components/flavor_selector.js b/testgen/ui/components/frontend/js/components/flavor_selector.js
index d2d7523..a4a1875 100644
--- a/testgen/ui/components/frontend/js/components/flavor_selector.js
+++ b/testgen/ui/components/frontend/js/components/flavor_selector.js
@@ -19,7 +19,7 @@ import { loadStylesheet } from '../utils.js';
 const headerHeight = 35;
 const rowGap = 16;
-const rowHeight = 64;
+const rowHeight = 67;
 const columnSize = '200px';
 const { div, span, img, h3 } = van.tags;
@@ -35,7 +35,7 @@ const DatabaseFlavorSelector = (/** @type Properties */props) => {
     Streamlit.setFrameHeight(
         headerHeight
         + rowHeight * numberOfRows
-        + rowGap * (numberOfRows / 2)
+        + rowGap * (numberOfRows - 1)
     );
     return div(

From a6e20a7fb58317176479cfb51147e5031d2e1115 Mon Sep 17 00:00:00 2001
From: Luis Trinidad
Date: Thu, 31 Oct 2024 11:23:35 -0400
Subject: [PATCH 39/91] fix(connections): display cached value for private key file uploader

---
 testgen/ui/views/connections/forms.py | 46 +++++++++++++++++++++------
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/testgen/ui/views/connections/forms.py b/testgen/ui/views/connections/forms.py
index d054c83..e6890e1 100644
--- a/testgen/ui/views/connections/forms.py
+++ b/testgen/ui/views/connections/forms.py
@@ -1,10 +1,10 @@
 # type: ignore
-import base64
 import typing
 import streamlit as st
 from pydantic import computed_field
 from streamlit.delta_generator import DeltaGenerator
+from streamlit.runtime.uploaded_file_manager import UploadedFile
 from testgen.ui.components import widgets as testgen
 from testgen.ui.forms import BaseForm, Field, ManualRender
@@ -207,18 +207,16 @@ class KeyPairConnectionForm(PasswordConnectionForm):
         ),
         st_kwargs_label="Private Key
Passphrase", ) - private_key_inner: str = Field( - default="", - format="base64", - st_kwargs_label="Upload private key (rsa_key.p8)", - ) + _uploaded_file: UploadedFile | None = None @computed_field @property def private_key(self) -> str: - if not self.private_key_inner: + if self._uploaded_file is None: return "" - return base64.b64decode(self.private_key_inner).decode("utf-8") + + file_contents: bytes = self._uploaded_file.getvalue() + return file_contents.decode("utf-8") def render_extra( self, @@ -247,4 +245,34 @@ def render_extra( self.render_field("password", container) else: self.render_field("private_key_passphrase", container) - self.render_field("private_key_inner", container) + + file_uploader_key = self.get_field_key("private_key_uploader") + cached_file_upload_key = self.get_field_key("previous_private_key_file") + + self._uploaded_file = container.file_uploader( + key=file_uploader_key, + label="Upload private key (rsa_key.p8)", + accept_multiple_files=False, + on_change=lambda: st.session_state.pop(cached_file_upload_key, None), + ) + + if self._uploaded_file: + st.session_state[cached_file_upload_key] = self._uploaded_file + elif self._uploaded_file is None and (cached_file_upload := st.session_state.get(cached_file_upload_key)): + self._uploaded_file = cached_file_upload + file_size = f"{round(self._uploaded_file.size / 1024, 2)}KB" + container.markdown( + f""" +
+ draft + {self._uploaded_file.name} + {file_size} +
+ """, + unsafe_allow_html=True, + ) + + def reset_cache(self) -> None: + st.session_state.pop(self.get_field_key("private_key_uploader"), None) + st.session_state.pop(self.get_field_key("previous_private_key_file"), None) + return super().reset_cache() From 372c12b6ae110e90a71155cdc453d400dfc06ab0 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Fri, 1 Nov 2024 11:49:58 -0400 Subject: [PATCH 40/91] fix(ui): use index instead of value in flavor selector --- .../frontend/js/components/flavor_selector.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/testgen/ui/components/frontend/js/components/flavor_selector.js b/testgen/ui/components/frontend/js/components/flavor_selector.js index a4a1875..e5ff790 100644 --- a/testgen/ui/components/frontend/js/components/flavor_selector.js +++ b/testgen/ui/components/frontend/js/components/flavor_selector.js @@ -9,7 +9,7 @@ * @typedef Properties * @type {object} * @property {Array.} flavors - * @property {string} selected + * @property {((number|null))} selected * @property {(number|null)} columns */ @@ -29,7 +29,7 @@ const DatabaseFlavorSelector = (/** @type Properties */props) => { const flavors = props.flavors?.val ?? props.flavors; const numberOfColumns = props.columns?.val ?? props.columns ?? 3; const numberOfRows = Math.ceil(flavors.length / numberOfColumns); - const selectedFlavor = van.state(props.selected?.val ?? props.selected); + const selectedIndex = van.state(props.selected?.val ?? props.selected); window.testgen.isPage = true; Streamlit.setFrameHeight( @@ -50,17 +50,17 @@ const DatabaseFlavorSelector = (/** @type Properties */props) => { class: 'tg-flavor-selector', style: `grid-template-columns: ${Array(numberOfColumns).fill(columnSize).join(' ')}; row-gap: ${rowGap}px;` }, - flavors.map(flavor => + flavors.map((flavor, idx) => DatabaseFlavor( { label: van.state(flavor.label), value: van.state(flavor.value), icon: van.state(flavor.icon), - selected: van.derive(() => selectedFlavor.val === flavor.value), + selected: van.derive(() => selectedIndex.val == idx), }, () => { - selectedFlavor.val = flavor.value; - Streamlit.sendData(flavor.value); + selectedIndex.val = idx; + Streamlit.sendData({index: idx, value: flavor.value}); }, ) ), From 1a167b8e794b5cdbc4d5859753777f27660aef81 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Tue, 5 Nov 2024 18:17:58 -0400 Subject: [PATCH 41/91] fix: sleep to protect against multiple reruns --- testgen/ui/views/connections/forms.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/testgen/ui/views/connections/forms.py b/testgen/ui/views/connections/forms.py index e6890e1..bbaba0c 100644 --- a/testgen/ui/views/connections/forms.py +++ b/testgen/ui/views/connections/forms.py @@ -1,4 +1,5 @@ # type: ignore +import time import typing import streamlit as st @@ -114,6 +115,7 @@ def form_key(self): return f"connection_form:{self.connection_id or 'new'}" def render_input_ui(self, container: DeltaGenerator, data: dict) -> "BaseConnectionForm": + time.sleep(0.1) main_fields_container, optional_fields_container = container.columns([0.7, 0.3]) if self.get_field_value("connect_by_url", latest=True): @@ -154,6 +156,8 @@ def render_input_ui(self, container: DeltaGenerator, data: dict) -> "BaseConnect self.render_field("url_prefix", container=url_override_left_column) self.render_field("url", container=url_override_right_column) + time.sleep(0.1) + return self def render_extra( From d25bce83a2e15cd7c2b2639e3585f405e42110e3 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Tue, 5 Nov 
2024 18:31:16 -0400 Subject: [PATCH 42/91] misc: remove qc schema creation logic --- testgen/ui/views/connections/forms.py | 8 -- testgen/ui/views/connections/page.py | 115 +------------------------- 2 files changed, 2 insertions(+), 121 deletions(-) diff --git a/testgen/ui/views/connections/forms.py b/testgen/ui/views/connections/forms.py index bbaba0c..ce7fc42 100644 --- a/testgen/ui/views/connections/forms.py +++ b/testgen/ui/views/connections/forms.py @@ -92,13 +92,6 @@ class BaseConnectionForm(BaseForm, ManualRender): "unless test queries are failing." ), ) - project_qc_schema: str = Field( - default="qc", - max_length=50, - st_kwargs_label="QC Utility Schema", - st_kwargs_max_chars=50, - st_kwargs_help="The name of the schema on your database that will contain TestGen's profiling functions.", - ) connection_id: int | None = Field(default=None) @@ -131,7 +124,6 @@ def render_input_ui(self, container: DeltaGenerator, data: dict) -> "BaseConnect self.render_field("project_db", container=main_fields_container) self.render_field("project_user", container=main_fields_container) - self.render_field("project_qc_schema", container=optional_fields_container) self.render_field("max_threads", container=optional_fields_container) self.render_field("max_query_chars", container=optional_fields_container) diff --git a/testgen/ui/views/connections/page.py b/testgen/ui/views/connections/page.py index 7dfa6a7..aeb939c 100644 --- a/testgen/ui/views/connections/page.py +++ b/testgen/ui/views/connections/page.py @@ -1,5 +1,4 @@ import logging -import os import time import typing from functools import partial @@ -11,7 +10,6 @@ import testgen.ui.services.database_service as db from testgen.commands.run_profiling_bridge import run_profiling_in_background -from testgen.commands.run_setup_profiling_tools import get_setup_profiling_tools_queries from testgen.common.database.database_service import empty_cache from testgen.ui.components import widgets as testgen from testgen.ui.navigation.menu import MenuItem @@ -102,6 +100,7 @@ def show_connection_form(self, selected_connection: dict, _mode: str, project_co ) data.update({ "project_code": project_code, + "project_qc_schema": "", }) if "private_key" not in data: data.update({ @@ -121,7 +120,7 @@ def show_connection_form(self, selected_connection: dict, _mode: str, project_co LOG.exception("unexpected form validation error") st.error("Unexpected error displaying the form. 
Try again") - test_button_column, config_qc_column, _, save_button_column = st.columns([.2, .2, .4, .2]) + test_button_column, _, save_button_column = st.columns([.2, .6, .2]) is_submitted, set_submitted = temp_value(f"connection_form-{connection_id or 'new'}:submit") get_connection_status, set_connection_status = temp_value( f"connection_form-{connection_id or 'new'}:test_conn" @@ -144,16 +143,6 @@ def show_connection_form(self, selected_connection: dict, _mode: str, project_co on_click=lambda: set_connection_status(self.test_connection(data)), ) - with config_qc_column: - testgen.button( - type_="stroked", - color="basic", - label="Configure QC Utility Schema", - key=f"connection_form:{connection_id or 'new'}:config-qc-schema", - tooltip="Creates the required Utility schema and related functions in the target database", - on_click=lambda: self.create_qc_schema_dialog(connection) - ) - if (connection_status := get_connection_status()): single_element_container = st.empty() single_element_container.info("Connecting ...") @@ -235,106 +224,6 @@ def test_connection(self, connection: dict) -> "ConnectionStatus": except Exception as error: return ConnectionStatus(message="Error attempting the Connection.", details=error.args[0], successful=False) - @st.dialog(title="Configure QC Utility Schema") - def create_qc_schema_dialog(self, selected_connection): - connection_id = selected_connection["connection_id"] - project_qc_schema = selected_connection["project_qc_schema"] - sql_flavor = selected_connection["sql_flavor"] - user = selected_connection["project_user"] - - create_qc_schema = st.toggle("Create QC Utility Schema", value=True) - grant_privileges = st.toggle("Grant access privileges to TestGen user", value=True) - - user_role = None - - # TODO ALEX: This textbox may be needed if we want to grant permissions to user role - # if sql_flavor == "snowflake": - # user_role_textbox_label = f"Primary role for database user {user}" - # user_role = st.text_input(label=user_role_textbox_label, max_chars=100) - - admin_credentials_expander = st.expander("Admin credential options", expanded=True) - with admin_credentials_expander: - admin_connection_option_index = 0 - admin_connection_options = ["Do not use admin credentials", "Use admin credentials with Password"] - if sql_flavor == "snowflake": - admin_connection_options.append("Use admin credentials with Key-Pair") - - admin_connection_option = st.radio( - "Admin credential options", - label_visibility="hidden", - options=admin_connection_options, - index=admin_connection_option_index, - horizontal=True, - ) - - st.markdown("

 
", unsafe_allow_html=True) - - db_user = None - db_password = None - admin_private_key_passphrase = None - admin_private_key = None - if admin_connection_option == admin_connection_options[0]: - st.markdown(":orange[User created in the connection dialog will be used.]") - else: - db_user = st.text_input(label="Admin db user", max_chars=40) - if admin_connection_option == admin_connection_options[1]: - db_password = st.text_input( - label="Admin db password", max_chars=40, type="password" - ) - st.markdown(":orange[Note: Admin credentials are not stored, are only used for this operation.]") - - if len(admin_connection_options) > 2 and admin_connection_option == admin_connection_options[2]: - admin_private_key_passphrase = st.text_input( - label="Private Key Passphrase", - key="create-qc-schema-private-key-password", - type="password", - max_chars=200, - help="Passphrase used while creating the private Key (leave empty if not applicable)", - ) - - admin_uploaded_file = st.file_uploader("Upload private key (rsa_key.p8)", key="admin-uploaded-file") - if admin_uploaded_file: - admin_private_key = admin_uploaded_file.getvalue().decode("utf-8") - - st.markdown(":orange[Note: Admin credentials are not stored, are only used for this operation.]") - - submit = st.button("Update Configuration") - - if submit: - empty_cache() - script_expander = st.expander("Script Details") - - operation_status = st.empty() - operation_status.info(f"Configuring QC Utility Schema '{project_qc_schema}'...") - - try: - skip_granting_privileges = not grant_privileges - queries = get_setup_profiling_tools_queries(sql_flavor, create_qc_schema, skip_granting_privileges, project_qc_schema, user, user_role) - with script_expander: - st.code( - os.linesep.join(queries), - language="sql", - line_numbers=True) - - connection_service.create_qc_schema( - connection_id, - create_qc_schema, - db_user if db_user else None, - db_password if db_password else None, - skip_granting_privileges, - admin_private_key_passphrase=admin_private_key_passphrase, - admin_private_key=admin_private_key, - user_role=user_role, - ) - operation_status.empty() - operation_status.success("Operation has finished successfully.") - - except Exception as e: - operation_status.empty() - operation_status.error("Error configuring QC Utility Schema.") - error_message = e.args[0] - st.text_area("Error Details", value=error_message) - @st.dialog(title="Data Configuration Setup") def setup_data_configuration(self, project_code: str, connection: dict) -> None: will_run_profiling = st.session_state.get("connection_form-new:run-profiling-toggle", True) From 10c612e1668d532208e5ff3e8d36e05948157386 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Wed, 6 Nov 2024 09:53:38 -0400 Subject: [PATCH 43/91] fix(profiling): add parenthesis to profiling issue criteria the missing parentheses caused the query that inserted into profiling anomalies table to include anomalies for other profiling runs --- testgen/template/dbsetup/050_populate_new_schema_metadata.sql | 2 +- testgen/template/dbupgrade/0112_incremental_upgrade.sql | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 testgen/template/dbupgrade/0112_incremental_upgrade.sql diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql index f30d83c..c4ea048 100644 --- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql +++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql @@ -16,7 +16,7 @@ INSERT 
INTO profile_anomaly_types (id, anomaly_type, data_object, anomaly_name, VALUES ('1001', 'Suggested_Type', 'Column', 'Suggested Data Type', 'Data stored as text all meets criteria for a more suitable type. ', '(functional_data_type NOT IN (''Boolean'', ''Flag'') ) AND (column_type ILIKE ''%ch ar%'' OR column_type ILIKE ''text'') AND NOT (datatype_suggestion ILIKE ''%char%'' OR datatype_suggestion ILIKE ''text'')', 'p.datatype_suggestion::VARCHAR(200)', 'Likely', 'Consider changing the column data type to tighte n controls over data ingested and to make values more efficient, consistent and suitable for downstream analysis.'), - ('1002', 'Non_Standard_Blanks', 'Column', 'Non-Standard Blank Values', 'Values representing missing data may be unexpected or inconsistent. Non-standard values may include empty strings as opposed to nulls, dummy entries such as "MISSING" or repeated characters that may have been used to bypass entry requirements, processing artifacts such as "NULL", or spreadsheet artifacts such as "NA", "ERROR".', 'p.filled_value_ct > 0 OR p.zero_length_ct > 0', '''Filled Values: '' || p.filled_value_ct::VARCHAR || '', Empty String: '' || p.zero_length_ct::VARCHAR || '', Null: '' || p.null_value_ct::VARCHAR || '', Records: '' || p.record_ct::VARCHAR', 'Definite', 'Consider cleansing the column upon ingestion to replace all variants of missing data with a standard designation, like Null.'), + ('1002', 'Non_Standard_Blanks', 'Column', 'Non-Standard Blank Values', 'Values representing missing data may be unexpected or inconsistent. Non-standard values may include empty strings as opposed to nulls, dummy entries such as "MISSING" or repeated characters that may have been used to bypass entry requirements, processing artifacts such as "NULL", or spreadsheet artifacts such as "NA", "ERROR".', '(p.filled_value_ct > 0 OR p.zero_length_ct > 0)', '''Filled Values: '' || p.filled_value_ct::VARCHAR || '', Empty String: '' || p.zero_length_ct::VARCHAR || '', Null: '' || p.null_value_ct::VARCHAR || '', Records: '' || p.record_ct::VARCHAR', 'Definite', 'Consider cleansing the column upon ingestion to replace all variants of missing data with a standard designation, like Null.'), ('1003', 'Invalid_Zip_USA', 'Column', 'Invalid USA Zip Code Format', 'Some values present do not conform with the expected format of USA Zip Codes.', 'p.std_pattern_match = ''ZIP_USA'' AND (p.general_type <> ''A'' OR p.filled_value_ct > 0 OR p.min_length >= 1 AND p.min_length <= 4 OR p.max_length > 10)', 'CASE WHEN p.general_type = ''N'' THEN ''Type: '' || p.column_type || '', '' ELSE '''' END || ''Min Length: '' || p.min_length::VARCHAR || '', Max Length: '' || p.max_length::VARCHAR || '', Filled Values: '' || p.filled_value_ct::VARCHAR', 'Definite', 'Consider correcting invalid column values or changing them to indicate a missing value if corrections cannot be made.'), ('1004', 'Multiple_Types_Minor', 'Multi-Col', 'Multiple Data Types per Column Name - Minor', 'Columns with the same name have the same general type across tables, but the types do not exactly match. Truncation issues may result if columns are commingled and assumed to be the same format.', 'm.general_type_ct = 1 AND m.type_ct > 1', '''Found '' || m.column_ct::VARCHAR || '' columns, '' || m.type_ct::VARCHAR(10) || '' types, '' || m.min_type || '' to '' || m.max_type', 'Possible', 'Consider changing the column data types to be fully consistent. 
This will tighten your standards at ingestion and assure that data is consistent between tables.'), ('1005', 'Multiple_Types_Major', 'Multi-Col', 'Multiple Data Types per Column Name - Major', 'Columns with the same name have broadly different types across tables. Differences could be significant enough to cause errors in downstream analysis, extra steps resulting in divergent business logic and inconsistencies in results.', 'm.general_type_ct > 1', '''Found '' || m.column_ct::VARCHAR || '' columns, '' || m.type_ct::VARCHAR(10) || '' types, '' || m.min_type || '' to '' || m.max_type', 'Likely', 'Ideally, you should change the column data types to be fully consistent. If the data is meant to be different, you should change column names so downstream users aren''t led astray.'), diff --git a/testgen/template/dbupgrade/0112_incremental_upgrade.sql b/testgen/template/dbupgrade/0112_incremental_upgrade.sql new file mode 100644 index 0000000..c81cccb --- /dev/null +++ b/testgen/template/dbupgrade/0112_incremental_upgrade.sql @@ -0,0 +1,3 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +UPDATE profile_anomaly_types SET anomaly_criteria = '(p.filled_value_ct > 0 OR p.zero_length_ct > 0)' WHERE id = '1002'; From e64d20a20ced85f42c7bfd0cf9a6ed8354c555a2 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Thu, 7 Nov 2024 12:15:35 -0400 Subject: [PATCH 44/91] misc(profiling): remove qc schema creation logic --- testgen/__main__.py | 80 ---------------- testgen/commands/run_quick_start.py | 11 --- testgen/commands/run_setup_profiling_tools.py | 96 ------------------- testgen/ui/services/connection_service.py | 7 -- 4 files changed, 194 deletions(-) delete mode 100644 testgen/commands/run_setup_profiling_tools.py diff --git a/testgen/__main__.py b/testgen/__main__.py index 285e949..fd19379 100644 --- a/testgen/__main__.py +++ b/testgen/__main__.py @@ -1,4 +1,3 @@ -import getpass import logging import os import subprocess @@ -33,7 +32,6 @@ from testgen.commands.run_observability_exporter import run_observability_exporter from testgen.commands.run_profiling_bridge import run_profiling_queries from testgen.commands.run_quick_start import run_quick_start, run_quick_start_increment -from testgen.commands.run_setup_profiling_tools import run_setup_profiling_tools from testgen.commands.run_upgrade_db_config import get_schema_revision, is_db_revision_up_to_date, run_upgrade_db_config from testgen.common import ( configure_logging, @@ -450,84 +448,6 @@ def do_upgrade_system_version(): click.echo("System and services upgrade is not required.") -@cli.command( - "setup-target-db-functions", help="Use to set up the utility functions in the target database for running profiles." -) -@click.option( - "-c", - "--connection-id", - help="The identifier for the connection. 
Use a connection_id shown in list-connections.", - required=True, - type=click.STRING, -) -@click.option( - "-dr", - "--dry-run", - default=False, - is_flag=True, - required=False, - help="Dry run to show which schema will be modified", -) -@click.option( - "-cs", - "--create-qc-schema", - default=False, - is_flag=True, - required=False, - help="Create the QC utility schema required in the target database", -) -@click.option("--yes", "-y", default=False, is_flag=True, required=False, help="Force yes") -@click.option( - "--skip-asking-credentials", - "-s", - default=False, - is_flag=True, - required=False, - help="Skip request for special write credentials for target database, uses standard credentials instead", -) -@click.option( - "--skip-granting-privileges", - "-sgp", - default=False, - is_flag=True, - required=False, - help="Skip granting execute privileges to the user for the QC utility schema in the target database", -) -@pass_configuration -def setup_profiling_tools( - configuration: Configuration, - connection_id: str, - dry_run: bool, - create_qc_schema: bool, - yes: bool, - skip_asking_credentials: bool, - skip_granting_privileges: bool, -): - db_user = None - db_password = None - if not skip_asking_credentials: - db_user = input("Admin DB User?") - db_password = getpass.getpass("Admin DB Password?") - - if not yes and not dry_run: - confirm = input( - f"Are you sure you want to setup the utility functions to be able to run the profile for connection {connection_id}? [yes/No]" - ) - if confirm.lower() != "yes": - click.echo("Exiting without any operation performed.") - return - project_qc_schema = run_setup_profiling_tools( - connection_id, dry_run, create_qc_schema, db_user, db_password, skip_granting_privileges - ) - if not dry_run: - message = f"Project DB has been set up. Modified schema: {project_qc_schema}" - else: - message = ( - f"Project DB dry run completed, no changes applied. 
Modified schema would have been: {project_qc_schema}" - ) - click.echo(message) - - @cli.command("get-test-results", help="Fetches results for a test run.") @click.option( "-tr", diff --git a/testgen/commands/run_quick_start.py b/testgen/commands/run_quick_start.py index 67a22b5..487c47d 100644 --- a/testgen/commands/run_quick_start.py +++ b/testgen/commands/run_quick_start.py @@ -5,7 +5,6 @@ from testgen import settings from testgen.commands.run_get_entities import run_table_group_list from testgen.commands.run_launch_db_config import run_launch_db_config -from testgen.commands.run_setup_profiling_tools import run_setup_profiling_tools from testgen.common.database.database_service import ( AssignConnectParms, CreateDatabaseIfNotExists, @@ -140,16 +139,6 @@ def run_quick_start(delete_target_db: bool) -> None: rows, _ = run_table_group_list(project_key) connection_id = str(rows[0][2]) - # run qc - command = "testgen setup-target-db-functions --connection-id --create-qc-schema --yes" - click.echo(f"Running CLI command: {command}") - create_qc_schema = True - db_user = params_mapping["TESTGEN_ADMIN_USER"] - db_password = params_mapping["TESTGEN_ADMIN_PASSWORD"] - dry_run = False - project_qc_schema = run_setup_profiling_tools(connection_id, dry_run, create_qc_schema, db_user, db_password) - click.echo(f"Schema {project_qc_schema} has been created in the target db") - def run_quick_start_increment(iteration): params_mapping = _get_params_mapping(iteration) diff --git a/testgen/commands/run_setup_profiling_tools.py b/testgen/commands/run_setup_profiling_tools.py deleted file mode 100644 index c2d42f3..0000000 --- a/testgen/commands/run_setup_profiling_tools.py +++ /dev/null @@ -1,96 +0,0 @@ -import logging - -from testgen.commands.run_get_entities import run_get_connection -from testgen.common import AssignConnectParms, RunActionQueryList -from testgen.common.database.database_service import get_queries_for_command - -LOG = logging.getLogger("testgen") - - -def _get_params_mapping(project_qc_schema: str, user: str, user_role: str | None) -> dict: - return { - "DATA_QC_SCHEMA": project_qc_schema, - "DB_USER": user, - "DB_USER_ROLE": user_role, - } - - -def get_setup_profiling_tools_queries(sql_flavor, create_qc_schema, skip_granting_privileges, project_qc_schema, user, user_role=None): - queries = [] - - params_mapping = _get_params_mapping(project_qc_schema, user, user_role) - - if create_qc_schema: - queries.extend( - get_queries_for_command( - f"flavors/{sql_flavor}/setup_profiling_tools", - params_mapping, - mask=rf"^.*create_qc_schema_{sql_flavor}.sql$", - ) - ) - - queries.extend( - get_queries_for_command( - f"flavors/{sql_flavor}/setup_profiling_tools", params_mapping, mask=rf"^.*functions_{sql_flavor}.sql$" - ) - ) - - if not skip_granting_privileges: - queries.extend( - get_queries_for_command( - f"flavors/{sql_flavor}/setup_profiling_tools", - params_mapping, - mask=rf"^.*grant_execute_privileges_{sql_flavor}.sql$", - ) - ) - - return queries - - -def run_setup_profiling_tools( - connection_id: str | int, - dry_run: bool, - create_qc_schema: bool = True, - db_user: str | None = None, - db_password: str | None = None, - skip_granting_privileges: bool = False, - admin_private_key_passphrase: str | None = None, - admin_private_key: str | None = None, - user_role: str | None = None, -) -> str: - connection = run_get_connection(str(connection_id)) - - # Set Project Connection Parms in common.db_bridgers from retrieved parms - LOG.info("CurrentStep: Assigning Connection Parms") - user = 
db_user or connection["project_user"] - connect_by_key = admin_private_key is not None or connection["connect_by_key"] - private_key_passphrase = admin_private_key_passphrase if admin_private_key is not None else connection["private_key_passphrase"] - private_key = admin_private_key if admin_private_key is not None else connection["private_key"] - - AssignConnectParms( - connection["project_key"], - connection["connection_id"], - connection["project_host"], - connection["project_port"], - connection["project_db"], - connection["project_qc_schema"], - user, - connection["sql_flavor"], - connection["url"], - connection["connect_by_url"], - connect_by_key, - private_key, - private_key_passphrase, - "PROJECT", - ) - - project_qc_schema = connection["project_qc_schema"] - sql_flavor = connection["sql_flavor"] - user = connection["project_user"] - - queries = get_setup_profiling_tools_queries(sql_flavor, create_qc_schema, skip_granting_privileges, project_qc_schema, user, user_role) - - if not dry_run: - RunActionQueryList("PROJECT", queries, user_override=db_user, pwd_override=db_password) - - return project_qc_schema diff --git a/testgen/ui/services/connection_service.py b/testgen/ui/services/connection_service.py index 3fe3ecd..66796d4 100644 --- a/testgen/ui/services/connection_service.py +++ b/testgen/ui/services/connection_service.py @@ -3,7 +3,6 @@ import testgen.ui.queries.connection_queries as connection_queries import testgen.ui.services.table_group_service as table_group_service from testgen.commands.run_profiling_bridge import InitializeProfilingSQL -from testgen.commands.run_setup_profiling_tools import run_setup_profiling_tools from testgen.common.database.database_service import ( AssignConnectParms, RetrieveDBResultsToList, @@ -196,12 +195,6 @@ def test_qc_connection(project_code, connection, init_profiling=True): return qc_results -def create_qc_schema(connection_id, create_qc_schema, db_user, db_password, skip_granting_privileges, admin_private_key_passphrase=None, admin_private_key=None, user_role=None): - dry_run = False - empty_cache() - run_setup_profiling_tools(connection_id, dry_run, create_qc_schema, db_user, db_password, skip_granting_privileges, admin_private_key_passphrase, admin_private_key, user_role) - - def form_overwritten_connection_url(connection): flavor = connection["sql_flavor"] From 66ca55d5a6e8bf6d210afdfd1b8844ee69fa3643 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Tue, 29 Oct 2024 18:36:29 -0400 Subject: [PATCH 45/91] feat(pdf): Hygiene Issues report --- testgen/ui/pdf/hygiene_issue_report.py | 165 ++++++++++++++++++ testgen/ui/services/hygiene_issues_service.py | 87 +++++++++ .../views/dialogs/profiling_results_dialog.py | 12 +- testgen/ui/views/profiling_anomalies.py | 143 +++++---------- testgen/ui/views/test_definitions.py | 9 +- testgen/ui/views/test_results.py | 27 +-- 6 files changed, 324 insertions(+), 119 deletions(-) create mode 100644 testgen/ui/pdf/hygiene_issue_report.py create mode 100644 testgen/ui/services/hygiene_issues_service.py diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py new file mode 100644 index 0000000..4c23ec6 --- /dev/null +++ b/testgen/ui/pdf/hygiene_issue_report.py @@ -0,0 +1,165 @@ +import pandas +from reportlab.lib import colors +from reportlab.lib.colors import HexColor +from reportlab.lib.enums import TA_CENTER +from reportlab.lib.styles import ParagraphStyle +from reportlab.platypus import CondPageBreak, KeepTogether, Paragraph, Table, TableStyle + +from 
testgen.ui.pdf.dataframe_table import DataFrameTableBuilder +from testgen.ui.pdf.style import ( + COLOR_GRAY_BG, + COLOR_GREEN_BG, + PARA_STYLE_CELL, + PARA_STYLE_FOOTNOTE, + PARA_STYLE_H1, + PARA_STYLE_INFO, + PARA_STYLE_MONO, + PARA_STYLE_TEXT, + PARA_STYLE_TITLE, + TABLE_STYLE_DEFAULT, +) +from testgen.ui.pdf.templates import DatakitchenTemplate +from testgen.ui.services.hygiene_issues_service import get_source_data + +SECTION_MIN_AVAILABLE_HEIGHT = 120 + +CLASS_COLORS = { + "Definite": HexColor(0xE94D4A), + "Likely": HexColor(0xFC8F2A), + "Possible": HexColor(0xFCD349), + "Potential PII": HexColor(0xFC8F2A), +} + +def build_summary_table(document, hi_data): + + summary_table_style = TableStyle( + ( + # All-table styles + ("GRID", (0, 0), (-1, -1), 2, colors.white), + ("BACKGROUND", (0, 0), (-1, -1), COLOR_GRAY_BG), + + # Empty cells + ("BACKGROUND", (2, 5), (-1, -1), colors.white), + + # Header cells + *[ + (cmd[0], *coords, *cmd[1:]) + for coords in ( + ((2, 2), (2, 4)), + ((0, 0), (0, -1)) + ) + for cmd in ( + ("FONT", "Helvetica-Bold"), + ("ALIGN", "RIGHT"), + ("BACKGROUND", COLOR_GREEN_BG), + ) + ], + + # Layout + ("SPAN", (1, 0), (3, 0)), + + ("SPAN", (1, 1), (4, 1)), + + ("SPAN", (3, 2), (4, 2)), + ("SPAN", (3, 3), (4, 3)), + ("SPAN", (3, 4), (4, 4)), + ("SPAN", (3, 5), (4, 5)), + + + # Status cell + *[ + (cmd[0], (4, 0), (4, 0), *cmd[1:]) + for cmd in ( + ("BACKGROUND", CLASS_COLORS.get(hi_data["issue_likelihood"], COLOR_GRAY_BG)), + ("ALIGNMENT", "CENTER"), + ("VALIGN", "MIDDLE"), + ) + ], + ), + parent=TABLE_STYLE_DEFAULT, + ) + + + profiling_timestamp = pandas.to_datetime(hi_data["profiling_starttime"]).strftime("%Y-%m-%d %H:%M:%S") + summary_table_data = [ + ( + "Hygiene Issue", + ( + Paragraph(f"{hi_data["anomaly_name"]}:", style=PARA_STYLE_CELL), + Paragraph(hi_data["anomaly_description"], style=PARA_STYLE_CELL), + ), + None, + None, + Paragraph( + hi_data["issue_likelihood"], + style=ParagraphStyle("likelihood", textColor=colors.white, fontSize=10, parent=PARA_STYLE_CELL, alignment=TA_CENTER), + ), + ), + ( + "Detail", + Paragraph( + hi_data["detail"], + style=ParagraphStyle("detail", fontName="Helvetica-Bold", parent=PARA_STYLE_CELL), + ), + ), + + ("Database/Schema", hi_data["schema_name"], "Profiling Date", profiling_timestamp), + ("Table", hi_data["table_name"], "Table Group", hi_data["table_groups_name"]), + ("Column", hi_data["column_name"], "Disposition", hi_data["disposition"] or "No Decision"), + ("Column Type", hi_data["column_type"]), + ] + + summary_table_col_widths = [n * document.width for n in (.15, .35, .15, .15, .20)] + return Table(summary_table_data, style=summary_table_style, hAlign="LEFT", colWidths=summary_table_col_widths) + + +def build_sample_data_content(document, sample_data_tuple): + sample_data_status, sample_data_msg, lookup_query, sample_data = sample_data_tuple + if sample_data_status in ("ND", "NA"): + yield Paragraph(sample_data_msg, style=PARA_STYLE_INFO) + elif sample_data_status == "ERR" or sample_data is None: + yield Paragraph("It was not possible to fetch the sample data this time.", style=PARA_STYLE_INFO) + else: + sample_data.columns = [col.replace("_", " ").title() for col in sample_data.columns] + df_table_builder = DataFrameTableBuilder(sample_data, document.width) + table_flowables = [df_table_builder.build_table(hAlign="LEFT")] + if df_table_builder.omitted_columns: + omitted_columns = ", ".join(df_table_builder.omitted_columns) + sample_data_msg = f"Note: The following columns were omitted from this table: {omitted_columns}" 
+ if sample_data_msg: + table_flowables.append(Paragraph(sample_data_msg, style=PARA_STYLE_FOOTNOTE)) + + yield from df_table_builder.split_in_columns(table_flowables) + + +def build_sql_query_conntent(sample_data_tuple): + lookup_query = sample_data_tuple[2] + if lookup_query: + return Paragraph(lookup_query, PARA_STYLE_MONO) + else: + return Paragraph("No sample data lookup query registered for this issue.") + + +def get_report_content(document, hi_data): + yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE) + yield build_summary_table(document, hi_data) + + yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT) + yield Paragraph("Suggested Action", style=PARA_STYLE_H1) + yield Paragraph(hi_data["suggested_action"], style=PARA_STYLE_TEXT) + + sample_data_tuple = get_source_data(hi_data) + + yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT) + yield Paragraph("Sample Data", PARA_STYLE_H1) + yield from build_sample_data_content(document, sample_data_tuple) + + yield KeepTogether([ + Paragraph("SQL Query", PARA_STYLE_H1), + build_sql_query_conntent(sample_data_tuple) + ]) + + +def create_report(filename, hi_data): + doc = DatakitchenTemplate(filename) + doc.build(flowables=list(get_report_content(doc, hi_data))) diff --git a/testgen/ui/services/hygiene_issues_service.py b/testgen/ui/services/hygiene_issues_service.py new file mode 100644 index 0000000..0668876 --- /dev/null +++ b/testgen/ui/services/hygiene_issues_service.py @@ -0,0 +1,87 @@ +import streamlit as st + +from testgen.ui.services import database_service as db + + +def get_source_data(hi_data): + str_schema = st.session_state["dbschema"] + # Define the query + str_sql = f""" + SELECT t.lookup_query, tg.table_group_schema, c.project_qc_schema, + c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, + c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase + FROM {str_schema}.target_data_lookups t + INNER JOIN {str_schema}.table_groups tg + ON ('{hi_data["table_groups_id"]}'::UUID = tg.id) + INNER JOIN {str_schema}.connections c + ON (tg.connection_id = c.connection_id) + AND (t.sql_flavor = c.sql_flavor) + WHERE t.error_type = 'Profile Anomaly' + AND t.test_id = '{hi_data["anomaly_id"]}' + AND t.lookup_query > ''; + """ + + def get_lookup_query(test_id, detail_exp, column_names): + if test_id in {"1019", "1020"}: + start_index = detail_exp.find("Columns: ") + if start_index == -1: + columns = [col.strip() for col in column_names.split(",")] + else: + start_index += len("Columns: ") + column_names_str = detail_exp[start_index:] + columns = [col.strip() for col in column_names_str.split(",")] + queries = [ + f"SELECT '{column}' AS column_name, MAX({column}) AS max_date_available FROM {{TARGET_SCHEMA}}.{{TABLE_NAME}}" + for column in columns + ] + sql_query = " UNION ALL ".join(queries) + " ORDER BY max_date_available DESC;" + else: + sql_query = "" + return sql_query + + def replace_parms(str_query): + str_query = ( + get_lookup_query(hi_data["anomaly_id"], hi_data["detail"], hi_data["column_name"]) + if lst_query[0]["lookup_query"] == "created_in_ui" + else lst_query[0]["lookup_query"] + ) + str_query = str_query.replace("{TARGET_SCHEMA}", lst_query[0]["table_group_schema"]) + str_query = str_query.replace("{TABLE_NAME}", hi_data["table_name"]) + str_query = str_query.replace("{COLUMN_NAME}", hi_data["column_name"]) + str_query = str_query.replace("{DATA_QC_SCHEMA}", lst_query[0]["project_qc_schema"]) + str_query = str_query.replace("{DETAIL_EXPRESSION}", 
hi_data["detail"]) + str_query = str_query.replace("{PROFILE_RUN_DATE}", hi_data["profiling_starttime"]) + if str_query is None or str_query == "": + raise ValueError("Lookup query is not defined for this Anomoly Type.") + return str_query + + try: + # Retrieve SQL for customer lookup + lst_query = db.retrieve_data_list(str_sql) + + # Retrieve and return data as df + if lst_query: + str_sql = replace_parms(str_sql) + df = db.retrieve_target_db_df( + lst_query[0]["sql_flavor"], + lst_query[0]["project_host"], + lst_query[0]["project_port"], + lst_query[0]["project_db"], + lst_query[0]["project_user"], + lst_query[0]["project_pw_encrypted"], + str_sql, + lst_query[0]["url"], + lst_query[0]["connect_by_url"], + lst_query[0]["connect_by_key"], + lst_query[0]["private_key"], + lst_query[0]["private_key_passphrase"], + ) + if df.empty: + return "ND", "Data that violates Hygiene Issue criteria is not present in the current dataset.", str_sql, None + else: + return "OK", None, str_sql, df + else: + return "NA", "Source data lookup is not available for this Issue.", None, None + + except Exception as e: + return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}", None, None diff --git a/testgen/ui/views/dialogs/profiling_results_dialog.py b/testgen/ui/views/dialogs/profiling_results_dialog.py index 26f3078..5cce9c6 100644 --- a/testgen/ui/views/dialogs/profiling_results_dialog.py +++ b/testgen/ui/views/dialogs/profiling_results_dialog.py @@ -12,13 +12,11 @@ BUTTON_HELP = "Review profiling for highlighted column" -def view_profiling_button(button_container, str_table_name, str_column_name, - str_profile_run_id=None, str_table_groups_id=None): - with button_container: - if st.button( - BUTTON_TEXT, help=BUTTON_HELP, use_container_width=True - ): - profiling_results_dialog(str_table_name, str_column_name, str_profile_run_id, str_table_groups_id) +def view_profiling_button(str_table_name, str_column_name, str_profile_run_id=None, str_table_groups_id=None): + if st.button( + BUTTON_TEXT, help=BUTTON_HELP, use_container_width=True + ): + profiling_results_dialog(str_table_name, str_column_name, str_profile_run_id, str_table_groups_id) @st.dialog(title="Profiling Results") diff --git a/testgen/ui/views/profiling_anomalies.py b/testgen/ui/views/profiling_anomalies.py index 1450e42..4e70ce5 100644 --- a/testgen/ui/views/profiling_anomalies.py +++ b/testgen/ui/views/profiling_anomalies.py @@ -1,5 +1,7 @@ import typing +from io import BytesIO +import pandas as pd import plotly.express as px import streamlit as st @@ -8,10 +10,12 @@ import testgen.ui.services.form_service as fm import testgen.ui.services.query_service as dq from testgen.common import date_service -from testgen.common.read_file import replace_templated_functions from testgen.ui.components import widgets as testgen +from testgen.ui.components.widgets.download_dialog import FILE_DATA_TYPE, download_dialog, zip_multi_file_data from testgen.ui.navigation.page import Page +from testgen.ui.pdf.hygiene_issue_report import create_report from testgen.ui.services import project_service +from testgen.ui.services.hygiene_issues_service import get_source_data as get_source_data_uncached from testgen.ui.session import session from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button @@ -167,7 +171,7 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | if not selected_row: st.markdown(":orange[Select a record to see more information.]") else: - col1, col2 = st.columns([0.7, 0.3]) + col1, 
col2 = st.columns([0.8, 0.2]) with col1: fm.render_html_list( selected_row, @@ -185,17 +189,33 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | int_data_width=700, ) with col2: - # _, v_col2 = st.columns([0.3, 0.7]) - v_col1, v_col2 = st.columns([0.5, 0.5]) - view_profiling_button( - v_col1, selected_row["table_name"], selected_row["column_name"], - str_profile_run_id=run_id - ) - with v_col2: + view_profiling_button( + selected_row["table_name"], selected_row["column_name"], str_profile_run_id=run_id + ) + if st.button( "Source Data →", help="Review current source data for highlighted issue", use_container_width=True ): source_data_dialog(selected_row) + if st.button( + ":material/file_save: Issue Report", + use_container_width=True, + help="Generate a PDF report for each selected issue", + ): + dialog_title = "Download Issue Report" + if len(selected) == 1: + download_dialog( + dialog_title=dialog_title, + file_content_func=get_report_file_data, + args=(selected[0],), + ) + else: + zip_func = zip_multi_file_data( + "testgen_issue_reports.zip", + get_report_file_data, + [(arg,) for arg in selected], + ) + download_dialog(dialog_title=dialog_title, file_content_func=zip_func) cached_functions = [get_anomaly_disposition, get_profiling_anomaly_summary] # Clear the list cache if the list is sorted by disposition/action @@ -269,12 +289,16 @@ def get_profiling_anomalies(str_profile_run_id, str_likelihood, issue_type_id, s WHEN t.issue_likelihood = 'Definite' THEN 4 END AS likelihood_order, t.anomaly_description, r.detail, t.suggested_action, - r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime + r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, + tg.table_groups_name FROM {str_schema}.profile_anomaly_results r INNER JOIN {str_schema}.profile_anomaly_types t ON r.anomaly_id = t.id INNER JOIN {str_schema}.profiling_runs p ON r.profile_run_id = p.id + INNER JOIN {str_schema}.table_groups tg + ON r.table_groups_id = tg.id + WHERE r.profile_run_id = '{str_profile_run_id}' {str_criteria} {str_order_by} @@ -352,90 +376,8 @@ def get_profiling_anomaly_summary(str_profile_run_id): @st.cache_data(show_spinner=False) -def get_bad_data(selected_row): - str_schema = st.session_state["dbschema"] - # Define the query - str_sql = f""" - SELECT t.lookup_query, tg.table_group_schema, c.project_qc_schema, - c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, - c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase - FROM {str_schema}.target_data_lookups t - INNER JOIN {str_schema}.table_groups tg - ON ('{selected_row["table_groups_id"]}'::UUID = tg.id) - INNER JOIN {str_schema}.connections c - ON (tg.connection_id = c.connection_id) - AND (t.sql_flavor = c.sql_flavor) - WHERE t.error_type = 'Profile Anomaly' - AND t.test_id = '{selected_row["anomaly_id"]}' - AND t.lookup_query > ''; - """ - - def get_lookup_query(test_id, detail_exp, column_names): - if test_id in {"1019", "1020"}: - start_index = detail_exp.find("Columns: ") - if start_index == -1: - columns = [col.strip() for col in column_names.split(",")] - else: - start_index += len("Columns: ") - column_names_str = detail_exp[start_index:] - columns = [col.strip() for col in column_names_str.split(",")] - queries = [ - f"SELECT '{column}' AS column_name, MAX({column}) AS max_date_available FROM {{TARGET_SCHEMA}}.{{TABLE_NAME}}" - for column in columns - ] - sql_query = " UNION ALL 
".join(queries) + " ORDER BY max_date_available DESC;" - else: - sql_query = "" - return sql_query - - def replace_parms(str_query): - str_query: str = ( - get_lookup_query(selected_row["anomaly_id"], selected_row["detail"], selected_row["column_name"]) - if lst_query[0]["lookup_query"] == "created_in_ui" - else lst_query[0]["lookup_query"] - ) - str_query = str_query.replace("{TARGET_SCHEMA}", lst_query[0]["table_group_schema"]) - str_query = str_query.replace("{TABLE_NAME}", selected_row["table_name"]) - str_query = str_query.replace("{COLUMN_NAME}", selected_row["column_name"]) - str_query = str_query.replace("{DATA_QC_SCHEMA}", lst_query[0]["project_qc_schema"]) - str_query = str_query.replace("{DETAIL_EXPRESSION}", selected_row["detail"]) - str_query = str_query.replace("{PROFILE_RUN_DATE}", selected_row["profiling_starttime"]) - if "{{DKFN_" in str_query: - str_query = replace_templated_functions(str_query, lst_query[0]["sql_flavor"]) - if str_query is None or str_query == "": - raise ValueError("Lookup query is not defined for this Anomoly Type.") - return str_query - - try: - # Retrieve SQL for customer lookup - lst_query = db.retrieve_data_list(str_sql) - - # Retrieve and return data as df - if lst_query: - str_sql = replace_parms(str_sql) - df = db.retrieve_target_db_df( - lst_query[0]["sql_flavor"], - lst_query[0]["project_host"], - lst_query[0]["project_port"], - lst_query[0]["project_db"], - lst_query[0]["project_user"], - lst_query[0]["project_pw_encrypted"], - str_sql, - lst_query[0]["url"], - lst_query[0]["connect_by_url"], - lst_query[0]["connect_by_key"], - lst_query[0]["private_key"], - lst_query[0]["private_key_passphrase"], - ) - if df.empty: - return "ND", "Data that violates Hygiene Issue criteria is not present in the current dataset.", None - else: - return "OK", None, df - else: - return "NA", "A source data lookup for this Issue is not available.", None - - except Exception as e: - return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}", None +def get_source_data(hi_data): + return get_source_data_uncached(hi_data) def write_frequency_graph(df_tests): @@ -466,7 +408,7 @@ def source_data_dialog(selected_row): fm.render_html_list(selected_row, ["detail"], None, 700, ["Hygiene Issue Detail"]) with st.spinner("Retrieving source data..."): - bad_data_status, bad_data_msg, df_bad = get_bad_data(selected_row) + bad_data_status, bad_data_msg, _, df_bad = get_source_data(selected_row) if bad_data_status in {"ND", "NA"}: st.info(bad_data_msg) elif bad_data_status == "ERR": @@ -496,3 +438,14 @@ def do_disposition_update(selected, str_new_status): str_result = f":red[**The update {str_which} did not succeed.**]" return str_result + +def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE: + hi_id = tr_data["anomaly_id"] + profiling_time = pd.Timestamp(tr_data["profiling_starttime"]).strftime("%Y%m%d_%H%M%S") + file_name = f"testgen_issue_report_{hi_id}_{profiling_time}.pdf" + + with BytesIO() as buffer: + create_report(buffer, tr_data) + update_progress(1.0) + buffer.seek(0) + return file_name, "application/pdf", buffer.read() diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index f8bc5ec..0f7a542 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -808,10 +808,11 @@ def show_test_defs_grid( _, col_profile_button = right_column.columns([0.7, 0.3]) if selected_row["test_scope"] == "column": - view_profiling_button( - col_profile_button, 
selected_row["table_name"], selected_row["column_name"], - str_table_groups_id=str_table_groups_id - ) + with col_profile_button: + view_profiling_button( + selected_row["table_name"], selected_row["column_name"], + str_table_groups_id=str_table_groups_id + ) with right_column: st.write(generate_test_defs_help(row_selected["test_type"])) diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index ed97aa9..9cc88eb 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -542,12 +542,21 @@ def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_co v_col1, v_col2, v_col3, v_col4 = st.columns([.25, .25, .25, .25]) if authentication_service.current_user_has_edit_role(): view_edit_test(v_col1, selected_row["test_definition_id_current"]) + if selected_row["test_scope"] == "column": - view_profiling_button( - v_col2, selected_row["table_name"], selected_row["column_names"], - str_table_groups_id=selected_row["table_groups_id"] - ) - view_bad_data(v_col3, selected_row) + with v_col2: + view_profiling_button( + selected_row["table_name"], + selected_row["column_names"], + str_table_groups_id=selected_row["table_groups_id"] + ) + + with v_col3: + if st.button( + "Source Data →", help="Review current source data for highlighted result", + use_container_width=True + ): + source_data_dialog(selected_row) with v_col4: @@ -694,14 +703,6 @@ def do_disposition_update(selected, str_new_status): return str_result -def view_bad_data(button_container, selected_row): - with button_container: - if st.button( - "Source Data →", help="Review current source data for highlighted result", use_container_width=True - ): - source_data_dialog(selected_row) - - @st.dialog(title="Source Data") def source_data_dialog(selected_row): st.markdown(f"#### {selected_row['test_name_short']}") From a67078b7fd4e06b5edadee564f993901608f19a0 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 11:58:38 -0500 Subject: [PATCH 46/91] refactor(components): upgrade vanJS to latest version --- .../frontend/js/components/breadcrumbs.js | 10 ++++-- .../frontend/js/components/button.js | 14 ++++---- .../components/frontend/js/components/link.js | 22 ++++++++---- .../frontend/js/components/select.js | 6 ++-- .../frontend/js/components/sidebar.js | 19 +++++++---- .../frontend/js/components/summary_bar.js | 34 ++++++++----------- .../frontend/js/pages/profiling_runs.js | 20 +++++------ .../components/frontend/js/pages/test_runs.js | 16 ++++----- testgen/ui/components/frontend/js/utils.js | 23 ++++++++----- testgen/ui/components/frontend/js/van.min.js | 3 +- testgen/ui/components/widgets/empty_state.py | 3 +- 11 files changed, 97 insertions(+), 73 deletions(-) diff --git a/testgen/ui/components/frontend/js/components/breadcrumbs.js b/testgen/ui/components/frontend/js/components/breadcrumbs.js index 949499c..52a18a9 100644 --- a/testgen/ui/components/frontend/js/components/breadcrumbs.js +++ b/testgen/ui/components/frontend/js/components/breadcrumbs.js @@ -11,7 +11,7 @@ */ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; -import { emitEvent, loadStylesheet } from '../utils.js'; +import { emitEvent, getValue, loadStylesheet } from '../utils.js'; const { a, div, span } = van.tags; @@ -25,7 +25,7 @@ const Breadcrumbs = (/** @type Properties */ props) => { return div( {class: 'tg-breadcrumbs-wrapper'}, () => { - const breadcrumbs = van.val(props.breadcrumbs); + const breadcrumbs = getValue(props.breadcrumbs) || []; return div( { 
class: 'tg-breadcrumbs' }, @@ -33,7 +33,11 @@ const Breadcrumbs = (/** @type Properties */ props) => { const isLastItem = idx === breadcrumbs.length - 1; items.push(a({ class: `tg-breadcrumbs--${ isLastItem ? 'current' : 'active'}`, - onclick: () => emitEvent('LinkClicked', { href: b.path, params: b.params }) }, + onclick: (event) => { + event.preventDefault(); + event.stopPropagation(); + emitEvent('LinkClicked', { href: b.path, params: b.params }); + }}, b.label, )); if (!isLastItem) { diff --git a/testgen/ui/components/frontend/js/components/button.js b/testgen/ui/components/frontend/js/components/button.js index 893a1b1..ba2092a 100644 --- a/testgen/ui/components/frontend/js/components/button.js +++ b/testgen/ui/components/frontend/js/components/button.js @@ -11,7 +11,7 @@ * @property {(bool)} disabled * @property {string?} style */ -import { emitEvent, enforceElementWidth, loadStylesheet } from '../utils.js'; +import { emitEvent, enforceElementWidth, getValue, loadStylesheet } from '../utils.js'; import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; @@ -31,7 +31,9 @@ const BUTTON_COLOR = { const Button = (/** @type Properties */ props) => { loadStylesheet('button', stylesheet); - const isIconOnly = props.type === BUTTON_TYPE.ICON || (props.icon?.val && !props.label?.val); + const buttonType = getValue(props.type); + const width = getValue(props.width); + const isIconOnly = buttonType === BUTTON_TYPE.ICON || (getValue(props.icon) && !getValue(props.label)); if (!window.testgen.isPage) { Streamlit.setFrameHeight(40); @@ -39,8 +41,8 @@ const Button = (/** @type Properties */ props) => { enforceElementWidth(window.frameElement, 40); } - if (props.width?.val) { - enforceElementWidth(window.frameElement, props.width?.val); + if (width) { + enforceElementWidth(window.frameElement, width); } } @@ -52,8 +54,8 @@ const Button = (/** @type Properties */ props) => { const onClickHandler = props.onclick || (() => emitEvent('ButtonClicked')); return button( { - class: `tg-button tg-${props.type.val}-button tg-${props.color?.val ?? 'basic'}-button ${props.type.val !== 'icon' && isIconOnly ? 'tg-icon-button' : ''}`, - style: () => `width: ${props.width?.val ?? '100%'}; ${props.style?.val}`, + class: `tg-button tg-${buttonType}-button tg-${getValue(props.color) ?? 'basic'}-button ${buttonType !== 'icon' && isIconOnly ? 'tg-icon-button' : ''}`, + style: () => `width: ${isIconOnly ? '' : (width ?? 
'100%')}; ${getValue(props.style)}`, onclick: onClickHandler, disabled: props.disabled, }, diff --git a/testgen/ui/components/frontend/js/components/link.js b/testgen/ui/components/frontend/js/components/link.js index 8a0b09b..49c562a 100644 --- a/testgen/ui/components/frontend/js/components/link.js +++ b/testgen/ui/components/frontend/js/components/link.js @@ -13,7 +13,7 @@ * @property {number?} width * @property {string?} style */ -import { emitEvent, enforceElementWidth, loadStylesheet } from '../utils.js'; +import { emitEvent, enforceElementWidth, getValue, loadStylesheet } from '../utils.js'; import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; @@ -23,17 +23,25 @@ const Link = (/** @type Properties */ props) => { loadStylesheet('link', stylesheet); if (!window.testgen.isPage) { - Streamlit.setFrameHeight(props.height?.val || 24); - if (props.width?.val) { - enforceElementWidth(window.frameElement, props.width.val); + Streamlit.setFrameHeight(getValue(props.height) || 24); + const width = getValue(props.width); + if (width) { + enforceElementWidth(window.frameElement, width); } } + const href = getValue(props.href); + const params = getValue(props.params) || {}; + return a( { - class: `tg-link ${props.underline?.val ? 'tg-link--underline' : ''}`, + class: `tg-link ${getValue(props.underline) ? 'tg-link--underline' : ''}`, style: props.style, - onclick: () => emitEvent('LinkClicked', { href: props.href.val, params: props.params.val }), + onclick: (event) => { + event.preventDefault(); + event.stopPropagation(); + emitEvent('LinkClicked', { href, params }); + }, }, div( {class: 'tg-link--wrapper'}, @@ -50,7 +58,7 @@ const LinkIcon = ( /** @type string */position, ) => { return i( - {class: `material-symbols-rounded tg-link--icon tg-link--icon-${position}`, style: `font-size: ${size?.val || 20}px;`}, + {class: `material-symbols-rounded tg-link--icon tg-link--icon-${position}`, style: `font-size: ${getValue(size) || 20}px;`}, icon, ); }; diff --git a/testgen/ui/components/frontend/js/components/select.js b/testgen/ui/components/frontend/js/components/select.js index 6cd7c48..5f4f68c 100644 --- a/testgen/ui/components/frontend/js/components/select.js +++ b/testgen/ui/components/frontend/js/components/select.js @@ -13,7 +13,7 @@ */ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; -import { loadStylesheet } from '../utils.js'; +import { getValue, loadStylesheet } from '../utils.js'; const { div, label, option, select } = van.tags; @@ -25,9 +25,9 @@ const Select = (/** @type {Properties} */ props) => { const changeHandler = props.onChange || post; return div( {class: 'tg-select'}, - label({for: domId, class: 'tg-select--label'}, van.val(props.label)), + label({for: domId, class: 'tg-select--label'}, props.label), () => { - const options = van.val(props.options); + const options = getValue(props.options) || []; return select( {id: domId, class: 'tg-select--field', onchange: changeHandler}, options.map(op => option({class: 'tg-select--field--option', value: op.value, selected: op.selected}, op.label)), diff --git a/testgen/ui/components/frontend/js/components/sidebar.js b/testgen/ui/components/frontend/js/components/sidebar.js index 56c5650..5057a48 100644 --- a/testgen/ui/components/frontend/js/components/sidebar.js +++ b/testgen/ui/components/frontend/js/components/sidebar.js @@ -46,7 +46,7 @@ const Sidebar = (/** @type {Properties} */ props) => { return div( {class: 'menu'}, () => { - const menuItems = van.val(props.menu).items; + const 
menuItems = props.menu?.val.items || []; return div( {class: 'content'}, menuItems.map(item => @@ -56,12 +56,12 @@ const Sidebar = (/** @type {Properties} */ props) => { ); }, button( - { class: `tg-button logout`, onclick: () => navigate(van.val(props.logout_path)) }, + { class: `tg-button logout`, onclick: (event) => navigate(event, props.logout_path?.val) }, i({class: 'material-symbols-rounded'}, 'logout'), span('Logout'), ), span({class: 'menu--username'}, props.username), - () => Version(van.val(props.menu).version), + () => Version(props.menu?.val.version), ); }; @@ -78,14 +78,14 @@ const MenuSection = (/** @type {MenuItem} */ item, /** @type {string} */ current const MenuItem = (/** @type {MenuItem} */ item, /** @type {string} */ currentPage) => { const classes = van.derive(() => { - if (isCurrentPage(item.page, van.val(currentPage))) { + if (isCurrentPage(item.page, currentPage?.val)) { return 'menu--item active'; } return 'menu--item'; }); return a( - {class: classes, href: `/${item.page}`, onclick: () => navigate(item.page, van.val(currentPage))}, + {class: classes, href: `/${item.page}`, onclick: (event) => navigate(event, item.page, currentPage?.val)}, i({class: 'menu--item--icon material-symbols-rounded'}, item.icon), span({class: 'menu--item--label'}, item.label), ); @@ -121,11 +121,16 @@ const VersionRow = (/** @type string */ label, /** @type string */ version, icon ); }; -function navigate(/** @type string */ path, /** @type string */ currentPage = null) { +function navigate(/** @type object */ event, /** @type string */ path, /** @type string */ currentPage = null) { + // Needed to prevent page refresh + // Returning false does not work because VanJS does not use inline handlers -> https://github.com/vanjs-org/van/discussions/246 + event.preventDefault(); + // Prevent Streamlit from reacting to event + event.stopPropagation(); + if (Sidebar.StreamlitInstance && path !== currentPage) { Sidebar.StreamlitInstance.sendData(path); } - return false; } function isCurrentPage(/** @type string */ itemPath, /** @type string */ currentPage) { diff --git a/testgen/ui/components/frontend/js/components/summary_bar.js b/testgen/ui/components/frontend/js/components/summary_bar.js index 152b589..000d2fb 100644 --- a/testgen/ui/components/frontend/js/components/summary_bar.js +++ b/testgen/ui/components/frontend/js/components/summary_bar.js @@ -13,7 +13,7 @@ * @property {number} width */ import van from '../van.min.js'; -import { loadStylesheet } from '../utils.js'; +import { getValue, loadStylesheet } from '../utils.js'; const { div, span } = van.tags; const colorMap = { @@ -26,32 +26,28 @@ const colorMap = { brown: '#8D6E63', grey: '#BDBDBD', } +const defaultHeight = 24; const SummaryBar = (/** @type Properties */ props) => { loadStylesheet('summaryBar', stylesheet); - - const height = props.height.val || 24; - const width = props.width.val; - const summaryItems = props.items.val; - const label = props.label?.val; - const total = summaryItems.reduce((sum, item) => sum + item.value, 0); + const total = van.derive(() => getValue(props.items).reduce((sum, item) => sum + item.value, 0)); return div( - { class: 'tg-summary-bar-wrapper' }, - () => { - return label ? div( - { class: 'tg-summary-bar--label' }, - label, - ) : null; - }, - div( + { style: () => `max-width: ${props.width ? getValue(props.width) + 'px' : '100%'};` }, + () => props.label ? 
div( + { class: 'tg-summary-bar--label' }, + props.label, + ) : '', + () => div( { class: 'tg-summary-bar', - style: `height: ${height}px; max-width: ${width ? width + 'px' : '100%'}` + style: () => `height: ${getValue(props.height) || defaultHeight}px;` }, - summaryItems.map(item => span({ - class: `tg-summary-bar--item`, - style: `width: ${item.value * 100 / total}%; background-color: ${colorMap[item.color] || item.color};`, + getValue(props.items).map(item => span({ + class: 'tg-summary-bar--item', + style: () => `width: ${item.value * 100 / total.val}%; + ${item.value ? 'min-width: 1px;' : ''} + background-color: ${colorMap[item.color] || item.color};`, })), ), () => { diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js index 531768d..9afe07c 100644 --- a/testgen/ui/components/frontend/js/pages/profiling_runs.js +++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js @@ -9,7 +9,7 @@ import { SummaryBar } from '../components/summary_bar.js'; import { Link } from '../components/link.js'; import { Button } from '../components/button.js'; import { Streamlit } from '../streamlit.js'; -import { emitEvent, resizeFrameHeightToElement, wrapProps } from '../utils.js'; +import { emitEvent, resizeFrameHeightToElement } from '../utils.js'; import { formatTimestamp, formatDuration } from '../display_utils.js'; const { div, span, i } = van.tags; @@ -77,12 +77,12 @@ const ProfilingRunItem = (item, /** @type string[] */ columns) => { formatDuration(item.duration), ), ), - item.status === 'Running' && item.process_id ? Button(wrapProps({ + item.status === 'Running' && item.process_id ? Button({ type: 'stroked', label: 'Cancel Run', style: 'width: auto; height: 32px; color: var(--purple); margin-left: 16px;', onclick: () => emitEvent('RunCanceled', { payload: item }), - })) : null, + }) : null, ), div( { style: `flex: ${columns[2]}` }, @@ -94,17 +94,17 @@ const ProfilingRunItem = (item, /** @type string[] */ columns) => { }, `${item.table_ct || 0} tables, ${item.column_ct || 0} columns`, ), - item.column_ct ? Link(wrapProps({ + item.column_ct ? Link({ label: 'View results', href: 'profiling-runs:results', params: { 'run_id': item.profiling_run_id }, underline: true, right_icon: 'chevron_right', - })) : null, + }) : null, ), div( { style: `flex: ${columns[3]}` }, - item.anomaly_ct ? SummaryBar(wrapProps({ + item.anomaly_ct ? SummaryBar({ items: [ { label: 'Definite', value: item.anomalies_definite_ct, color: 'red' }, { label: 'Likely', value: item.anomalies_likely_ct, color: 'orange' }, @@ -112,16 +112,16 @@ const ProfilingRunItem = (item, /** @type string[] */ columns) => { { label: 'Dismissed', value: item.anomalies_dismissed_ct, color: 'grey' }, ], height: 10, - width: 300, - })) : '--', - item.anomaly_ct ? Link(wrapProps({ + width: 350, + }) : '--', + item.anomaly_ct ? 
Link({ label: `View ${item.anomaly_ct} issues`, href: 'profiling-runs:hygiene', params: { 'run_id': item.profiling_run_id }, underline: true, right_icon: 'chevron_right', style: 'margin-top: 8px;', - })) : null, + }) : null, ), ); } diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js index 596e8a7..c5084b4 100644 --- a/testgen/ui/components/frontend/js/pages/test_runs.js +++ b/testgen/ui/components/frontend/js/pages/test_runs.js @@ -9,7 +9,7 @@ import { SummaryBar } from '../components/summary_bar.js'; import { Link } from '../components/link.js'; import { Button } from '../components/button.js'; import { Streamlit } from '../streamlit.js'; -import { emitEvent, resizeFrameHeightToElement, wrapProps } from '../utils.js'; +import { emitEvent, resizeFrameHeightToElement } from '../utils.js'; import { formatTimestamp, formatDuration } from '../display_utils.js'; const { div, span, i } = van.tags; @@ -58,12 +58,12 @@ const TestRunItem = (item, /** @type string[] */ columns) => { { class: 'table-row flex-row' }, div( { style: `flex: ${columns[0]}` }, - Link(wrapProps({ + Link({ label: formatTimestamp(item.test_starttime), href: 'test-runs:results', params: { 'run_id': item.test_run_id }, underline: true, - })), + }), div( { class: 'text-caption mt-1' }, `${item.table_groups_name} > ${item.test_suite}`, @@ -78,16 +78,16 @@ const TestRunItem = (item, /** @type string[] */ columns) => { formatDuration(item.duration), ), ), - item.status === 'Running' && item.process_id ? Button(wrapProps({ + item.status === 'Running' && item.process_id ? Button({ type: 'stroked', label: 'Cancel Run', style: 'width: auto; height: 32px; color: var(--purple); margin-left: 16px;', onclick: () => emitEvent('RunCanceled', { payload: item }), - })) : null, + }) : null, ), div( { style: `flex: ${columns[2]}` }, - item.test_ct ? SummaryBar(wrapProps({ + item.test_ct ? SummaryBar({ items: [ { label: 'Passed', value: item.passed_ct, color: 'green' }, { label: 'Warning', value: item.warning_ct, color: 'yellow' }, @@ -96,8 +96,8 @@ const TestRunItem = (item, /** @type string[] */ columns) => { { label: 'Dismissed', value: item.dismissed_ct, color: 'grey' }, ], height: 10, - width: 300, - })) : '--', + width: 400, + }) : '--', ), ); } diff --git a/testgen/ui/components/frontend/js/utils.js b/testgen/ui/components/frontend/js/utils.js index 9b3bcb9..d8d712c 100644 --- a/testgen/ui/components/frontend/js/utils.js +++ b/testgen/ui/components/frontend/js/utils.js @@ -32,13 +32,6 @@ function loadStylesheet( } } -function wrapProps(/** @type object */props) { - for (const [key, value] of Object.entries(props)) { - props[key] = van.state(value); - } - return props; -} - function emitEvent( /** @type string */event, /** @type object */data = {}, @@ -46,4 +39,18 @@ function emitEvent( Streamlit.sendData({ event, ...data, _id: Math.random() }) // Identify the event so its handler is called once } -export { emitEvent, enforceElementWidth, loadStylesheet, resizeFrameHeightToElement, wrapProps }; +// Replacement for van.val() +// https://github.com/vanjs-org/van/discussions/280 +const stateProto = Object.getPrototypeOf(van.state()); +function getValue(/** @type object */ prop) { // van state or static value + const proto = Object.getPrototypeOf(prop ?? 
0); + if (proto === stateProto) { + return prop.val; + } + if (proto === Function.prototype) { + return prop(); + } + return prop; +} + +export { emitEvent, enforceElementWidth, getValue, loadStylesheet, resizeFrameHeightToElement }; diff --git a/testgen/ui/components/frontend/js/van.min.js b/testgen/ui/components/frontend/js/van.min.js index a78d3da..7e23e03 100644 --- a/testgen/ui/components/frontend/js/van.min.js +++ b/testgen/ui/components/frontend/js/van.min.js @@ -1 +1,2 @@ -let e,t,l,r,o,f=Object,n=f.getPrototypeOf,s=document,a={isConnected:1},i={},d=n(a),u=n(n),_=(e,t,l,r)=>(e??(setTimeout(l,r),new Set)).add(t),h=(e,t,r)=>{let o=l;l=t;try{return e(r)}catch(e){return console.error(e),r}finally{l=o}},c=e=>e.filter(e=>e.t?.isConnected),g=t=>o=_(o,t,()=>{for(let e of o)e.l=c(e.l),e.o=c(e.o);o=e},1e3),w={get val(){return l?.add(this),this.i},get oldVal(){return l?.add(this),this.u},set val(l){let r=this;if(l!==r.i){r.i=l;let o=[...r.o=c(r.o)];for(let t of o)x(t.f,t.s,t.t),t.t=e;r.l.length?t=_(t,r,p):r.u=l}}},v=e=>({__proto__:w,i:e,u:e,l:[],o:[]}),S=e=>n(e??0)===w,y=(e,t)=>{let l=new Set,o={f:e},f=r;r=[];let n=h(e,l,t);n=(n??s).nodeType?n:new Text(n);for(let e of l)g(e),e.l.push(o);for(let e of r)e.t=n;return r=f,o.t=n},x=(e,t=v(),l)=>{let o=new Set,f={f:e,s:t};f.t=l??r?.push(f)??a,t.val=h(e,o);for(let e of o)g(e),e.o.push(f);return t},V=(t,...l)=>{for(let r of l.flat(1/0)){let l=n(r??0),o=l===w?y(()=>r.val):l===u?y(r):r;o!=e&&t.append(o)}return t},b=t=>new Proxy((l,...r)=>{let[o,...a]=n(r[0]??0)===d?r:[{},...r],_=t?s.createElementNS(t,l):s.createElement(l);for(let[t,r]of f.entries(o)){let o=l=>l?f.getOwnPropertyDescriptor(l,t)??o(n(l)):e,s=l+","+t,a=i[s]??(i[s]=o(n(_))?.set??0),d=a?a.bind(_):_.setAttribute.bind(_,t),h=n(r??0);h===w?y(()=>(d(r.val),_)):h!==u||t.startsWith("on")&&!r.h?d(r):y(()=>(d(r()),_))}return V(_,...a)},{get:(t,l)=>t.bind(e,l)}),m=(e,t)=>t?t!==e&&e.replaceWith(t):e.remove(),p=()=>{let l=[...t].filter(e=>e.i!==e.u);t=e;for(let t of new Set(l.flatMap(e=>e.l=c(e.l))))m(t.t,y(t.f,t.t)),t.t=e;for(let e of l)e.u=e.i};export default{add:V,_:e=>(e.h=1,e),tags:b(),tagsNS:b,state:v,val:e=>S(e)?e.val:e,oldVal:e=>S(e)?e.oldVal:e,derive:x,hydrate:(e,t)=>m(e,y(t,e))}; \ No newline at end of file +// https://vanjs.org/code/van-1.5.2.min.js +let e,t,r,o,l,n,s=Object.getPrototypeOf,f={isConnected:1},i={},h=s(f),a=s(s),d=(e,t,r,o)=>(e??(setTimeout(r,o),new Set)).add(t),u=(e,t,o)=>{let l=r;r=t;try{return e(o)}catch(e){return console.error(e),o}finally{r=l}},w=e=>e.filter(e=>e.t?.isConnected),_=e=>l=d(l,e,()=>{for(let e of l)e.o=w(e.o),e.l=w(e.l);l=n},1e3),c={get val(){return r?.i?.add(this),this.rawVal},get oldVal(){return r?.i?.add(this),this.h},set val(o){r?.u?.add(this),o!==this.rawVal&&(this.rawVal=o,this.o.length+this.l.length?(t?.add(this),e=d(e,this,v)):this.h=o)}},S=e=>({__proto__:c,rawVal:e,h:e,o:[],l:[]}),g=(e,t)=>{let r={i:new Set,u:new Set},l={f:e},n=o;o=[];let s=u(e,r,t);s=(s??document).nodeType?s:new Text(s);for(let e of r.i)r.u.has(e)||(_(e),e.o.push(l));for(let e of o)e.t=s;return o=n,l.t=s},y=(e,t=S(),r)=>{let l={i:new Set,u:new Set},n={f:e,s:t};n.t=r??o?.push(n)??f,t.val=u(e,l,t.rawVal);for(let e of l.i)l.u.has(e)||(_(e),e.l.push(n));return t},b=(e,...t)=>{for(let r of t.flat(1/0)){let t=s(r??0),o=t===c?g(()=>r.val):t===a?g(r):r;o!=n&&e.append(o)}return e},m=(e,t,...r)=>{let[o,...l]=s(r[0]??0)===h?r:[{},...r],f=e?document.createElementNS(e,t):document.createElement(t);for(let[e,r]of Object.entries(o)){let 
o=t=>t?Object.getOwnPropertyDescriptor(t,e)??o(s(t)):n,l=t+","+e,h=i[l]??=o(s(f))?.set??0,d=e.startsWith("on")?(t,r)=>{let o=e.slice(2);f.removeEventListener(o,r),f.addEventListener(o,t)}:h?h.bind(f):f.setAttribute.bind(f,e),u=s(r??0);e.startsWith("on")||u===a&&(r=y(r),u=c),u===c?g(()=>(d(r.val,r.h),f)):d(r)}return b(f,l)},x=e=>({get:(t,r)=>m.bind(n,e,r)}),j=(e,t)=>t?t!==e&&e.replaceWith(t):e.remove(),v=()=>{let r=0,o=[...e].filter(e=>e.rawVal!==e.h);do{t=new Set;for(let e of new Set(o.flatMap(e=>e.l=w(e.l))))y(e.f,e.s,e.t),e.t=n}while(++r<100&&(o=[...t]).length);let l=[...e].filter(e=>e.rawVal!==e.h);e=n;for(let e of new Set(l.flatMap(e=>e.o=w(e.o))))j(e.t,g(e.f,e.t)),e.t=n;for(let e of l)e.h=e.rawVal};export default{tags:new Proxy(e=>new Proxy(m,x(e)),x()),hydrate:(e,t)=>j(e,g(t,e)),add:b,state:S,derive:y}; \ No newline at end of file diff --git a/testgen/ui/components/widgets/empty_state.py b/testgen/ui/components/widgets/empty_state.py index 8b34df0..505d560 100644 --- a/testgen/ui/components/widgets/empty_state.py +++ b/testgen/ui/components/widgets/empty_state.py @@ -66,9 +66,10 @@ def empty_state( elif button_onclick: button( type_="flat", + color="primary", label=action_label, icon=button_icon, on_click=button_onclick, - style="margin: auto; width: auto; background-color: var(--primary-color);", + style="margin: auto; width: auto;", ) whitespace(5) From 41b25c1a6e4af288ce5c84606ea37c70148a81ba Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 12:49:14 -0500 Subject: [PATCH 47/91] refactor(tooltip): support tooltip positions - add to button and tooltip-icon components --- testgen/ui/components/frontend/css/shared.css | 6 + .../frontend/js/components/button.js | 29 +++- .../frontend/js/components/tooltip.js | 157 ++++++++++++++++++ .../frontend/js/components/tooltip_icon.js | 45 +++++ .../frontend/js/pages/profiling_runs.js | 2 +- .../components/frontend/js/pages/test_runs.js | 2 +- .../ui/components/frontend/js/van-tooltip.js | 52 ------ 7 files changed, 231 insertions(+), 62 deletions(-) create mode 100644 testgen/ui/components/frontend/js/components/tooltip.js create mode 100644 testgen/ui/components/frontend/js/components/tooltip_icon.js delete mode 100644 testgen/ui/components/frontend/js/van-tooltip.js diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index bcbe89c..3c3f07f 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -26,6 +26,7 @@ body { --disabled-text-color: #00000042; --caption-text-color: rgba(49, 51, 63, 0.6); /* Match Streamlit's caption color */ --border-color: rgba(0, 0, 0, .12); + --tooltip-color: #333d; --dk-card-background: #fff; --sidebar-background-color: white; @@ -88,6 +89,11 @@ body { } } +.hidden { + display: none !important; +} + + /* Table styles */ .table { background-color: var(--dk-card-background); diff --git a/testgen/ui/components/frontend/js/components/button.js b/testgen/ui/components/frontend/js/components/button.js index ba2092a..858a588 100644 --- a/testgen/ui/components/frontend/js/components/button.js +++ b/testgen/ui/components/frontend/js/components/button.js @@ -14,6 +14,7 @@ import { emitEvent, enforceElementWidth, getValue, loadStylesheet } from '../utils.js'; import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; +import { Tooltip } from './tooltip.js'; const { button, i, span } = van.tags; const BUTTON_TYPE = { @@ -44,21 +45,29 @@ const Button = (/** @type Properties */ 
props) => { if (width) { enforceElementWidth(window.frameElement, width); } - } - - if (props.tooltip) { - window.frameElement.parentElement.setAttribute('data-tooltip', props.tooltip.val); - window.frameElement.parentElement.setAttribute('data-tooltip-position', props.tooltipPosition.val); + if (props.tooltip) { + window.frameElement.parentElement.setAttribute('data-tooltip', props.tooltip.val); + window.frameElement.parentElement.setAttribute('data-tooltip-position', props.tooltipPosition.val); + } } const onClickHandler = props.onclick || (() => emitEvent('ButtonClicked')); + const showTooltip = van.state(false); + return button( { class: `tg-button tg-${buttonType}-button tg-${getValue(props.color) ?? 'basic'}-button ${buttonType !== 'icon' && isIconOnly ? 'tg-icon-button' : ''}`, style: () => `width: ${isIconOnly ? '' : (width ?? '100%')}; ${getValue(props.style)}`, onclick: onClickHandler, disabled: props.disabled, + onmouseenter: props.tooltip ? (() => showTooltip.val = true) : undefined, + onmouseleave: props.tooltip ? (() => showTooltip.val = false) : undefined, }, + props.tooltip ? Tooltip({ + text: props.tooltip, + show: showTooltip, + position: props.tooltipPosition, + }) : undefined, span({class: 'tg-button-focus-state-indicator'}, ''), props.icon ? i({class: 'material-symbols-rounded'}, props.icon) : undefined, !isIconOnly ? span(props.label) : undefined, @@ -71,7 +80,6 @@ button.tg-button { height: 40px; position: relative; - overflow: hidden; display: flex; flex-direction: row; @@ -88,6 +96,11 @@ button.tg-button { font-size: 14px; } +button.tg-button .tg-button-focus-state-indicator { + border-radius: inherit; + overflow: hidden; +} + button.tg-button .tg-button-focus-state-indicator::before { content: ""; opacity: 0; @@ -113,7 +126,7 @@ button.tg-button:has(span) { } button.tg-button:not(.tg-icon-button):has(span):has(i) { - padding-left: 8px; + padding-left: 12px; } button.tg-button[disabled] { @@ -121,7 +134,7 @@ button.tg-button[disabled] { cursor: not-allowed; } -button.tg-button.tg-icon-button > i { +button.tg-button > i { font-size: 18px; } diff --git a/testgen/ui/components/frontend/js/components/tooltip.js b/testgen/ui/components/frontend/js/components/tooltip.js new file mode 100644 index 0000000..843e175 --- /dev/null +++ b/testgen/ui/components/frontend/js/components/tooltip.js @@ -0,0 +1,157 @@ +// Code modified from vanjs-ui +// https://www.npmjs.com/package/vanjs-ui +// https://cdn.jsdelivr.net/npm/vanjs-ui@0.10.0/dist/van-ui.nomodule.js + +/** + * @typedef Properties + * @type {object} + * @property {string} text + * @property {boolean} show + * @property {('top-left' | 'top' | 'top-right' | 'right' | 'bottom-right' | 'bottom' | 'bottom-left' | 'left')?} position + */ +import van from '../van.min.js'; +import { getValue, loadStylesheet } from '../utils.js'; + +const { div, span } = van.tags; +const defaultPosition = 'top'; + +const Tooltip = (/** @type Properties */ props) => { + loadStylesheet('tooltip', stylesheet); + + return span( + { + class: () => `tg-tooltip ${getValue(props.position) || defaultPosition} ${getValue(props.show) ? '' : 'hidden'}`, + style: () => `opacity: ${getValue(props.show) ? 
1 : 0};`, + }, + props.text, + div({ class: 'tg-tooltip--triangle' }), + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-tooltip { + width: max-content; + max-width: 400px; + position: absolute; + z-index: 1; + border-radius: 4px; + background-color: var(--tooltip-color); + padding: 4px 8px; + color: white; + font-size: 13px; + font-family: 'Roboto', 'Helvetica Neue', sans-serif; + text-align: center; + text-wrap: wrap; + transition: opacity 0.3s; +} + +.tg-tooltip--triangle { + width: 0; + height: 0; + position: absolute; + border: solid transparent; +} + +.tg-tooltip.top-left { + right: 50%; + bottom: 125%; + transform: translateX(20px); +} +.top-left .tg-tooltip--triangle { + bottom: -5px; + right: 20px; + margin-right: -5px; + border-width: 5px 5px 0; + border-top-color: var(--tooltip-color); +} + +.tg-tooltip.top { + left: 50%; + bottom: 125%; + transform: translateX(-50%); +} +.top .tg-tooltip--triangle { + bottom: -5px; + left: 50%; + margin-left: -5px; + border-width: 5px 5px 0; + border-top-color: var(--tooltip-color); +} + +.tg-tooltip.top-right { + left: 50%; + bottom: 125%; + transform: translateX(-20px); +} +.top-right .tg-tooltip--triangle { + bottom: -5px; + left: 20px; + margin-left: -5px; + border-width: 5px 5px 0; + border-top-color: var(--tooltip-color); +} + +.tg-tooltip.right { + left: 125%; +} +.right .tg-tooltip--triangle { + top: 50%; + left: -5px; + margin-top: -5px; + border-width: 5px 5px 5px 0; + border-right-color: var(--tooltip-color); +} + +.tg-tooltip.bottom-right { + left: 50%; + top: 125%; + transform: translateX(-20px); +} +.bottom-right .tg-tooltip--triangle { + top: -5px; + left: 20px; + margin-left: -5px; + border-width: 0 5px 5px; + border-bottom-color: var(--tooltip-color); +} + +.tg-tooltip.bottom { + top: 125%; + left: 50%; + transform: translateX(-50%); +} +.bottom .tg-tooltip--triangle { + top: -5px; + left: 50%; + margin-left: -5px; + border-width: 0 5px 5px; + border-bottom-color: var(--tooltip-color); +} + +.tg-tooltip.bottom-left { + right: 50%; + top: 125%; + transform: translateX(20px); +} +.bottom-left .tg-tooltip--triangle { + top: -5px; + right: 20px; + margin-right: -5px; + border-width: 0 5px 5px; + border-bottom-color: var(--tooltip-color); +} + +.tg-tooltip.left { + right: 125%; +} +.left .tg-tooltip--triangle { + top: 50%; + right: -5px; + margin-top: -5px; + border-width: 5px 0 5px 5px; + border-left-color: var(--tooltip-color); +} +`); + +export { Tooltip }; diff --git a/testgen/ui/components/frontend/js/components/tooltip_icon.js b/testgen/ui/components/frontend/js/components/tooltip_icon.js new file mode 100644 index 0000000..7d3d5d3 --- /dev/null +++ b/testgen/ui/components/frontend/js/components/tooltip_icon.js @@ -0,0 +1,45 @@ +/** + * @typedef Properties + * @type {object} + * @property {string} icon + * @property {number?} iconSize + * @property {string} tooltip + * @property {('top-left' | 'top' | 'top-right' | 'right' | 'bottom-right' | 'bottom' | 'bottom-left' | 'left')?} tooltipPosition + * @property {string} classes + */ +import { getValue, loadStylesheet } from '../utils.js'; +import van from '../van.min.js'; +import { Tooltip } from './tooltip.js'; + +const { i } = van.tags; +const defaultIconSize = 20; + +const TooltipIcon = (/** @type Properties */ props) => { + loadStylesheet('tooltipIcon', stylesheet); + const showTooltip = van.state(false); + + return i( + { + class: () => `material-symbols-rounded tg-tooltip-icon text-secondary ${getValue(props.classes)}`, + style: () => `font-size: 
${getValue(props.iconSize) || defaultIconSize}px;`, + onmouseenter: () => showTooltip.val = true, + onmouseleave: () => showTooltip.val = false, + }, + props.icon, + Tooltip({ + text: props.tooltip, + show: showTooltip, + position: props.tooltipPosition, + }), + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-tooltip-icon { + position: relative; + cursor: default; +} +`); + +export { TooltipIcon }; diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js index 9afe07c..c434f37 100644 --- a/testgen/ui/components/frontend/js/pages/profiling_runs.js +++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js @@ -4,7 +4,7 @@ * @property {array} items */ import van from '../van.min.js'; -import { Tooltip } from '../van-tooltip.js'; +import { Tooltip } from '../components/tooltip.js'; import { SummaryBar } from '../components/summary_bar.js'; import { Link } from '../components/link.js'; import { Button } from '../components/button.js'; diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js index c5084b4..d100f91 100644 --- a/testgen/ui/components/frontend/js/pages/test_runs.js +++ b/testgen/ui/components/frontend/js/pages/test_runs.js @@ -4,7 +4,7 @@ * @property {array} items */ import van from '../van.min.js'; -import { Tooltip } from '../van-tooltip.js'; +import { Tooltip } from '../components/tooltip.js'; import { SummaryBar } from '../components/summary_bar.js'; import { Link } from '../components/link.js'; import { Button } from '../components/button.js'; diff --git a/testgen/ui/components/frontend/js/van-tooltip.js b/testgen/ui/components/frontend/js/van-tooltip.js deleted file mode 100644 index 565715b..0000000 --- a/testgen/ui/components/frontend/js/van-tooltip.js +++ /dev/null @@ -1,52 +0,0 @@ -// Code modified from vanjs-ui -// https://www.npmjs.com/package/vanjs-ui -// https://cdn.jsdelivr.net/npm/vanjs-ui@0.10.0/dist/van-ui.nomodule.js - -import van from './van.min.js'; -const { div, span } = van.tags; - -const toStyleStr = (style) => Object.entries(style).map(([k, v]) => `${k}: ${v};`).join(""); - -const Tooltip = ({ text, show, backgroundColor = '#333D', fontColor = 'white', fadeInSec = 0.3, tooltipClass = '', tooltipStyleOverrides = {}, triangleClass = '', triangleStyleOverrides = {}, }) => { - const tooltipStylesStr = toStyleStr({ - width: 'max-content', - 'min-width': '100px', - 'max-width': '400px', - visibility: 'hidden', - 'background-color': backgroundColor, - color: fontColor, - 'text-align': 'center', - padding: '5px', - 'border-radius': '5px', - position: 'absolute', - 'z-index': 1, - bottom: '125%', - left: '50%', - transform: 'translateX(-50%)', - opacity: 0, - transition: `opacity ${fadeInSec}s`, - 'font-size': '14px', - 'font-family': `'Roboto', 'Helvetica Neue', sans-serif`, - 'text-wrap': 'wrap', - ...tooltipStyleOverrides, - }); - const triangleStylesStr = toStyleStr({ - width: 0, - height: 0, - 'margin-left': '-5px', - 'border-left': '5px solid transparent', - 'border-right': '5px solid transparent', - 'border-top': '5px solid #333', - position: 'absolute', - bottom: '-5px', - left: '50%', - ...triangleStyleOverrides, - }); - const dom = span({ class: tooltipClass, style: tooltipStylesStr }, text, div({ class: triangleClass, style: triangleStylesStr })); - van.derive(() => show.val ? 
- (dom.style.opacity = '1', dom.style.visibility = 'visible') : - (dom.style.opacity = '0', dom.style.visibility = 'hidden')); - return dom; -}; - -export { Tooltip }; From eee533cc632bf3c844328f6ae3c881d55684a575 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 12:51:09 -0500 Subject: [PATCH 48/91] feat(link): support opening links in new tabs --- .../ui/components/frontend/js/components/link.js | 16 +++++++++++++++- testgen/ui/components/widgets/link.py | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/testgen/ui/components/frontend/js/components/link.js b/testgen/ui/components/frontend/js/components/link.js index 49c562a..b070b6f 100644 --- a/testgen/ui/components/frontend/js/components/link.js +++ b/testgen/ui/components/frontend/js/components/link.js @@ -4,6 +4,7 @@ * @property {string} href * @property {object} params * @property {string} label + * @property {boolean} open_new * @property {boolean} underline * @property {string?} left_icon * @property {number?} left_icon_size @@ -32,12 +33,15 @@ const Link = (/** @type Properties */ props) => { const href = getValue(props.href); const params = getValue(props.params) || {}; + const open_new = !!getValue(props.open_new); return a( { class: `tg-link ${getValue(props.underline) ? 'tg-link--underline' : ''}`, style: props.style, - onclick: (event) => { + href: `/${href}${getQueryFromParams(params)}`, + target: open_new ? '_blank' : '', + onclick: open_new ? null : (event) => { event.preventDefault(); event.stopPropagation(); emitEvent('LinkClicked', { href, params }); @@ -63,6 +67,16 @@ const LinkIcon = ( ); }; +function getQueryFromParams(/** @type object */ params) { + const query = Object.entries(params).reduce((query, [ key, value ]) => { + if (key && value) { + return `${query}${query ? '&' : ''}${key}=${value}`; + } + return query; + }, ''); + return query ? 
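+        // add the leading '?' only when at least one key=value pair was built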
`?${query}` : ''; +} + const stylesheet = new CSSStyleSheet(); stylesheet.replace(` .tg-link { diff --git a/testgen/ui/components/widgets/link.py b/testgen/ui/components/widgets/link.py index 7230edb..4e2bf28 100644 --- a/testgen/ui/components/widgets/link.py +++ b/testgen/ui/components/widgets/link.py @@ -7,6 +7,7 @@ def link( label: str, *, params: dict = {}, # noqa: B006 + open_new: bool = False, underline: bool = True, left_icon: str | None = None, left_icon_size: float = 20.0, @@ -22,6 +23,7 @@ def link( "params": params, "label": label, "height": height, + "open_new": open_new, "underline": underline, } if left_icon: From ab08ba23012c105e9a8169906ceef0b2e4405b6f Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 12:54:51 -0500 Subject: [PATCH 49/91] misc(summary): add legend colors to summary bar component --- testgen/ui/assets/style.css | 26 +++++++++++++ testgen/ui/components/frontend/css/shared.css | 8 ++++ .../frontend/js/components/summary_bar.js | 38 +++++++++++++------ testgen/ui/components/widgets/summary_bar.py | 2 +- 4 files changed, 62 insertions(+), 12 deletions(-) diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css index 184a8c6..67266d7 100644 --- a/testgen/ui/assets/style.css +++ b/testgen/ui/assets/style.css @@ -262,8 +262,34 @@ Use as testgen.text("text", "extra_styles") */ .tg-summary-bar--caption { margin-top: 4px; + display: flex; + flex-flow: row wrap; + align-items: center; color: var(--caption-text-color); + font-size: 13px; font-style: italic; + line-height: 1; +} + +.tg-summary-bar--legend { + display: flex; + flex-flow: row nowrap; + align-items: center; + width: auto; +} + +.tg-summary-bar--legend:not(:last-child) { + margin-right: 8px; +} + +.tg-summary-bar--legend-dot { + margin-right: 2px; + font-size: 4px; + font-style: normal; +} + +.tg-summary-bar--legend-dot::before { + content: '⬤'; } /* */ diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index 3c3f07f..7adb2bf 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -93,6 +93,14 @@ body { display: none !important; } +.dot { + font-size: 10px; + font-style: normal; +} + +.dot::before { + content: '⬤'; +} /* Table styles */ .table { diff --git a/testgen/ui/components/frontend/js/components/summary_bar.js b/testgen/ui/components/frontend/js/components/summary_bar.js index 000d2fb..b73ea5c 100644 --- a/testgen/ui/components/frontend/js/components/summary_bar.js +++ b/testgen/ui/components/frontend/js/components/summary_bar.js @@ -8,9 +8,9 @@ * @typedef Properties * @type {object} * @property {Array.} items - * @property {string} label - * @property {number} height - * @property {number} width + * @property {string?} label + * @property {number?} height + * @property {number?} width */ import van from '../van.min.js'; import { getValue, loadStylesheet } from '../utils.js'; @@ -50,12 +50,17 @@ const SummaryBar = (/** @type Properties */ props) => { background-color: ${colorMap[item.color] || item.color};`, })), ), - () => { - return total ? div( - { class: `tg-summary-bar--caption` }, - summaryItems.map(item => `${item.label}: ${item.value || 0}`).join(', '), - ) : null; - }, + () => total.val ? 
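+            // when there are any values, render the caption as a legend: a colored dot, label, and count per item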
div( + { class: 'tg-summary-bar--caption flex-row fx-flex-wrap text-caption mt-1' }, + getValue(props.items).map(item => div( + { class: 'tg-summary-bar--legend flex-row' }, + span({ + class: 'dot', + style: `color: ${colorMap[item.color] || item.color};`, + }), + `${item.label}: ${item.value || 0}`, + )), + ) : '', ); }; @@ -80,10 +85,21 @@ stylesheet.replace(` } .tg-summary-bar--caption { - margin-top: 4px; - color: var(--caption-text-color); font-style: italic; } + +.tg-summary-bar--legend { + width: auto; +} + +.tg-summary-bar--legend:not(:last-child) { + margin-right: 8px; +} + +.tg-summary-bar--legend span { + margin-right: 2px; + font-size: 4px; +} `); export { SummaryBar }; diff --git a/testgen/ui/components/widgets/summary_bar.py b/testgen/ui/components/widgets/summary_bar.py index c4b636d..bf913c6 100644 --- a/testgen/ui/components/widgets/summary_bar.py +++ b/testgen/ui/components/widgets/summary_bar.py @@ -44,7 +44,7 @@ def summary_bar( if total: item_spans = "".join([ f'' for item in items ]) - caption = ", ".join([ f"{item['label']}: {item['value']}" for item in items ]) + caption = "".join([ f'
{item["label"]}: {item["value"]}
' for item in items ]) caption_div = f"""
{caption} From 58ca5a7d802cf90f35ab24b3405a57c3956798e7 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 13:01:33 -0500 Subject: [PATCH 50/91] feat(components): add BoxPlot, PercentBar and FrequencyBars components --- testgen/ui/components/frontend/css/shared.css | 36 ++- .../ui/components/frontend/js/axis_utils.js | 54 ++++ .../frontend/js/components/box_plot.js | 290 ++++++++++++++++++ .../frontend/js/components/frequency_bars.js | 94 ++++++ .../frontend/js/components/percent_bar.js | 79 +++++ .../frontend/js/components/summary_bar.js | 11 +- .../components/frontend/js/display_utils.js | 23 +- 7 files changed, 573 insertions(+), 14 deletions(-) create mode 100644 testgen/ui/components/frontend/js/axis_utils.js create mode 100644 testgen/ui/components/frontend/js/components/box_plot.js create mode 100644 testgen/ui/components/frontend/js/components/frequency_bars.js create mode 100644 testgen/ui/components/frontend/js/components/percent_bar.js diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index 7adb2bf..a4884ec 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -20,6 +20,8 @@ body { --blue: #42A5F5; --brown: #8D6E63; --grey: #BDBDBD; + --empty: #EEEEEE; + --empty-light: #FAFAFA; --primary-text-color: #000000de; --secondary-text-color: #0000008a; @@ -62,6 +64,9 @@ body { @media (prefers-color-scheme: dark) { body { + --empty: #424242; + --empty-light: #212121; + --primary-text-color: rgba(255, 255, 255); --secondary-text-color: rgba(255, 255, 255, .7); --disabled-text-color: rgba(255, 255, 255, .5); @@ -150,15 +155,12 @@ body { .flex-row { display: flex; flex-direction: row; - flex-grow: 1; - width: 100%; align-items: center; } .flex-column { display: flex; flex-direction: column; - flex-grow: 1; } .fx-flex { @@ -209,6 +211,34 @@ body { align-content: flex-start; } +.fx-gap-1 { + gap: 4px; +} + +.fx-gap-2 { + gap: 8px; +} + +.fx-gap-3 { + gap: 12px; +} + +.fx-gap-4 { + gap: 16px; +} + +.fx-gap-5 { + gap: 24px; +} + +.fx-gap-6 { + gap: 32px; +} + +.fx-gap-7 { + gap: 40px; +} + /* */ /* Whitespace utilities */ diff --git a/testgen/ui/components/frontend/js/axis_utils.js b/testgen/ui/components/frontend/js/axis_utils.js new file mode 100644 index 0000000..6c7e835 --- /dev/null +++ b/testgen/ui/components/frontend/js/axis_utils.js @@ -0,0 +1,54 @@ +// https://stackoverflow.com/a/4955179 +function niceNumber(value, round = false) { + const exponent = Math.floor(Math.log10(value)); + const fraction = value / Math.pow(10, exponent); + let niceFraction; + + if (round) { + if (fraction < 1.5) { + niceFraction = 1; + } else if (fraction < 3) { + niceFraction = 2; + } else if (fraction < 7) { + niceFraction = 5; + } else { + niceFraction = 10; + } + } else { + if (fraction <= 1) { + niceFraction = 1; + } else if (fraction <= 2) { + niceFraction = 2; + } else if (fraction <= 5) { + niceFraction = 5; + } else { + niceFraction = 10; + } + } + + return niceFraction * Math.pow(10, exponent); +} + +function niceBounds(axisStart, axisEnd, tickCount = 4) { + let axisWidth = axisEnd - axisStart; + + if (axisWidth == 0) { + axisStart -= 0.5; + axisEnd += 0.5; + axisWidth = axisEnd - axisStart; + } + + const niceRange = niceNumber(axisWidth); + const niceTick = niceNumber(niceRange / (tickCount - 1), true); + axisStart = Math.floor(axisStart / niceTick) * niceTick; + axisEnd = Math.ceil(axisEnd / niceTick) * niceTick; + + return { + min: axisStart, + max: axisEnd, + 
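+        // step is the rounded tick interval; range is the padded span from min to max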
step: niceTick, + range: axisEnd - axisStart, + }; +} + +export { niceBounds }; diff --git a/testgen/ui/components/frontend/js/components/box_plot.js b/testgen/ui/components/frontend/js/components/box_plot.js new file mode 100644 index 0000000..81447d3 --- /dev/null +++ b/testgen/ui/components/frontend/js/components/box_plot.js @@ -0,0 +1,290 @@ +/** + * @typedef Properties + * @type {object} + * @property {number} minimum + * @property {number} maximum + * @property {number} median + * @property {number} lowerQuartile + * @property {number} upperQuartile + * @property {number} average + * @property {number} standardDeviation + * @property {number?} width + */ +import van from '../van.min.js'; +import { getValue, loadStylesheet } from '../utils.js'; +import { colorMap } from '../display_utils.js'; +import { niceBounds } from '../axis_utils.js'; + +const { div } = van.tags; +const boxColor = colorMap.teal; +const lineColor = colorMap.limeGreen; + +const BoxPlot = (/** @type Properties */ props) => { + loadStylesheet('boxPlot', stylesheet); + + const { minimum, maximum, median, lowerQuartile, upperQuartile, average, standardDeviation, width } = props; + const axisTicks = van.derive(() => niceBounds(getValue(minimum), getValue(maximum))); + + return div( + { + class: 'flex-row fx-flex-wrap fx-gap-6', + style: () => `max-width: ${width ? getValue(width) + 'px' : '100%'};`, + }, + div( + { style: 'flex: 300px' }, + div( + { + class: 'tg-box-plot--line', + style: () => { + const { min, range } = axisTicks.val; + return `left: ${(getValue(average) - getValue(standardDeviation) - min) * 100 / range}%; + width: ${getValue(standardDeviation) * 2 * 100 / range}%;`; + }, + }, + div({ class: 'tg-box-plot--dot' }), + ), + div( + { + class: 'tg-box-plot--grid', + style: () => { + const { min, max, range } = axisTicks.val; + + return `grid-template-columns: + ${(getValue(minimum) - min) * 100 / range}% + ${(getValue(lowerQuartile) - getValue(minimum)) * 100 / range}% + ${(getValue(median) - getValue(lowerQuartile)) * 100 / range}% + ${(getValue(upperQuartile) - getValue(median)) * 100 / range}% + ${(getValue(maximum) - getValue(upperQuartile)) * 100 / range}% + ${(max - getValue(maximum)) * 100 / range}%;`; + }, + }, + div({ class: 'tg-box-plot--space-left' }), + div({ class: 'tg-box-plot--top-left' }), + div({ class: 'tg-box-plot--bottom-left' }), + div({ class: 'tg-box-plot--mid-left' }), + div({ class: 'tg-box-plot--mid-right' }), + div({ class: 'tg-box-plot--top-right' }), + div({ class: 'tg-box-plot--bottom-right' }), + div({ class: 'tg-box-plot--space-right' }), + ), + () => { + const { min, max, step, range } = axisTicks.val; + const ticks = []; + let currentTick = min; + while (currentTick <= max) { + ticks.push(currentTick); + currentTick += step; + } + + return div( + { class: 'tg-box-plot--axis' }, + ticks.map(position => div( + { + class: 'tg-box-plot--axis-tick', + style: `left: ${(position - min) * 100 / range}%;` + }, + position, + )), + ); + }, + ), + div( + { class: 'flex-column fx-gap-2 text-caption', style: 'flex: 150px;' }, + div( + { class: 'flex-row fx-gap-2' }, + div({ class: 'tg-blox-plot--legend-line' }), + 'Average---Standard Deviation', + ), + div( + { class: 'flex-row fx-gap-2' }, + div({ class: 'tg-blox-plot--legend-whisker' }), + 'Minimum---Maximum', + ), + div( + { class: 'flex-row fx-gap-2' }, + div({ class: 'tg-blox-plot--legend-box' }), + '25th---Median---75th', + ), + ), + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-box-plot--line { + 
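+    /* dotted line marking average +/- one standard deviation */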
position: relative; + margin: 8px 0 24px 0; + border-top: 2px dotted ${lineColor}; +} + +.tg-box-plot--dot { + position: absolute; + top: -1px; + left: 50%; + transform: translateX(-50%) translateY(-50%); + width: 10px; + height: 10px; + border-radius: 5px; + background-color: ${lineColor}; +} + +.tg-box-plot--grid { + height: 24px; + display: grid; + grid-template-rows: 50% 50%; +} + +.tg-box-plot--grid div { + border-color: var(--caption-text-color); + border-style: solid; +} + +.tg-box-plot--space-left { + grid-column-start: 1; + grid-column-end: 2; + grid-row-start: 1; + grid-row-end: 3; + border: 0; +} + +.tg-box-plot--top-left { + grid-column-start: 2; + grid-column-end: 3; + grid-row-start: 1; + grid-row-end: 2; + border-width: 0 0 1px 2px; +} + +.tg-box-plot--bottom-left { + grid-column-start: 2; + grid-column-end: 3; + grid-row-start: 2; + grid-row-end: 3; + border-width: 1px 0 0 2px; +} + +.tg-box-plot--mid-left { + grid-column-start: 3; + grid-column-end: 4; + grid-row-start: 1; + grid-row-end: 3; + border-width: 1px 2px 1px 1px; + border-radius: 4px 0 0 4px; + background-color: ${boxColor}; +} + +.tg-box-plot--mid-right { + grid-column-start: 4; + grid-column-end: 5; + grid-row-start: 1; + grid-row-end: 3; + border-width: 1px 1px 1px 2px; + border-radius: 0 4px 4px 0; + background-color: ${boxColor}; +} + +.tg-box-plot--top-right { + grid-column-start: 5; + grid-column-end: 6; + grid-row-start: 1; + grid-row-end: 2; + border-width: 0 2px 1px 0; +} + +.tg-box-plot--bottom-right { + grid-column-start: 5; + grid-column-end: 6; + grid-row-start: 2; + grid-row-end: 3; + border-width: 1px 2px 0 0; +} + +.tg-box-plot--space-right { + grid-column-start: 6; + grid-column-end: 7; + grid-row-start: 1; + grid-row-end: 3; + border: 0; +} + +.tg-box-plot--axis { + position: relative; + margin: 24px 0; + width: 100%; + height: 2px; + background-color: var(--disabled-text-color); + color: var(--caption-text-color); +} + +.tg-box-plot--axis-tick { + position: absolute; + top: 8px; + transform: translateX(-50%); +} + +.tg-box-plot--axis-tick::before { + position: absolute; + top: -9px; + left: 50%; + transform: translateX(-50%); + width: 4px; + height: 4px; + border-radius: 2px; + background-color: var(--disabled-text-color); + content: ''; +} + +.tg-blox-plot--legend-line { + width: 26px; + border: 1px dotted ${lineColor}; + position: relative; +} + +.tg-blox-plot--legend-line::after { + position: absolute; + left: 50%; + transform: translateX(-50%) translateY(-50%); + width: 6px; + height: 6px; + border-radius: 6px; + background-color: ${lineColor}; + content: ''; +} + +.tg-blox-plot--legend-whisker { + width: 24px; + height: 12px; + border: solid var(--caption-text-color); + border-width: 0 2px 0 2px; + position: relative; +} + +.tg-blox-plot--legend-whisker::after { + position: absolute; + top: 5px; + width: 24px; + height: 2px; + background-color: var(--caption-text-color); + content: ''; +} + +.tg-blox-plot--legend-box { + width: 26px; + height: 12px; + border: 1px solid var(--caption-text-color); + border-radius: 4px; + background-color: ${boxColor}; + position: relative; +} + +.tg-blox-plot--legend-box::after { + position: absolute; + left: 12px; + width: 2px; + height: 12px; + background-color: var(--caption-text-color); + content: ''; +} +`); + +export { BoxPlot }; diff --git a/testgen/ui/components/frontend/js/components/frequency_bars.js b/testgen/ui/components/frontend/js/components/frequency_bars.js new file mode 100644 index 0000000..ed49bf5 --- /dev/null +++ 
b/testgen/ui/components/frontend/js/components/frequency_bars.js @@ -0,0 +1,94 @@ +/** + * @typedef FrequencyItem + * @type {object} + * @property {string} value + * @property {number} count + * + * @typedef Properties + * @type {object} + * @property {FrequencyItem[]} items + * @property {number} total + * @property {string} title + * @property {string?} color + */ +import van from '../van.min.js'; +import { getValue, loadStylesheet } from '../utils.js'; +import { colorMap } from '../display_utils.js'; + +const { div, span } = van.tags; +const defaultColor = 'teal'; + +const FrequencyBars = (/** @type Properties */ props) => { + loadStylesheet('frequencyBars', stylesheet); + + const total = van.derive(() => getValue(props.total)); + const color = van.derive(() => { + const colorValue = getValue(props.color) || defaultColor; + return colorMap[colorValue] || colorValue; + }); + const width = van.derive(() => { + const maxCount = getValue(props.items).reduce((max, { count }) => Math.max(max, count), 0); + return String(maxCount).length * 7; + }); + + return () => div( + div( + { class: 'mb-2 text-secondary' }, + props.title, + ), + getValue(props.items).map(({ value, count }) => { + return div( + { class: 'flex-row fx-gap-2' }, + div( + { class: 'tg-frequency-bars' }, + span({ class: 'tg-frequency-bars--empty' }), + span({ + class: 'tg-frequency-bars--fill', + style: () => `width: ${count * 100 / total.val}%; + ${count ? 'min-width: 1px;' : ''} + background-color: ${color.val};`, + }), + ), + div( + { + class: 'text-caption tg-frequency-bars--count', + style: () => `width: ${width.val}px;`, + }, + count, + ), + div(value), + ); + }), + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-frequency-bars { + width: 150px; + height: 15px; + flex-shrink: 0; + position: relative; +} + +.tg-frequency-bars--empty { + position: absolute; + width: 100%; + height: 100%; + border-radius: 4px; + background-color: ${colorMap['emptyLight']} +} + +.tg-frequency-bars--fill { + position: absolute; + border-radius: 4px; + height: 100%; +} + +.tg-frequency-bars--count { + flex-shrink: 0; + text-align: right; +} +`); + +export { FrequencyBars }; diff --git a/testgen/ui/components/frontend/js/components/percent_bar.js b/testgen/ui/components/frontend/js/components/percent_bar.js new file mode 100644 index 0000000..e6a5321 --- /dev/null +++ b/testgen/ui/components/frontend/js/components/percent_bar.js @@ -0,0 +1,79 @@ +/** + * @typedef Properties + * @type {object} + * @property {string} label + * @property {number} value + * @property {number} total + * @property {string?} color + * @property {number?} height + * @property {number?} width + */ +import van from '../van.min.js'; +import { getValue, loadStylesheet } from '../utils.js'; +import { colorMap } from '../display_utils.js'; + +const { div, span } = van.tags; +const defaultHeight = 10; +const defaultColor = 'purpleLight'; + +const PercentBar = (/** @type Properties */ props) => { + loadStylesheet('percentBar', stylesheet); + const value = van.derive(() => getValue(props.value)); + const total = van.derive(() => getValue(props.total)); + + return div( + { style: () => `max-width: ${props.width ? getValue(props.width) + 'px' : '100%'};` }, + div( + { class: () => `tg-percent-bar--label ${value.val ? 
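+            // use the muted text color for the label when the value is zero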
'' : 'text-secondary'}` }, + () => `${getValue(props.label)}: ${value.val}`, + ), + div( + { + class: 'tg-percent-bar', + style: () => `height: ${getValue(props.height) || defaultHeight}px;`, + }, + span({ + class: 'tg-percent-bar--fill', + style: () => { + const color = getValue(props.color) || defaultColor; + return `width: ${value.val * 100 / total.val}%; + ${value.val ? 'min-width: 1px;' : ''} + background-color: ${colorMap[color] || color};` + }, + }), + span({ + class: 'tg-percent-bar--empty', + style: () => `width: ${(total.val - value.val) * 100 / total.val}%; + ${(total.val - value.val) ? 'min-width: 1px;' : ''};`, + }), + ), + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-percent-bar--label { + margin-bottom: 4px; +} + +.tg-percent-bar { + height: 100%; + display: flex; + flex-flow: row nowrap; + align-items: flex-start; + justify-content: flex-start; + border-radius: 4px; + overflow: hidden; +} + +.tg-percent-bar--fill { + height: 100%; +} + +.tg-percent-bar--empty { + height: 100%; + background-color: ${colorMap['empty']} +} +`); + +export { PercentBar }; diff --git a/testgen/ui/components/frontend/js/components/summary_bar.js b/testgen/ui/components/frontend/js/components/summary_bar.js index b73ea5c..e331000 100644 --- a/testgen/ui/components/frontend/js/components/summary_bar.js +++ b/testgen/ui/components/frontend/js/components/summary_bar.js @@ -14,18 +14,9 @@ */ import van from '../van.min.js'; import { getValue, loadStylesheet } from '../utils.js'; +import { colorMap } from '../display_utils.js'; const { div, span } = van.tags; -const colorMap = { - red: '#EF5350', - orange: '#FF9800', - yellow: '#FDD835', - green: '#9CCC65', - purple: '#AB47BC', - blue: '#42A5F5', - brown: '#8D6E63', - grey: '#BDBDBD', -} const defaultHeight = 24; const SummaryBar = (/** @type Properties */ props) => { diff --git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js index 512cc0f..a2d6384 100644 --- a/testgen/ui/components/frontend/js/display_utils.js +++ b/testgen/ui/components/frontend/js/display_utils.js @@ -26,4 +26,25 @@ function formatDuration(/** @type string */ duration) { return formatted.trim() || '< 1s'; } -export { formatTimestamp, formatDuration }; +// https://m2.material.io/design/color/the-color-system.html#tools-for-picking-colors +const colorMap = { + red: '#EF5350', // Red 400 + orange: '#FF9800', // Orange 500 + yellow: '#FDD835', // Yellow 600 + green: '#9CCC65', // Light Green 400 + limeGreen: '#C0CA33', // Lime Green 600 + purple: '#AB47BC', // Purple 400 + purpleLight: '#CE93D8', // Purple 200 + blue: '#2196F3', // Blue 500 + blueLight: '#90CAF9', // Blue 200 + indigo: '#5C6BC0', // Indigo 400 + teal: '#26A69A', // Teal 400 + brown: '#8D6E63', // Brown 400 + brownLight: '#D7CCC8', // Brown 100 + brownDark: '#4E342E', // Brown 800 + grey: '#BDBDBD', // Gray 400 + empty: 'var(--empty)', // Light: Gray 200, Dark: Gray 800 + emptyLight: 'var(--empty-light)', // Light: Gray 50, Dark: Gray 900 +} + +export { formatTimestamp, formatDuration, colorMap }; From 6f1f5957b7e910abadd2603d7d93e3e21fa1eb11 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 13:03:55 -0500 Subject: [PATCH 51/91] feat(components): add Attribute, Card and EditableCard components --- testgen/ui/components/frontend/css/shared.css | 3 + .../frontend/js/components/attribute.js | 39 +++++++++++ .../components/frontend/js/components/card.js | 47 ++++++++++++++ .../frontend/js/components/editable_card.js | 
64 +++++++++++++++++++ 4 files changed, 153 insertions(+) create mode 100644 testgen/ui/components/frontend/js/components/attribute.js create mode 100644 testgen/ui/components/frontend/js/components/card.js create mode 100644 testgen/ui/components/frontend/js/components/editable_card.js diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index a4884ec..e387b81 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -149,6 +149,9 @@ body { font-size: 12px; color: var(--caption-text-color); } +.text-capitalize { + text-transform: capitalize; +} /* */ /* Flex utilities */ diff --git a/testgen/ui/components/frontend/js/components/attribute.js b/testgen/ui/components/frontend/js/components/attribute.js new file mode 100644 index 0000000..5ca702f --- /dev/null +++ b/testgen/ui/components/frontend/js/components/attribute.js @@ -0,0 +1,39 @@ +/** + * @typedef Properties + * @type {object} + * @property {string} label + * @property {string | number} value + * @property {number?} width + */ +import { getValue, loadStylesheet } from '../utils.js'; +import van from '../van.min.js'; + +const { div } = van.tags; + +const Attribute = (/** @type Properties */ props) => { + loadStylesheet('attribute', stylesheet); + + return div( + { style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}` }, + div( + { class: 'text-caption text-capitalize mb-1' }, + props.label, + ), + div( + { class: 'attribute-value' }, + () => { + const value = getValue(props.value); + return (value || value === 0) ? value : '--'; + }, + ), + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.attribute-value { + word-wrap: break-word; +} +`); + +export { Attribute }; diff --git a/testgen/ui/components/frontend/js/components/card.js b/testgen/ui/components/frontend/js/components/card.js new file mode 100644 index 0000000..66c6ebb --- /dev/null +++ b/testgen/ui/components/frontend/js/components/card.js @@ -0,0 +1,47 @@ +/** + * @typedef Properties + * @type {object} + * @property {string} title + * @property {object} content + * @property {object?} actionContent + */ +import { loadStylesheet } from '../utils.js'; +import van from '../van.min.js'; + +const { div, h3 } = van.tags; + +const Card = (/** @type Properties */ props) => { + loadStylesheet('card', stylesheet); + + return div( + { class: 'tg-card mb-4' }, + div( + { class: 'flex-row fx-justify-space-between fx-align-flex-start' }, + h3( + { class: 'tg-card--title' }, + props.title, + ), + props.actionContent, + ), + props.content, + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-card { + border-radius: 8px; + background-color: var(--dk-card-background); + padding: 16px; +} + +.tg-card--title { + margin: 0 0 16px; + color: var(--secondary-text-color); + font-size: 16px; + font-weight: 500; + text-transform: capitalize; +} +`); + +export { Card }; diff --git a/testgen/ui/components/frontend/js/components/editable_card.js b/testgen/ui/components/frontend/js/components/editable_card.js new file mode 100644 index 0000000..4dc8e54 --- /dev/null +++ b/testgen/ui/components/frontend/js/components/editable_card.js @@ -0,0 +1,64 @@ +/** + * @typedef Properties + * @type {object} + * @property {string} title + * @property {object} content + * @property {object} editingContent + * @property {function} onSave + * @property {function?} onCancel + * @property {function?} hasChanges + */ +import { getValue } from 
'../utils.js'; +import van from '../van.min.js'; +import { Card } from './card.js'; +import { Button } from './button.js'; + +const { div } = van.tags; + +const EditableCard = (/** @type Properties */ props) => { + const editing = van.state(false); + const onCancel = van.derive(() => { + const cancelFunction = props.onCancel?.val ?? props.onCancel; + return () => { + editing.val = false; + cancelFunction?.(); + } + }); + const saveDisabled = van.derive(() => { + const hasChanges = props.hasChanges?.val ?? props.hasChanges; + return !hasChanges?.(); + }); + + return Card({ + title: props.title, + content: [ + () => editing.val ? getValue(props.editingContent) : getValue(props.content), + () => editing.val ? div( + { class: 'flex-row fx-justify-content-flex-end fx-gap-3 mt-4' }, + Button({ + type: 'stroked', + label: 'Cancel', + width: 'auto', + onclick: onCancel, + }), + Button({ + type: 'stroked', + color: 'primary', + label: 'Save', + width: 'auto', + disabled: saveDisabled, + onclick: props.onSave, + }), + ) : '', + ], + actionContent: () => !editing.val ? Button({ + type: 'stroked', + label: 'Edit', + icon: 'edit', + width: 'auto', + onclick: () => editing.val = true, + }) : '', + }); +}; + +export { EditableCard }; From b8cb63ab55fa0419c123f2bc9086eac26fcc4806 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 13:06:05 -0500 Subject: [PATCH 52/91] feat(components): add Input, Checkbox and RadioGroup components --- testgen/ui/components/frontend/css/shared.css | 8 +- .../frontend/js/components/checkbox.js | 83 ++++++++++++++ .../frontend/js/components/input.js | 104 ++++++++++++++++++ .../frontend/js/components/radio_group.js | 104 ++++++++++++++++++ .../frontend/js/components/select.js | 4 +- testgen/ui/components/frontend/js/utils.js | 18 ++- 6 files changed, 317 insertions(+), 4 deletions(-) create mode 100644 testgen/ui/components/frontend/js/components/checkbox.js create mode 100644 testgen/ui/components/frontend/js/components/input.js create mode 100644 testgen/ui/components/frontend/js/components/radio_group.js diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index e387b81..4955a1b 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -27,7 +27,8 @@ body { --secondary-text-color: #0000008a; --disabled-text-color: #00000042; --caption-text-color: rgba(49, 51, 63, 0.6); /* Match Streamlit's caption color */ - --border-color: rgba(0, 0, 0, .12); + --form-field-color: rgb(240, 242, 246); /* Match Streamlit's form field color */ + --border-color: rgba(0, 0, 0, .12); --tooltip-color: #333d; --dk-card-background: #fff; @@ -71,6 +72,7 @@ body { --secondary-text-color: rgba(255, 255, 255, .7); --disabled-text-color: rgba(255, 255, 255, .5); --caption-text-color: rgba(250, 250, 250, .6); /* Match Streamlit's caption color */ + --form-field-color: rgb(38, 39, 48); /* Match Streamlit's form field color */ --border-color: rgba(255, 255, 255, .25); --dk-card-background: #14181f; @@ -94,6 +96,10 @@ body { } } +.clickable { + cursor: pointer; +} + .hidden { display: none !important; } diff --git a/testgen/ui/components/frontend/js/components/checkbox.js b/testgen/ui/components/frontend/js/components/checkbox.js new file mode 100644 index 0000000..c7cf9a9 --- /dev/null +++ b/testgen/ui/components/frontend/js/components/checkbox.js @@ -0,0 +1,83 @@ +/** + * @typedef Properties + * @type {object} + * @property {string} label + * @property {boolean?} checked + * 
@property {function?} onChange + * @property {number?} width + */ +import van from '../van.min.js'; +import { getValue, loadStylesheet } from '../utils.js'; + +const { input, label } = van.tags; + +const Checkbox = (/** @type Properties */ props) => { + loadStylesheet('checkbox', stylesheet); + + return label( + { + class: 'flex-row fx-gap-2 clickable', + style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}`, + }, + input({ + type: 'checkbox', + class: 'tg-checkbox--input clickable', + checked: props.checked, + onchange: van.derive(() => { + const onChange = props.onChange?.val ?? props.onChange; + return onChange ? (event) => onChange(event.target.checked) : null; + }), + }), + props.label, + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-checkbox--input { + appearance: none; + box-sizing: border-box; + margin: 0; + width: 18px; + height: 18px; + border: 1px solid var(--secondary-text-color); + border-radius: 4px; + position: relative; + transition-property: border-color, background-color; + transition-duration: 0.3s; +} + +.tg-checkbox--input:focus, +.tg-checkbox--input:focus-visible { + outline: none; +} + +.tg-checkbox--input:focus-visible::before { + content: ''; + box-sizing: border-box; + position: absolute; + top: -4px; + left: -4px; + width: 24px; + height: 24px; + border: 3px solid var(--border-color); + border-radius: 7px; +} + +.tg-checkbox--input:checked { + border-color: transparent; + background-color: var(--primary-color); +} + +.tg-checkbox--input:checked::after { + position: absolute; + top: -4px; + left: -3px; + content: 'check'; + font-family: 'Material Symbols Rounded'; + font-size: 22px; + color: white; +} +`); + +export { Checkbox }; diff --git a/testgen/ui/components/frontend/js/components/input.js b/testgen/ui/components/frontend/js/components/input.js new file mode 100644 index 0000000..be2aa03 --- /dev/null +++ b/testgen/ui/components/frontend/js/components/input.js @@ -0,0 +1,104 @@ +/** + * @typedef Properties + * @type {object} + * @property {string?} label + * @property {(string | number)?} value + * @property {string?} placeholder + * @property {string?} icon + * @property {boolean?} clearable + * @property {function?} onChange + * @property {number?} width + */ +import van from '../van.min.js'; +import { debounce, getValue, loadStylesheet } from '../utils.js'; + +const { input, label, i } = van.tags; + +const Input = (/** @type Properties */ props) => { + loadStylesheet('input', stylesheet); + + const value = van.derive(() => getValue(props.value) ?? ''); + van.derive(() => { + const onChange = props.onChange?.val ?? props.onChange; + onChange?.(value.val); + }); + + return label( + { + class: 'flex-column fx-gap-1 text-caption text-capitalize tg-input--label', + style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}`, + }, + props.label, + () => getValue(props.icon) ? i( + { class: 'material-symbols-rounded tg-input--icon' }, + props.icon, + ) : '', + () => getValue(props.clearable) ? i( + { + class: () => `material-symbols-rounded tg-input--clear clickable ${value.val ? '' : 'hidden'}`, + onclick: () => value.val = '', + }, + 'clear', + ) : '', + input({ + class: 'tg-input--field', + value, + placeholder: () => getValue(props.placeholder) ?? 
'', + oninput: debounce(event => value.val = event.target.value, 300), + }), + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-input--label { + position: relative; +} + +.tg-input--icon { + position: absolute; + bottom: 5px; + left: 4px; + font-size: 22px; +} + +.tg-input--icon ~ .tg-input--field { + padding-left: 28px; +} + +.tg-input--clear { + position: absolute; + bottom: 6px; + right: 4px; + font-size: 20px; +} + +.tg-input--clear ~ .tg-input--field { + padding-right: 24px; +} + +.tg-input--field { + box-sizing: border-box; + width: 100%; + height: 32px; + border-radius: 8px; + border: 1px solid transparent; + transition: border-color 0.3s; + background-color: var(--form-field-color); + padding: 4px 8px; + color: var(--primary-text-color); + font-size: 14px; +} + +.tg-input--field::placeholder { + color: var(--disabled-text-color); +} + +.tg-input--field:focus, +.tg-input--field:focus-visible { + outline: none; + border-color: var(--primary-color); +} +`); + +export { Input }; diff --git a/testgen/ui/components/frontend/js/components/radio_group.js b/testgen/ui/components/frontend/js/components/radio_group.js new file mode 100644 index 0000000..0c7f5e4 --- /dev/null +++ b/testgen/ui/components/frontend/js/components/radio_group.js @@ -0,0 +1,104 @@ +/** +* @typedef Option + * @type {object} + * @property {string} label + * @property {string | number | boolean | null} value + * + * @typedef Properties + * @type {object} + * @property {string} label + * @property {Option[]} options + * @property {string | number | boolean | null} selected + * @property {function?} onChange + * @property {number?} width + */ +import van from '../van.min.js'; +import { getRandomId, getValue, loadStylesheet } from '../utils.js'; + +const { div, input, label } = van.tags; + +const RadioGroup = (/** @type Properties */ props) => { + loadStylesheet('radioGroup', stylesheet); + const groupName = getRandomId(); + + return div( + { style: () => `width: ${props.width ? getValue(props.width) + 'px' : 'auto'}` }, + div( + { class: 'text-caption text-capitalize mb-1' }, + props.label, + ), + () => div( + { class: 'flex-row fx-gap-4 tg-radio-group' }, + getValue(props.options).map(option => label( + { class: 'flex-row fx-gap-2 text-capitalize clickable' }, + input({ + type: 'radio', + name: groupName, + value: option.value, + checked: () => option.value === getValue(props.value), + onchange: van.derive(() => { + const onChange = props.onChange?.val ?? props.onChange; + return onChange ? 
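+                            // wrap the handler so it receives this option's value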
() => onChange(option.value) : null; + }), + class: 'tg-radio-group--input', + }), + option.label, + )), + ), + ); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-radio-group { + height: 32px; +} + +.tg-radio-group--input { + appearance: none; + box-sizing: border-box; + margin: 0; + width: 18px; + height: 18px; + border: 1px solid var(--secondary-text-color); + border-radius: 9px; + position: relative; + transition-property: border-color, background-color; + transition-duration: 0.3s; +} + +.tg-radio-group--input:focus, +.tg-radio-group--input:focus-visible { + outline: none; +} + +.tg-radio-group--input:focus-visible::before { + content: ''; + box-sizing: border-box; + position: absolute; + top: -4px; + left: -4px; + width: 24px; + height: 24px; + border: 3px solid var(--border-color); + border-radius: 12px; +} + +.tg-radio-group--input:checked { + border-color: var(--primary-color); +} + +.tg-radio-group--input:checked::after { + content: ''; + box-sizing: border-box; + position: absolute; + top: 3px; + left: 3px; + width: 10px; + height: 10px; + background-color: var(--primary-color); + border-radius: 5px; +} +`); + +export { RadioGroup }; diff --git a/testgen/ui/components/frontend/js/components/select.js b/testgen/ui/components/frontend/js/components/select.js index 5f4f68c..f4fe618 100644 --- a/testgen/ui/components/frontend/js/components/select.js +++ b/testgen/ui/components/frontend/js/components/select.js @@ -13,7 +13,7 @@ */ import van from '../van.min.js'; import { Streamlit } from '../streamlit.js'; -import { getValue, loadStylesheet } from '../utils.js'; +import { getRandomId, getValue, loadStylesheet } from '../utils.js'; const { div, label, option, select } = van.tags; @@ -21,7 +21,7 @@ const Select = (/** @type {Properties} */ props) => { loadStylesheet('select', stylesheet); Streamlit.setFrameHeight(); - const domId = Math.random().toString(36).substring(2); + const domId = getRandomId(); const changeHandler = props.onChange || post; return div( {class: 'tg-select'}, diff --git a/testgen/ui/components/frontend/js/utils.js b/testgen/ui/components/frontend/js/utils.js index d8d712c..b5bdc96 100644 --- a/testgen/ui/components/frontend/js/utils.js +++ b/testgen/ui/components/frontend/js/utils.js @@ -53,4 +53,20 @@ function getValue(/** @type object */ prop) { // van state or static value return prop; } -export { emitEvent, enforceElementWidth, getValue, loadStylesheet, resizeFrameHeightToElement }; +function getRandomId() { + return Math.random().toString(36).substring(2); +} + +// https://stackoverflow.com/a/75988895 +function debounce( + /** @type function */ callback, + /** @type number */ wait, +) { + let timeoutId = null; + return (...args) => { + window.clearTimeout(timeoutId); + timeoutId = window.setTimeout(() => callback(...args), wait); + }; +} + +export { debounce, emitEvent, enforceElementWidth, getRandomId, getValue, loadStylesheet, resizeFrameHeightToElement }; From d87f69cafd8768e11cd86b79c080c19efd5d1733 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 13:07:21 -0500 Subject: [PATCH 53/91] feat(components): add Tree component --- testgen/ui/components/frontend/css/shared.css | 4 + .../components/frontend/js/components/tree.js | 211 ++++++++++++++++++ 2 files changed, 215 insertions(+) create mode 100644 testgen/ui/components/frontend/js/components/tree.js diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index 4955a1b..0d61aa6 100644 --- 
a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -104,6 +104,10 @@ body { display: none !important; } +.invisible { + visibility: hidden !important; +} + .dot { font-size: 10px; font-style: normal; diff --git a/testgen/ui/components/frontend/js/components/tree.js b/testgen/ui/components/frontend/js/components/tree.js new file mode 100644 index 0000000..d29dd2a --- /dev/null +++ b/testgen/ui/components/frontend/js/components/tree.js @@ -0,0 +1,211 @@ +/** + * @typedef TreeNode + * @type {object} + * @property {string} id + * @property {string} label + * @property {string?} classes + * @property {string?} icon + * @property {number?} iconSize + * @property {TreeNode[]?} children + * @property {number?} level + * @property {boolean?} expanded + * @property {boolean?} hidden + * + * @typedef Properties + * @type {object} + * @property {TreeNode[]} nodes + * @property {string} selected + * @property {string} classes + */ +import van from '../van.min.js'; +import { emitEvent, getValue, loadStylesheet } from '../utils.js'; +import { Input } from './input.js'; +import { Button } from './button.js'; + +const { div, i } = van.tags; +const levelOffset = 14; + +const Tree = (/** @type Properties */ props) => { + loadStylesheet('tree', stylesheet); + + // Use only initial prop value as default and maintain internal state + const initialSelection = props.selected?.rawVal || props.selected || null; + const selected = van.state(initialSelection); + + const treeNodes = van.derive(() => { + const nodes = getValue(props.nodes) || []; + const treeSelected = initTreeState(nodes, initialSelection); + if (!treeSelected) { + selected.val = null; + } + return nodes; + }); + + return div( + { class: () => `flex-column ${getValue(props.classes)}` }, + div( + { class: 'flex-row fx-gap-1 tg-tree--actions' }, + Input({ + icon: 'search', + clearable: true, + onChange: (value) => searchTree(treeNodes.val, value), + }), + Button({ + type: 'icon', + icon: 'expand_all', + style: 'width: 24px; height: 24px; padding: 4px;', + tooltip: 'Expand All', + tooltipPosition: 'bottom', + onclick: () => expandOrCollapseTree(treeNodes.val, true), + }), + Button({ + type: 'icon', + icon: 'collapse_all', + style: 'width: 24px; height: 24px; padding: 4px;', + tooltip: 'Collapse All', + tooltipPosition: 'bottom', + onclick: () => expandOrCollapseTree(treeNodes.val, false), + }), + ), + div( + { class: 'tg-tree' }, + () => div( + { class: 'tg-tree--nodes' }, + treeNodes.val.map(node => TreeNode(node, selected)), + ), + ), + ); +}; + +const TreeNode = ( + /** @type TreeNode */ node, + /** @type string */ selected, +) => { + const hasChildren = !!node.children?.length; + return div( + div( + { + class: () => `tg-tree--row flex-row clickable ${node.classes || ''} + ${selected.val === node.id ? 'selected' : ''} + ${node.hidden.val ? 'hidden' : ''}`, + style: `padding-left: ${levelOffset * node.level}px;`, + onclick: () => { + selected.val = node.id; + emitEvent('TreeNodeSelected', { payload: node.id }); + }, + }, + i( + { + class: `material-symbols-rounded text-secondary ${hasChildren ? '' : 'invisible'}`, + onclick: () => { + node.expanded.val = hasChildren ? !node.expanded.val : false; + }, + }, + () => node.expanded.val ? 'arrow_drop_down' : 'arrow_right', + ), + node.icon ? i( + { + class: 'material-symbols-rounded tg-tree--row-icon', + style: `font-size: ${node.iconSize || 24}px;`, + }, + node.icon, + ) : null, + node.label, + ), + hasChildren ? div( + { class: () => node.expanded.val ? 
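+                // hide the whole children container while the node is collapsed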
'' : 'hidden' }, + node.children.map(node => TreeNode(node, selected)), + ) : null, + ); +}; + +const initTreeState = ( + /** @type TreeNode[] */ nodes, + /** @type string */ selected, + /** @type number */ level = 0, +) => { + let treeExpanded = false; + nodes.forEach(node => { + node.level = level; + // Expand node if it is initial selection + let expanded = node.id === selected; + if (node.children) { + // Expand node if initial selection is a descendent + expanded = initTreeState(node.children, selected, level + 1) || expanded; + } + node.expanded = van.state(expanded); + node.hidden = van.state(false); + treeExpanded = treeExpanded || expanded; + }); + return treeExpanded; +}; + +const searchTree = ( + /** @type TreeNode[] */ nodes, + /** @type string */ search, +) => { + nodes.forEach(node => { + let hidden = !node.label.includes(search); + if (node.children) { + searchTree(node.children, search); + hidden = hidden && node.children.every(child => child.hidden.rawVal); + } + node.hidden.val = hidden; + }); +}; + +const expandOrCollapseTree = ( + /** @type TreeNode[] */ nodes, + /** @type boolean */ expanded, +) => { + nodes.forEach(node => { + if (node.children) { + expandOrCollapseTree(node.children, expanded); + node.expanded.val = expanded; + } + }); +} + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-tree { + overflow: auto; +} + +.tg-tree--actions { + margin: 4px; +} + +.tg-tree--nodes { + width: fit-content; + min-width: 100%; +} + +.tg-tree--row { + box-sizing: border-box; + width: auto; + min-width: fit-content; + border: solid transparent; + border-width: 1px 0; + padding-right: 8px; + transition: background-color 0.3s; +} + +.tg-tree--row:hover { + background-color: var(--sidebar-item-hover-color); +} + +.tg-tree--row.selected { + background-color: #06a04a17; + font-weight: 500; +} + +.tg-tree--row-icon { + margin-right: 4px; + width: 24px; + color: #B0BEC5; + text-align: center; +} +`); + +export { Tree }; From fd343eb0b253c360c270b137b3435babe64d6509 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 13:10:13 -0500 Subject: [PATCH 54/91] feat(components): add ColumnProfile component --- .../frontend/js/components/column_profile.js | 287 ++++++++++++++++++ .../components/frontend/js/display_utils.js | 31 +- 2 files changed, 308 insertions(+), 10 deletions(-) create mode 100644 testgen/ui/components/frontend/js/components/column_profile.js diff --git a/testgen/ui/components/frontend/js/components/column_profile.js b/testgen/ui/components/frontend/js/components/column_profile.js new file mode 100644 index 0000000..bdbef62 --- /dev/null +++ b/testgen/ui/components/frontend/js/components/column_profile.js @@ -0,0 +1,287 @@ +/** + * @typedef ColumnProfile + * @type {object} + * @property {'A' | 'B' | 'D' | 'N' | 'T' | 'X'} general_type + * * Value Counts + * @property {number} record_ct + * @property {number} value_ct + * @property {number} distinct_value_ct + * @property {number} null_value_ct + * @property {number} zero_value_ct + * * Alpha + * @property {number} zero_length_ct + * @property {number} filled_value_ct + * @property {number} includes_digit_ct + * @property {number} numeric_ct + * @property {number} date_ct + * @property {number} quoted_value_ct + * @property {number} lead_space_ct + * @property {number} embedded_space_ct + * @property {number} avg_embedded_spaces + * @property {number} min_length + * @property {number} max_length + * @property {number} avg_length + * @property {string} min_text + * @property {string} max_text 
+ * @property {number} distinct_std_value_ct + * @property {number} distinct_pattern_ct + * @property {'STREET_ADDR' | 'STATE_USA' | 'PHONE_USA' | 'EMAIL' | 'ZIP_USA' | 'FILE_NAME' | 'CREDIT_CARD' | 'DELIMITED_DATA' | 'SSN'} std_pattern_match + * @property {string} top_freq_values + * @property {string} top_patterns + * * Numeric + * @property {number} min_value + * @property {number} min_value_over_0 + * @property {number} max_value + * @property {number} avg_value + * @property {number} stdev_value + * @property {number} percentile_25 + * @property {number} percentile_50 + * @property {number} percentile_75 + * * Date + * @property {number} min_date + * @property {number} max_date + * @property {number} before_1yr_date_ct + * @property {number} before_5yr_date_ct + * @property {number} before_20yr_date_ct + * @property {number} within_1yr_date_ct + * @property {number} within_1mo_date_ct + * @property {number} future_date_ct + * * Boolean + * @property {number} boolean_true_ct + */ +import van from '../van.min.js'; +import { Attribute } from '../components/attribute.js'; +import { SummaryBar } from './summary_bar.js'; +import { PercentBar } from './percent_bar.js'; +import { FrequencyBars } from './frequency_bars.js'; +import { BoxPlot } from './box_plot.js'; +import { loadStylesheet } from '../utils.js'; +import { formatTimestamp, roundDigits } from '../display_utils.js'; + +const { div } = van.tags; +const columnTypeFunctionMap = { + A: AlphaColumn, + B: BooleanColumn, + D: DatetimeColumn, + N: NumericColumn, +}; +const attributeWidth = 200; +const percentWidth = 250; +const summaryWidth = 400; +const summaryHeight = 10; +const boxPlotWidth = 800; + +const ColumnProfile = (/** @type ColumnProfile */ item) => { + loadStylesheet('column_profile', stylesheet); + const columnFunction = columnTypeFunctionMap[item.general_type]; + return columnFunction ? columnFunction(item) : null; +}; + +function AlphaColumn(/** @type ColumnProfile */ item) { + const standardPatternLabels = { + STREET_ADDR: 'Street Address', + STATE_USA: 'State (USA)', + PHONE_USA: 'Phone (USA)', + EMAIL: 'Email', + ZIP_USA: 'Zip Code (USA)', + FILE_NAME: 'Filename', + CREDIT_CARD: 'Credit Card', + DELIMITED_DATA: 'Delimited Data', + SSN: 'SSN (USA)', + }; + let standardPattern = standardPatternLabels[item.std_pattern_match]; + if (!standardPattern) { + standardPattern = (item.std_pattern_match || '').split('_') + .map(word => word ? 
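+                // no friendly label matched: split the raw pattern code on underscores and capitalize each word's first letter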
(word[0].toUpperCase() + word.substring(1)) : '') + .join(' '); + } + + const total = item.record_ct; + + return div( + { class: 'flex-column fx-gap-4' }, + div( + { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4 tg-profile--fx-basis-content' }, + div( + { + class: 'flex-column fx-gap-5', + }, + DistinctsBar(item), + SummaryBar({ + height: summaryHeight, + width: summaryWidth, + label: `Missing Values: ${item.zero_length_ct + item.zero_value_ct + item.filled_value_ct + item.null_value_ct}`, + items: [ + { label: 'Values', value: item.value_ct - item.zero_value_ct - item.filled_value_ct - item.zero_length_ct, color: 'green' }, + { label: 'Zero Values', value: item.zero_value_ct, color: 'brown' }, + { label: 'Dummy Values', value: item.filled_value_ct, color: 'orange' }, + { label: 'Zero Length', value: item.zero_length_ct, color: 'yellow' }, + { label: 'Null', value: item.null_value_ct, color: 'brownLight' }, + ], + }), + ), + div( + { + class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-3 mb-1 tg-profile--fx-grow-content', + }, + div( + { class: 'flex-column fx-gap-3' }, + PercentBar({ label: 'Includes Digits', value: item.includes_digit_ct, total, width: percentWidth }), + PercentBar({ label: 'Numeric Values', value: item.numeric_ct, total, width: percentWidth }), + PercentBar({ label: 'Date Values', value: item.date_ct, total, width: percentWidth }), + PercentBar({ label: 'Quoted Values', value: item.quoted_value_ct, total, width: percentWidth }), + ), + div( + { class: 'flex-column fx-gap-3' }, + PercentBar({ label: 'Leading Spaces', value: item.lead_space_ct, total, width: percentWidth }), + PercentBar({ label: 'Embedded Spaces', value: item.embedded_space_ct ?? 0, total, width: percentWidth }), + Attribute({ label: 'Average Embedded Spaces', value: roundDigits(item.avg_embedded_spaces), width: attributeWidth }), + ), + ), + ), + div( + { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4' }, + Attribute({ label: 'Minimum Length', value: item.min_length, width: attributeWidth }), + Attribute({ label: 'Maximum Length', value: item.max_length, width: attributeWidth }), + Attribute({ label: 'Average Length', value: roundDigits(item.avg_length), width: attributeWidth }), + Attribute({ label: 'Minimum Text', value: item.min_text, width: attributeWidth }), + Attribute({ label: 'Maximum Text', value: item.max_text, width: attributeWidth }), + Attribute({ label: 'Distinct Standard Values', value: item.distinct_std_value_ct, width: attributeWidth }), + Attribute({ label: 'Distinct Patterns', value: item.distinct_pattern_ct, width: attributeWidth }), + Attribute({ label: 'Standard Pattern Match', value: standardPattern, width: attributeWidth }), + ), + item.top_freq_values || item.top_patterns ? div( + { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4 mt-2 mb-2 tg-profile--fx-basis-content' }, + item.top_freq_values ? FrequencyBars({ + title: 'Frequent Values', + total: item.record_ct, + items: item.top_freq_values.substring(2).split('\n| ').map(parts => { + const [value, count] = parts.split(' | '); + return { value, count: Number(count) }; + }), + }) : null, + item.top_patterns ? 
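+            // top_patterns alternates counts and pattern strings ("count | pattern | ..."); the reduce below pairs them into items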
FrequencyBars({ + title: 'Frequent Patterns', + total: item.record_ct, + items: item.top_patterns.split(' | ').reduce((array, item, index) => { + if (index % 2) { + array[(index - 1) / 2].value = item; + } else { + array.push({ count: Number(item) }); + } + return array; + }, []), + }) : null, + ) : null, + ); +} + +function BooleanColumn(/** @type ColumnProfile */ item) { + return SummaryBar({ + height: summaryHeight, + width: summaryWidth, + label: `Record count: ${item.record_ct}`, + items: [ + { label: 'True', value: item.boolean_true_ct, color: 'brownLight' }, + { label: 'False', value: item.value_ct - item.boolean_true_ct, color: 'brown' }, + { label: 'Null', value: item.null_value_ct, color: 'brownDark' }, + ], + }); +} + +function DatetimeColumn(/** @type ColumnProfile */ item) { + const total = item.record_ct; + + return div( + { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4 tg-profile--fx-basis-content' }, + div( + DistinctsBar(item), + div( + { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-3 mt-5 tg-profile--fx-grow-content' }, + Attribute({ label: 'Minimum Date', value: formatTimestamp(item.min_date, true) }), + Attribute({ label: 'Maximum Date', value: formatTimestamp(item.max_date, true) }), + ), + ), + div( + { + class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-3 mb-1 tg-profile--fx-grow-content', + }, + div( + { class: 'flex-column fx-gap-3' }, + PercentBar({ label: 'Before 1 Year', value: item.before_1yr_date_ct, total, width: percentWidth }), + PercentBar({ label: 'Before 5 Year', value: item.before_5yr_date_ct, total, width: percentWidth }), + PercentBar({ label: 'Before 20 Year', value: item.before_20yr_date_ct, total, width: percentWidth }), + ), + div( + { class: 'flex-column fx-gap-3' }, + PercentBar({ label: 'Within 1 Year', value: item.within_1yr_date_ct, total, width: percentWidth }), + PercentBar({ label: 'Within 1 Month', value: item.within_1mo_date_ct, total, width: percentWidth }), + PercentBar({ label: 'Future Dates', value: item.future_date_ct, total, width: percentWidth }), + ), + ), + ); +} + +function NumericColumn(/** @type ColumnProfile */ item) { + return [ + div( + { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4 mb-5 tg-profile--fx-basis-content tg-profile--fx-grow-content' }, + div( + DistinctsBar(item), + ), + div( + PercentBar({ label: 'Zero Values', value: item.zero_value_ct, total: item.record_ct, width: percentWidth }), + ), + ), + div( + { class: 'flex-row fx-flex-wrap fx-align-flex-start fx-gap-4' }, + Attribute({ label: 'Minimum Value', value: item.min_value, width: attributeWidth }), + Attribute({ label: 'Minimum Value > 0', value: item.min_value_over_0, width: attributeWidth }), + Attribute({ label: 'Maximum Value', value: item.max_value, width: attributeWidth }), + Attribute({ label: 'Average Value', value: roundDigits(item.avg_value), width: attributeWidth }), + Attribute({ label: 'Standard Deviation', value: roundDigits(item.stdev_value), width: attributeWidth }), + Attribute({ label: '25th Percentile', value: roundDigits(item.percentile_25), width: attributeWidth }), + Attribute({ label: 'Median Value', value: roundDigits(item.percentile_50), width: attributeWidth }), + Attribute({ label: '75th Percentile', value: roundDigits(item.percentile_75), width: attributeWidth }), + ), + div( + { class: 'flex-row fx-justify-center mt-5 tg-profile--fx-grow-content' }, + BoxPlot({ + minimum: item.min_value, + maximum: item.max_value, + median: item.percentile_50, + lowerQuartile: item.percentile_25, + 
upperQuartile: item.percentile_75, + average: item.avg_value, + standardDeviation: item.stdev_value, + width: boxPlotWidth, + }), + ), + ]; +} + +const DistinctsBar = (/** @type ColumnProfile */ item) => { + return SummaryBar({ + height: summaryHeight, + width: summaryWidth, + label: `Record count: ${item.record_ct}`, + items: [ + { label: 'Distinct', value: item.distinct_value_ct, color: 'blue' }, + { label: 'Non-Distinct', value: item.value_ct - item.distinct_value_ct, color: 'blueLight' }, + { label: 'Null', value: item.null_value_ct, color: 'brownLight' }, + ], + }); +}; + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-profile--fx-grow-content > * { + flex-grow: 1; +} + +.tg-profile--fx-basis-content > * { + flex: 300px; +} +`); + +export { ColumnProfile }; diff --git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js index a2d6384..1be340b 100644 --- a/testgen/ui/components/frontend/js/display_utils.js +++ b/testgen/ui/components/frontend/js/display_utils.js @@ -1,13 +1,17 @@ -function formatTimestamp(/** @type number */ timestamp) { - if (!timestamp) { - return '--'; +function formatTimestamp( + /** @type number | string */ timestamp, + /** @type boolean */ show_year, +) { + if (timestamp) { + const date = new Date(timestamp); + if (!isNaN(date)) { + const months = [ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' ]; + const hours = date.getHours(); + const minutes = date.getMinutes(); + return `${months[date.getMonth()]} ${date.getDate()}, ${show_year ? date.getFullYear() + ' at ': ''}${hours % 12}:${String(minutes).padStart(2, '0')} ${hours / 12 > 1 ? 'PM' : 'AM'}`; + } } - - const date = new Date(timestamp); - const months = [ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' ]; - const hours = date.getHours(); - const minutes = date.getMinutes(); - return `${months[date.getMonth()]} ${date.getDate()}, ${hours % 12}:${String(minutes).padStart(2, '0')} ${hours / 12 > 1 ? 
'PM' : 'AM'}`; + return '--'; } function formatDuration(/** @type string */ duration) { @@ -26,6 +30,13 @@ function formatDuration(/** @type string */ duration) { return formatted.trim() || '< 1s'; } +function roundDigits(/** @type number | string */ number, /** @type number */ precision = 3) { + if (!['number', 'string'].includes(typeof number) || isNaN(number)) { + return '--'; + } + return parseFloat(Number(number).toPrecision(precision)); +} + // https://m2.material.io/design/color/the-color-system.html#tools-for-picking-colors const colorMap = { red: '#EF5350', // Red 400 @@ -47,4 +58,4 @@ const colorMap = { emptyLight: 'var(--empty-light)', // Light: Gray 50, Dark: Gray 900 } -export { formatTimestamp, formatDuration, colorMap }; +export { formatTimestamp, formatDuration, roundDigits, colorMap }; From b1bf3eaefe044b629e8439d21c5e5b4d76e91b11 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 13:10:42 -0500 Subject: [PATCH 55/91] feat(ui): add Data Hierarchy page --- testgen/ui/assets/style.css | 9 + testgen/ui/bootstrap.py | 2 + testgen/ui/components/frontend/css/shared.css | 29 +- testgen/ui/components/frontend/js/main.js | 2 + .../frontend/js/pages/data_hierarchy.js | 663 ++++++++++++++++++ .../components/widgets/testgen_component.py | 33 +- testgen/ui/views/data_hierarchy.py | 487 +++++++++++++ testgen/utils/__init__.py | 10 + 8 files changed, 1224 insertions(+), 11 deletions(-) create mode 100644 testgen/ui/components/frontend/js/pages/data_hierarchy.js create mode 100644 testgen/ui/views/data_hierarchy.py diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css index 67266d7..3122291 100644 --- a/testgen/ui/assets/style.css +++ b/testgen/ui/assets/style.css @@ -51,6 +51,7 @@ footer { /* Sidebar */ section[data-testid="stSidebar"] { + width: 250px; z-index: 999; background-color: var(--sidebar-background-color); } @@ -86,6 +87,14 @@ div[data-testid="stDialog"] div[role="dialog"] { } /* */ +div[data-testid="stSpinner"] { + background: transparent; +} + +div[data-testid="stSpinner"] > div > i { + border-color: var(--primary-color) rgba(49, 51, 63, 0.2) rgba(49, 51, 63, 0.2); +} + /* Theming for buttons, tabs and form inputs */ button[data-testid="stBaseButton-secondary"]:hover, button[data-testid="stBaseButton-secondary"]:focus:not(:active), diff --git a/testgen/ui/bootstrap.py b/testgen/ui/bootstrap.py index e3a99a6..414f7e5 100644 --- a/testgen/ui/bootstrap.py +++ b/testgen/ui/bootstrap.py @@ -11,6 +11,7 @@ from testgen.ui.navigation.router import Router from testgen.ui.session import session from testgen.ui.views.connections import ConnectionsPage +from testgen.ui.views.data_hierarchy import DataHierarchyPage from testgen.ui.views.login import LoginPage from testgen.ui.views.overview import OverviewPage from testgen.ui.views.profiling_anomalies import ProfilingAnomaliesPage @@ -27,6 +28,7 @@ BUILTIN_PAGES: list[type[Page]] = [ LoginPage, OverviewPage, + DataHierarchyPage, DataProfilingPage, ProfilingResultsPage, ProfilingAnomaliesPage, diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css index 0d61aa6..460fc3a 100644 --- a/testgen/ui/components/frontend/css/shared.css +++ b/testgen/ui/components/frontend/css/shared.css @@ -155,10 +155,23 @@ body { color: var(--secondary-text-color); } +.text-disabled { + color: var(--disabled-text-color); +} + .text-caption { font-size: 12px; color: var(--caption-text-color); } + +.text-error { + color: var(--error-color); +} + +.text-green { + color: 
var(--primary-color); +} + .text-capitalize { text-transform: capitalize; } @@ -256,7 +269,7 @@ body { /* Whitespace utilities */ .mt-0 { - margin-top: 2px; + margin-top: 0; } .mt-1 { @@ -288,7 +301,7 @@ body { } .mr-0 { - margin-right: 2px; + margin-right: 0; } .mr-1 { @@ -320,7 +333,7 @@ body { } .mb-0 { - margin-bottom: 2px; + margin-bottom: 0; } .mb-1 { @@ -352,7 +365,7 @@ body { } .ml-0 { - margin-left: 2px; + margin-left: 0; } .ml-1 { @@ -384,7 +397,7 @@ body { } .pt-0 { - padding-top: 2px; + padding-top: 0; } .pt-1 { @@ -416,7 +429,7 @@ body { } .pr-0 { - padding-right: 2px; + padding-right: 0; } .pr-1 { @@ -448,7 +461,7 @@ body { } .pb-0 { - padding-bottom: 2px; + padding-bottom: 0; } .pb-1 { @@ -480,7 +493,7 @@ body { } .pl-0 { - padding-left: 2px; + padding-left: 0; } .pl-1 { diff --git a/testgen/ui/components/frontend/js/main.js b/testgen/ui/components/frontend/js/main.js index 3dc7f62..bc75e9a 100644 --- a/testgen/ui/components/frontend/js/main.js +++ b/testgen/ui/components/frontend/js/main.js @@ -17,6 +17,7 @@ import { SortingSelector } from './components/sorting_selector.js'; import { TestRuns } from './pages/test_runs.js'; import { ProfilingRuns } from './pages/profiling_runs.js'; import { DatabaseFlavorSelector } from './components/flavor_selector.js'; +import { DataHierarchy } from './pages/data_hierarchy.js'; let currentWindowVan = van; let topWindowVan = window.top.van; @@ -34,6 +35,7 @@ const TestGenComponent = (/** @type {string} */ id, /** @type {object} */ props) test_runs: TestRuns, profiling_runs: ProfilingRuns, database_flavor_selector: DatabaseFlavorSelector, + data_hierarchy: DataHierarchy, }; if (Object.keys(componentById).includes(id)) { diff --git a/testgen/ui/components/frontend/js/pages/data_hierarchy.js b/testgen/ui/components/frontend/js/pages/data_hierarchy.js new file mode 100644 index 0000000..a1d09ce --- /dev/null +++ b/testgen/ui/components/frontend/js/pages/data_hierarchy.js @@ -0,0 +1,663 @@ +/** + * @typedef ColumnPath + * @type {object} + * @property {string} column_id + * @property {string} table_id + * @property {string} column_name + * @property {string} table_name + * @property {'A' | 'B' | 'D' | 'N' | 'T' | 'X'} general_type + * @property {number} column_drop_date + * @property {number} table_drop_date + * + * @typedef Anomaly + * @type {object} + * @property {string} column_name + * @property {string} anomaly_name + * @property {'Definite' | 'Likely' | 'Possible' | 'Potential PII'} issue_likelihood + * @property {string} detail + * @property {'High' | 'Moderate'} pii_risk + * + * @typedef TestIssue + * @type {object} + * @property {string} column_name + * @property {string} test_name + * @property {'Failed' | 'Warning' | 'Error' } result_status + * @property {string} result_message + * @property {string} test_suite + * @property {string} test_run_id + * @property {number} test_run_date + * + * @typedef Column + * @type {ColumnProfile} + * @property {string} id + * @property {'column'} type + * @property {string} column_name + * @property {string} table_name + * @property {string} table_group_id + * * Characteristics + * @property {string} column_type + * @property {string} functional_data_type + * @property {string} datatype_suggestion + * @property {number} add_date + * @property {number} last_mod_date + * @property {number} drop_date + * * Column Metadata + * @property {boolean} critical_data_element + * @property {string} data_source + * @property {string} source_system + * @property {string} source_process + * @property {string} 
business_domain + * @property {string} stakeholder_group + * @property {string} transform_level + * @property {string} aggregation_level + * * Table Metadata + * @property {boolean} table_critical_data_element + * @property {string} table_cdata_source + * @property {string} table_csource_system + * @property {string} table_csource_process + * @property {string} table_cbusiness_domain + * @property {string} table_cstakeholder_group + * @property {string} table_ctransform_level + * @property {string} table_caggregation_level + * * Latest Profile & Test Runs + * @property {string} latest_profile_id + * @property {number} latest_profile_date + * @property {number} latest_test_run_date + * * Issues + * @property {Anomaly[]} latest_anomalies + * @property {TestIssue[]} latest_test_issues + * + * @typedef Table + * @type {object} + * @property {string} id + * @property {'table'} type + * @property {string} table_name + * @property {string} table_group_id + * * Characteristics + * @property {string} functional_table_type + * @property {number} record_ct + * @property {number} column_ct + * @property {number} data_point_ct + * @property {number} add_date + * @property {number} drop_date + * * Metadata + * @property {boolean} critical_data_element + * @property {string} data_source + * @property {string} source_system + * @property {string} source_process + * @property {string} business_domain + * @property {string} stakeholder_group + * @property {string} transform_level + * @property {string} aggregation_level + * * Latest Profile & Test Runs + * @property {string} latest_profile_id + * @property {number} latest_profile_date + * @property {number} latest_test_run_date + * * Issues + * @property {Anomaly[]} latest_anomalies + * @property {TestResult[]} latest_test_results + * + * @typedef Properties + * @type {object} + * @property {ColumnPath[]} columns + * @property {Table | Column} selected + */ +import van from '../van.min.js'; +import { Tree } from '../components/tree.js'; +import { Card } from '../components/card.js'; +import { EditableCard } from '../components/editable_card.js'; +import { Link } from '../components/link.js'; +import { Attribute } from '../components/attribute.js'; +import { Input } from '../components/input.js'; +import { TooltipIcon } from '../components/tooltip_icon.js'; +import { Streamlit } from '../streamlit.js'; +import { emitEvent, getValue, loadStylesheet } from '../utils.js'; +import { formatTimestamp } from '../display_utils.js'; +import { ColumnProfile } from '../components/column_profile.js'; +import { RadioGroup } from '../components/radio_group.js'; + +const { div, h2, span, i } = van.tags; + +const tableIcon = { icon: 'table', iconSize: 20 }; +const columnIcons = { + A: { icon: 'abc' }, + B: { icon: 'toggle_off', iconSize: 20 }, + D: { icon: 'calendar_clock', iconSize: 20 }, + N: { icon: '123' }, + T: { icon: 'calendar_clock', iconSize: 20 }, + X: { icon: 'question_mark', iconSize: 18 }, +}; + +const DataHierarchy = (/** @type Properties */ props) => { + loadStylesheet('data_hierarchy', stylesheet); + Streamlit.setFrameHeight(1); // Non-zero value is needed to render + window.frameElement.style.setProperty('height', 'calc(100vh - 200px)'); + window.testgen.isPage = true; + + const treeNodes = van.derive(() => { + let columns = []; + try { + columns = JSON.parse(getValue(props.columns)); + } catch { } + + const tables = {}; + columns.forEach(({ column_id, table_id, column_name, table_name, general_type, column_drop_date, table_drop_date }) => { + if 
(!tables[table_id]) { + tables[table_id] = { + id: table_id, + label: table_name, + classes: table_drop_date ? 'text-disabled' : '', + ...tableIcon, + children: [], + }; + } + tables[table_id].children.push({ + id: column_id, + label: column_name, + classes: column_drop_date ? 'text-disabled' : '', + ...columnIcons[general_type || 'X'], + }); + }); + return Object.values(tables); + }); + + const selectedItem = van.derive(() => { + try { + return JSON.parse(getValue(props.selected)); + } catch (e) { + console.error(e) + return null; + } + }); + + return div( + { class: 'flex-row tg-dh' }, + Tree({ + nodes: treeNodes, + // Use .rawVal, so only initial value from query params is passed to tree + selected: selectedItem.rawVal?.id, + classes: 'tg-dh--tree', + }), + () => { + const item = selectedItem.val; + if (item) { + return div( + { class: 'tg-dh--details' }, + h2( + { class: 'tg-dh--title' }, + item.type === 'column' ? [ + span( + { class: 'text-secondary' }, + `${item.table_name}: `, + ), + item.column_name, + ] : item.table_name, + ), + span( + { class: 'flex-row fx-gap-1 fx-justify-content-flex-end mb-2 text-secondary' }, + '* as of latest profiling run on ', + Link({ + href: 'profiling-runs:results', + params: { + run_id: item.latest_profile_id, + table_name: item.table_name, + column_name: item.column_name, + }, + open_new: true, + label: formatTimestamp(item.latest_profile_date), + }), + ), + CharacteristicsCard(item), + item.type === 'column' ? Card({ + title: 'Value Distribution *', + content: ColumnProfile(item), + }) : null, + MetadataCard(item), + PotentialPIICard(item), + HygieneIssuesCard(item), + TestIssuesCard(item), + ); + } + + return div( + { class: 'flex-column fx-align-flex-center fx-justify-center tg-dh--no-selection' }, + i( + { class: 'material-symbols-rounded text-disabled mb-5' }, + 'quick_reference_all', + ), + span( + { class: 'text-secondary' }, + 'Select a table or column on the left to view its details.', + ), + ); + }, + ); +}; + +const CharacteristicsCard = (/** @type Table | Column */ item) => { + let attributes = []; + if (item.type === 'column') { + attributes.push( + { key: 'column_type', label: 'Data Type' }, + { key: 'datatype_suggestion', label: 'Suggested Data Type' }, + { key: 'functional_data_type', label: 'Semantic Data Type' }, + { key: 'add_date', label: 'First Detected' }, + ); + if (item.last_mod_date !== item.add_date) { + attributes.push({ key: 'last_mod_date', label: 'Modification Detected' }); + } + } else { + attributes.push( + { key: 'functional_table_type', label: 'Semantic Table Type' }, + { key: 'record_ct', label: 'Row Count' }, + { key: 'column_ct', label: 'Column Count' }, + { key: 'data_point_ct', label: 'Data Point Count' }, + { key: 'add_date', label: 'First Detected' }, + ); + } + if (item.drop_date) { + attributes.push({ key: 'drop_date', label: 'Drop Detected' }); + } + + return Card({ + title: `${item.type} Characteristics *`, + content: div( + { class: 'flex-row fx-flex-wrap fx-gap-4' }, + attributes.map(({ key, label }) => { + let value = item[key]; + if (key === 'column_type') { + const { icon, iconSize } = columnIcons[item.general_type || 'X']; + value = div( + { class: 'flex-row' }, + i( + { + class: 'material-symbols-rounded tg-dh--column-icon', + style: `font-size: ${iconSize || 24}px;`, + }, + icon, + ), + (value || 'unknown').toLowerCase(), + ); + } else if (key === 'datatype_suggestion') { + value = (value || '').toLowerCase(); + } else if (key === 'functional_table_type') { + value = (value || '').split('-') + 
.map(word => word ? (word[0].toUpperCase() + word.substring(1)) : '') + .join(' '); + } else if (['add_date', 'last_mod_date', 'drop_date'].includes(key)) { + value = formatTimestamp(value, true); + if (key === 'drop_date') { + label = span({ class: 'text-error' }, label); + } + } + + return Attribute({ label, value, width: 300 }); + }), + ), + }); +}; + +const MetadataCard = (/** @type Table | Column */ item) => { + const attributes = [ + 'critical_data_element', + 'data_source', + 'source_system', + 'source_process', + 'business_domain', + 'stakeholder_group', + 'transform_level', + 'aggregation_level', + ].map(key => ({ + key, + label: key.replaceAll('_', ' '), + state: van.state(item[key]), + inherited: item[`table_${key}`], // Table values inherited by column + })); + + const InheritedIcon = () => TooltipIcon({ + icon: 'layers', + iconSize: 18, + classes: 'text-disabled', + tooltip: 'Inherited from table metadata', + tooltipPosition: 'top-right', + }); + const width = 300; + + const content = div( + { class: 'flex-row fx-flex-wrap fx-gap-4' }, + attributes.map(({ key, label, state, inherited }) => { + let value = state.rawVal ?? inherited; + const isInherited = item.type === 'column' && state.rawVal === null; + + if (key === 'critical_data_element') { + return span( + { class: 'flex-row fx-gap-1', style: `width: ${width}px` }, + i( + { class: `material-symbols-rounded ${value ? 'text-green' : 'text-disabled'}` }, + value ? 'check_circle' : 'cancel', + ), + span( + { class: value ? 'text-capitalize' : 'text-secondary' }, + value ? label : `Not a ${label}`, + ), + isInherited ? InheritedIcon() : null, + ); + } + + if (isInherited && value) { + value = span( + { class: 'flex-row fx-gap-1' }, + InheritedIcon(), + value, + ); + } + return Attribute({ label, value, width }); + }), + ); + + const editingContent = div( + { class: 'flex-row fx-flex-wrap fx-gap-4' }, + attributes.map(({ key, label, state, inherited }) => { + if (key === 'critical_data_element') { + const options = [ + { label: 'Yes', value: true }, + { label: 'No', value: false }, + ]; + if (item.type === 'column') { + options.push({ label: 'Inherit', value: null }); + } + return RadioGroup({ + label, width, options, + value: item.type === 'column' ? state.rawVal : !!state.rawVal, // Coerce null to false for tables + onChange: (value) => state.val = value, + }); + }; + + return Input({ + label, width, + value: state.rawVal, + placeholder: inherited ? 
`Inherited: ${inherited}` : null, + onChange: (value) => state.val = value || null, + }); + }), + ); + + return EditableCard({ + title: `${item.type} Metadata`, + content, + // Pass as function so the block is re-rendered with reset values when re-editing after a cancel + editingContent: () => editingContent, + onSave: () => { + const payload = attributes.reduce((object, { key, state }) => { + object[key] = state.rawVal; + return object; + }, { id: item.id }); + emitEvent('MetadataChanged', { payload }) + }, + // Reset states to original values on cancel + onCancel: () => attributes.forEach(({ key, state }) => state.val = item[key]), + hasChanges: () => attributes.some(({ key, state }) => state.val !== item[key]), + }); +}; + +const PotentialPIICard = (/** @type Table | Column */ item) => { + const riskColors = { + High: 'red', + Moderate: 'orange', + }; + + const attributes = [ + { + key: 'detail', width: 150, label: 'Type', + value_function: (issue) => (issue.detail || '').split('Type: ')[1], + }, + { + key: 'pii_risk', width: 100, label: 'Risk', classes: 'text-secondary', + value_function: (issue) => div( + { class: 'flex-row' }, + span({ class: 'dot mr-2', style: `color: var(--${riskColors[issue.pii_risk]});` }), + issue.pii_risk, + ), + }, + ]; + if (item.type === 'table') { + attributes.unshift( + { key: 'column_name', width: 150, label: 'Column' }, + ); + } + + const potentialPII = item.latest_anomalies.filter(({ issue_likelihood }) => issue_likelihood === 'Potential PII'); + const linkProps = { + href: 'profiling-runs:hygiene', + params: { run_id: item.latest_profile_id, issue_class: 'Potential PII' }, + }; + + return IssuesCard('Potential PII', potentialPII, attributes, linkProps, 'No potential PII detected'); +}; + +const HygieneIssuesCard = (/** @type Table | Column */ item) => { + const likelihoodColors = { + Definite: 'red', + Likely: 'orange', + Possible: 'yellow', + }; + + const attributes = [ + { key: 'anomaly_name', width: 200, label: 'Issue' }, + { + key: 'issue_likelihood', width: 80, label: 'Likelihood', classes: 'text-secondary', + value_function: (issue) => div( + { class: 'flex-row' }, + span({ class: 'dot mr-2', style: `color: var(--${likelihoodColors[issue.issue_likelihood]});` }), + issue.issue_likelihood, + ), + }, + { key: 'detail', width: 300, label: 'Detail' }, + ]; + if (item.type === 'table') { + attributes.unshift( + { key: 'column_name', width: 150, label: 'Column' }, + ); + } + + const hygieneIssues = item.latest_anomalies.filter(({ issue_likelihood }) => issue_likelihood !== 'Potential PII'); + const linkProps = { + href: 'profiling-runs:hygiene', + params: { run_id: item.latest_profile_id }, + }; + + return IssuesCard('Hygiene Issues', hygieneIssues, attributes, linkProps, 'No hygiene issues detected'); +}; + +const TestIssuesCard = (/** @type Table | Column */ item) => { + const statusColors = { + Failed: 'red', + Warning: 'yellow', + Error: 'brown', + }; + + const attributes = [ + { key: 'test_name', width: 150, label: 'Test' }, + { + key: 'result_status', width: 80, label: 'Status', classes: 'text-secondary', + value_function: (issue) => div( + { class: 'flex-row' }, + span({ class: 'dot mr-2', style: `color: var(--${statusColors[issue.result_status]});` }), + issue.result_status, + ), + }, + { key: 'result_message', width: 300, label: 'Details' }, + { + key: 'test_run_id', width: 150, label: 'Test Suite | Start Time', + value_function: (issue) => div( + div( + { class: 'text-secondary' }, + issue.test_suite, + ), + Link({ + href: 
'test-runs:results', + params: { run_id: issue.test_run_id }, + open_new: true, + label: formatTimestamp(issue.test_run_date), + style: 'font-size: 12px; margin-top: 2px;', + }), + ), + }, + ]; + if (item.type === 'table') { + attributes.unshift( + { key: 'column_name', width: 150, label: 'Column' }, + ); + } + + let noneContent = 'No test issues detected'; + if (!item.latest_test_run_date) { + if (item.drop_date) { + noneContent = span({ class: 'text-secondary' }, `No test results for ${item.type}`); + } else { + noneContent = span( + { class: 'text-secondary flex-row fx-gap-1 fx-justify-content-flex-end' }, + `No test results yet for ${item.type}.`, + Link({ + href: 'test-suites', + open_new: true, + label: 'Go to Test Suites', + right_icon: 'chevron_right', + }), + ); + } + } + + return IssuesCard('Test Issues', item.latest_test_issues, attributes, null, noneContent); +}; + +/** + * @typedef Attribute + * @type {object} + * @property {string} key + * @property {number} width + * @property {string} label + * @property {string} classes + * @property {function?} value_function + */ +const IssuesCard = ( + /** @type string */ title, + /** @type (Anomaly | TestIssue)[] */ items, + /** @type Attribute[] */ attributes, + /** @type object? */ linkProps, + /** @type (string | object)? */ noneContent, +) => { + const gap = 8; + const minWidth = attributes.reduce((sum, { width }) => sum + width, attributes.length * gap); + + let content = null; + let actionContent = null; + if (items.length) { + content = div( + { style: 'overflow: auto; max-height: 300px;' }, + div( + { + class: 'flex-row table-row text-caption pt-0', + style: `gap: ${gap}px; min-width: ${minWidth}px;`, + }, + attributes.map(({ label, width }) => span( + { style: `flex: 1 0 ${width}px;` }, + label, + )), + ), + items.map(item => div( + { + class: 'flex-row table-row pt-2 pb-2', + style: `gap: ${gap}px; min-width: ${minWidth}px;`, + }, + attributes.map(({ key, width, value_function, classes }) => { + const value = value_function ? value_function(item) : item[key]; + return span( + { + class: classes || '', + style: `flex: 1 0 ${width}px; word-break: break-word;`, + }, + value || '--', + ); + }), + )), + ); + + if (linkProps) { + actionContent = Link({ + ...linkProps, + open_new: true, + label: 'View details', + right_icon: 'chevron_right', + }); + } + } else { + actionContent = typeof noneContent === 'string' ? 
span( + { class: 'text-secondary flex-row fx-gap-1' }, + noneContent, + i({ class: 'material-symbols-rounded text-green' }, 'check_circle'), + ) : (noneContent || null); + } + + return Card({ + title: `${title} (${items.length})`, + content, + actionContent, + }); +} + +const stylesheet = new CSSStyleSheet(); +stylesheet.replace(` +.tg-dh { + height: 100%; + align-items: stretch; +} + +.tg-dh--tree { + min-width: 250px; + border-radius: 8px; + border: 1px solid var(--border-color); + background-color: var(--sidebar-background-color); +} + +.tg-dh--details { + padding: 8px 0 0 20px; + overflow: auto; + flex-grow: 1; +} + +.tg-dh--title { + margin: 0; + color: var(--primary-text-color); + font-size: 20px; + font-weight: 500; +} + +.tg-dh--details > .tg-card { + min-width: 400px; +} + +.tg-dh--column-icon { + margin-right: 4px; + width: 24px; + color: #B0BEC5; + text-align: center; +} + +.tg-dh--no-selection { + flex: auto; + max-height: 400px; + padding: 16px; +} + +.tg-dh--no-selection > i { + font-size: 80px; +} + +.tg-dh--no-selection > span { + font-size: 18px; + text-align: center; +} +`); + +export { DataHierarchy }; diff --git a/testgen/ui/components/widgets/testgen_component.py b/testgen/ui/components/widgets/testgen_component.py index 7fb2be2..89b8ef0 100644 --- a/testgen/ui/components/widgets/testgen_component.py +++ b/testgen/ui/components/widgets/testgen_component.py @@ -1,20 +1,47 @@ import typing +import streamlit as st + from testgen.ui.components.utils.component import component from testgen.ui.navigation.router import Router from testgen.ui.session import session def testgen_component( - component_id: typing.Literal["profiling_runs", "test_runs", "database_flavor_selector"], + component_id: typing.Literal["profiling_runs", "test_runs", "database_flavor_selector", "data_hierarchy"], props: dict, - event_handlers: dict | None, + on_change_handlers: dict[str, typing.Callable] | None = None, + event_handlers: dict[str, typing.Callable] | None = None, ) -> dict | None: + """ + Testgen component to display a VanJS page. 
+ + # Parameters + :param component_id: name of page + :param props: properties expected by the page + :param on_change_handlers: event handlers to be called during on_change callback (recommended, but does not support calling st.rerun()) + :param event_handlers: event handlers to be called on next run (supports calling st.rerun()) + + For both on_change_handlers and event_handlers, the "payload" data from the event is passed as the only argument to the callback function + """ + + key = f"testgen:{component_id}" + + def on_change(): + event_data = st.session_state[key] + if event_data and (event := event_data.get("event")): + if on_change_handlers and (handler := on_change_handlers.get(event)): + # Prevent handling the same event multiple times + event_id = f"{component_id}:{event_data.get('_id', '')}" + if event_id != session.testgen_event_id: + session.testgen_event_id = event_id + handler(event_data.get("payload")) event_data = component( id_=component_id, - key=f"testgen:{component_id}", + key=key, props=props, + on_change=on_change if on_change_handlers else None, ) if event_data and (event := event_data.get("event")): if event == "LinkClicked": diff --git a/testgen/ui/views/data_hierarchy.py b/testgen/ui/views/data_hierarchy.py new file mode 100644 index 0000000..59421b3 --- /dev/null +++ b/testgen/ui/views/data_hierarchy.py @@ -0,0 +1,487 @@ +import json +import typing +from functools import partial + +import pandas as pd +import streamlit as st + +import testgen.ui.services.database_service as db +import testgen.ui.services.query_service as dq +from testgen.ui.components import widgets as testgen +from testgen.ui.components.widgets import testgen_component +from testgen.ui.navigation.menu import MenuItem +from testgen.ui.navigation.page import Page +from testgen.ui.queries import project_queries +from testgen.ui.session import session +from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog +from testgen.utils import is_uuid4 + +PAGE_ICON = "dataset" + +class DataHierarchyPage(Page): + path = "data-hierarchy" + can_activate: typing.ClassVar = [ + lambda: session.authentication_status, + ] + menu_item = MenuItem(icon=PAGE_ICON, label="Data Hierarchy", order=1) + + def render(self, project_code: str | None = None, table_group_id: str | None = None, selected: str | None = None, **_kwargs) -> None: + testgen.page_header( + "Data Hierarchy", + ) + + project_code = project_code or session.project + + if render_empty_state(project_code): + return + + group_filter_column, _, loading_column = st.columns([.3, .5, .2], vertical_alignment="center") + + with group_filter_column: + table_groups_df = get_table_group_options(project_code) + table_group_id = testgen.select( + options=table_groups_df, + value_column="id", + display_column="table_groups_name", + default_value=table_group_id, + required=True, + label="Table Group", + bind_to_query="table_group_id", + ) + + with loading_column: + columns_df = get_table_group_columns(table_group_id) + selected_item = get_selected_item(selected, table_group_id) + if not selected_item: + self.router.set_query_params({ "selected": None }) + + if columns_df.empty: + table_group = table_groups_df.loc[table_groups_df["id"] == table_group_id].iloc[0] + testgen.empty_state( + label="No profiling data yet", + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.Profiling, + action_label="Run Profiling", + button_onclick=partial(run_profiling_dialog, project_code, table_group), + button_icon="play_arrow", + ) + else: + def 
on_tree_node_select(node_id): + self.router.set_query_params({ "selected": node_id }) + + testgen_component( + "data_hierarchy", + props={ "columns": columns_df.to_json(orient="records"), "selected": json.dumps(selected_item) }, + on_change_handlers={ "TreeNodeSelected": on_tree_node_select }, + event_handlers={ "MetadataChanged": on_metadata_changed }, + ) + + +def on_metadata_changed(metadata: dict) -> None: + schema = st.session_state["dbschema"] + item_type, item_id = metadata["id"].split("_", 2) + + if item_type == "table": + update_table = "data_table_chars" + id_column = "table_id" + else: + update_table = "data_column_chars" + id_column = "column_id" + + attributes = [ + "data_source", + "source_system", + "source_process", + "business_domain", + "stakeholder_group", + "transform_level", + "aggregation_level" + ] + cde_value_map = { + True: "TRUE", + False: "FALSE", + None: "NULL", + } + set_attributes = [ f"{key} = NULLIF('{metadata.get(key) or ''}', '')" for key in attributes ] + set_attributes.append(f"critical_data_element = {cde_value_map[metadata.get('critical_data_element')]}") + + sql = f""" + UPDATE {schema}.{update_table} + SET {', '.join(set_attributes)} + WHERE {id_column} = '{item_id}'; + """ + db.execute_sql(sql) + get_selected_item.clear() + st.rerun() + + +def render_empty_state(project_code: str) -> bool: + project_summary_df = project_queries.get_summary_by_code(project_code) + if project_summary_df["profiling_runs_ct"]: # Without profiling, we don't have any table and column information in db + return False + + label="Your project is empty" + testgen.whitespace(5) + if not project_summary_df["connections_ct"]: + testgen.empty_state( + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.Connection, + action_label="Go to Connections", + link_href="connections", + ) + else: + testgen.empty_state( + label=label, + icon=PAGE_ICON, + message=testgen.EmptyStateMessage.Profiling if project_summary_df["table_groups_ct"] else testgen.EmptyStateMessage.TableGroup, + action_label="Go to Table Groups", + link_href="connections:table-groups", + link_params={ "connection_id": str(project_summary_df["default_connection_id"]) } + ) + return True + + +@st.cache_data(show_spinner=False) +def get_table_group_options(project_code): + schema = st.session_state["dbschema"] + return dq.run_table_groups_lookup_query(schema, project_code) + + +@st.cache_data(show_spinner="Loading data ...") +def get_table_group_columns(table_group_id: str) -> pd.DataFrame: + schema = st.session_state["dbschema"] + sql = f""" + SELECT CONCAT('column_', column_chars.column_id) AS column_id, + CONCAT('table_', table_chars.table_id) AS table_id, + column_chars.column_name, + table_chars.table_name, + column_chars.general_type, + column_chars.drop_date AS column_drop_date, + table_chars.drop_date AS table_drop_date + FROM {schema}.data_column_chars column_chars + LEFT JOIN {schema}.data_table_chars table_chars ON ( + column_chars.table_id = table_chars.table_id + ) + WHERE column_chars.table_groups_id = '{table_group_id}' + ORDER BY table_name, column_name; + """ + return db.retrieve_data(sql) + + +@st.cache_data(show_spinner="Loading data ...") +def get_selected_item(selected: str, table_group_id: str) -> dict | None: + if not selected: + return None + + schema = st.session_state["dbschema"] + item_type, item_id = selected.split("_", 2) + + if item_type not in ["table", "column"] or not is_uuid4(item_id): + return None + + if item_type == "table": + sql = f""" + WITH latest_profile_dates AS ( + 
SELECT table_name, + profiling_runs.table_groups_id, + MAX(profiling_starttime) AS profiling_starttime + FROM {schema}.profile_results + LEFT JOIN {schema}.profiling_runs ON ( + profile_results.profile_run_id = profiling_runs.id + ) + GROUP BY profiling_runs.table_groups_id, table_name + ), + latest_test_run_dates AS ( + SELECT table_name, + test_results.table_groups_id, + MAX(test_starttime) AS test_starttime + FROM {schema}.test_results + LEFT JOIN {schema}.test_runs ON ( + test_results.test_run_id = test_runs.id + ) + GROUP BY test_results.table_groups_id, table_name + ) + SELECT table_chars.table_name, + table_chars.table_groups_id::VARCHAR(50) AS table_group_id, + -- Characteristics + functional_table_type, + record_ct, + table_chars.column_ct, + data_point_ct, + add_date AS add_date, + drop_date AS drop_date, + -- Metadata + critical_data_element, + data_source, + source_system, + source_process, + business_domain, + stakeholder_group, + transform_level, + aggregation_level, + -- Latest Profile & Test Runs + profiling_runs.id::VARCHAR(50) AS latest_profile_id, + lpd.profiling_starttime AS latest_profile_date, + lrd.test_starttime AS latest_test_run_date + FROM {schema}.data_table_chars table_chars + LEFT JOIN latest_profile_dates lpd ON ( + table_chars.table_groups_id = lpd.table_groups_id + AND table_chars.table_name = lpd.table_name + ) + LEFT JOIN latest_test_run_dates lrd ON ( + table_chars.table_groups_id = lrd.table_groups_id + AND table_chars.table_name = lrd.table_name + ) + LEFT JOIN {schema}.profiling_runs ON ( + lpd.table_groups_id = profiling_runs.table_groups_id + AND lpd.profiling_starttime = profiling_runs.profiling_starttime + ) + WHERE table_id = '{item_id}' + AND table_chars.table_groups_id = '{table_group_id}'; + """ + else: + sql = f""" + WITH latest_profile_dates AS ( + SELECT column_name, + table_name, + profiling_runs.table_groups_id, + MAX(profiling_starttime) AS profiling_starttime + FROM {schema}.profile_results + LEFT JOIN {schema}.profiling_runs ON ( + profile_results.profile_run_id = profiling_runs.id + ) + GROUP BY profiling_runs.table_groups_id, table_name, column_name + ), + latest_test_run_dates AS ( + SELECT column_names, + table_name, + test_results.table_groups_id, + MAX(test_starttime) AS test_starttime + FROM {schema}.test_results + LEFT JOIN {schema}.test_runs ON ( + test_results.test_run_id = test_runs.id + ) + GROUP BY test_results.table_groups_id, table_name, column_names + ) + SELECT column_chars.column_name, + column_chars.table_name, + column_chars.table_groups_id::VARCHAR(50) AS table_group_id, + -- Characteristics + column_chars.general_type, + column_chars.column_type, + column_chars.functional_data_type, + datatype_suggestion, + column_chars.add_date AS add_date, + column_chars.last_mod_date AS last_mod_date, + column_chars.drop_date AS drop_date, + -- Column Metadata + column_chars.critical_data_element, + column_chars.data_source, + column_chars.source_system, + column_chars.source_process, + column_chars.business_domain, + column_chars.stakeholder_group, + column_chars.transform_level, + column_chars.aggregation_level, + -- Table Metadata + table_chars.critical_data_element AS table_critical_data_element, + table_chars.data_source AS table_data_source, + table_chars.source_system AS table_source_system, + table_chars.source_process AS table_source_process, + table_chars.business_domain AS table_business_domain, + table_chars.stakeholder_group AS table_stakeholder_group, + table_chars.transform_level AS table_transform_level, + 
table_chars.aggregation_level AS table_aggregation_level, + -- Latest Profile & Test Runs + profiling_runs.id::VARCHAR(50) AS latest_profile_id, + lpd.profiling_starttime AS latest_profile_date, + lrd.test_starttime AS latest_test_run_date, + -- Value Counts + profile_results.record_ct, + value_ct, + distinct_value_ct, + null_value_ct, + zero_value_ct, + -- Alpha + zero_length_ct, + filled_value_ct, + includes_digit_ct, + numeric_ct, + date_ct, + quoted_value_ct, + lead_space_ct, + embedded_space_ct, + avg_embedded_spaces, + min_length, + max_length, + avg_length, + min_text, + max_text, + distinct_std_value_ct, + distinct_pattern_ct, + std_pattern_match, + top_freq_values, + top_patterns, + -- Numeric + min_value, + min_value_over_0, + max_value, + avg_value, + stdev_value, + percentile_25, + percentile_50, + percentile_75, + -- Date + min_date, + max_date, + before_1yr_date_ct, + before_5yr_date_ct, + before_20yr_date_ct, + within_1yr_date_ct, + within_1mo_date_ct, + future_date_ct, + -- Boolean + boolean_true_ct + FROM {schema}.data_column_chars column_chars + LEFT JOIN {schema}.data_table_chars table_chars ON ( + column_chars.table_id = table_chars.table_id + ) + LEFT JOIN latest_profile_dates lpd ON ( + column_chars.table_groups_id = lpd.table_groups_id + AND column_chars.table_name = lpd.table_name + AND column_chars.column_name = lpd.column_name + ) + LEFT JOIN latest_test_run_dates lrd ON ( + column_chars.table_groups_id = lrd.table_groups_id + AND column_chars.table_name = lrd.table_name + AND column_chars.column_name = lrd.column_names + ) + LEFT JOIN {schema}.profiling_runs ON ( + lpd.table_groups_id = profiling_runs.table_groups_id + AND lpd.profiling_starttime = profiling_runs.profiling_starttime + ) + LEFT JOIN {schema}.profile_results ON ( + profiling_runs.id = profile_results.profile_run_id + AND column_chars.column_name = profile_results.column_name + ) + WHERE column_id = '{item_id}' + AND column_chars.table_groups_id = '{table_group_id}';; + """ + + item_df = db.retrieve_data(sql) + if not item_df.empty: + # to_json converts datetimes, NaN, etc, to JSON-safe values (Note: to_dict does not) + item = json.loads(item_df.to_json(orient="records"))[0] + item["id"] = selected + item["type"] = item_type + item["latest_anomalies"] = get_profile_anomalies(item["latest_profile_id"], item["table_name"], item.get("column_name")) + item["latest_test_issues"] = get_latest_test_issues(item["table_group_id"], item["table_name"], item.get("column_name")) + return item + + +@st.cache_data(show_spinner=False) +def get_profile_anomalies(profile_run_id: str, table_name: str, column_name: str | None = None) -> dict | None: + schema = st.session_state["dbschema"] + + column_condition = "" + if column_name: + column_condition = f"AND column_name = '{column_name}'" + + sql = f""" + WITH pii_results AS ( + SELECT id, + CASE + WHEN detail LIKE 'Risk: HIGH%%' THEN 'High' + WHEN detail LIKE 'Risk: MODERATE%%' THEN 'Moderate' + ELSE null + END AS pii_risk + FROM {schema}.profile_anomaly_results + ) + SELECT column_name, + anomaly_name, + issue_likelihood, + detail, + pii_risk + FROM {schema}.profile_anomaly_results anomaly_results + LEFT JOIN {schema}.profile_anomaly_types anomaly_types ON ( + anomaly_types.id = anomaly_results.anomaly_id + ) + LEFT JOIN pii_results ON ( + anomaly_results.id = pii_results.id + ) + WHERE profile_run_id = '{profile_run_id}' + AND table_name = '{table_name}' + {column_condition} + AND COALESCE(disposition, 'Confirmed') = 'Confirmed' + ORDER BY + CASE issue_likelihood + 
WHEN 'Definite' THEN 1 + WHEN 'Likely' THEN 2 + WHEN 'Possible' THEN 3 + ELSE 4 + END, + CASE pii_risk + WHEN 'High' THEN 1 + WHEN 'Moderate' THEN 2 + ELSE 3 + END, + column_name; + """ + + df = db.retrieve_data(sql) + return json.loads(df.to_json(orient="records")) + + +@st.cache_data(show_spinner=False) +def get_latest_test_issues(table_group_id: str, table_name: str, column_name: str | None = None) -> dict | None: + schema = st.session_state["dbschema"] + + column_condition = "" + if column_name: + column_condition = f"AND column_names = '{column_name}'" + + sql = f""" + WITH latest_run_dates AS ( + SELECT test_suite_id, + MAX(test_starttime) AS test_starttime + FROM {schema}.test_runs + GROUP BY test_suite_id + ) + SELECT column_names AS column_name, + test_name_short AS test_name, + result_status, + result_message, + test_suite, + test_results.test_run_id::VARCHAR(50), + lrd.test_starttime AS test_run_date + FROM latest_run_dates lrd + LEFT JOIN {schema}.test_runs ON ( + lrd.test_suite_id = test_runs.test_suite_id + AND lrd.test_starttime = test_runs.test_starttime + ) + LEFT JOIN {schema}.test_results ON ( + test_runs.id = test_results.test_run_id + ) + LEFT JOIN {schema}.test_types ON ( + test_results.test_type = test_types.test_type + ) + LEFT JOIN {schema}.test_suites ON ( + lrd.test_suite_id = test_suites.id + ) + WHERE test_suites.table_groups_id = '{table_group_id}' + AND table_name = '{table_name}' + {column_condition} + AND result_status <> 'Passed' + AND COALESCE(disposition, 'Confirmed') = 'Confirmed' + ORDER BY + CASE result_status + WHEN 'Failed' THEN 1 + WHEN 'Warning' THEN 2 + ELSE 3 + END, + column_name; + """ + + df = db.retrieve_data(sql) + return json.loads(df.to_json(orient="records")) diff --git a/testgen/utils/__init__.py b/testgen/utils/__init__.py index bd4bda8..db58739 100644 --- a/testgen/utils/__init__.py +++ b/testgen/utils/__init__.py @@ -1,4 +1,5 @@ import math +from uuid import UUID import pandas as pd @@ -13,3 +14,12 @@ def truncate(value: float) -> int: if 0 < value < 1: return 1 return math.trunc(value) + + +def is_uuid4(value: str) -> bool: + try: + uuid = UUID(value, version=4) + except Exception: + return False + + return str(uuid) == value From 5ce267f125b9fb48fc947aff1cd73785d41a8ee8 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 4 Nov 2024 13:11:11 -0500 Subject: [PATCH 56/91] misc(ui): fixes and typing improvements --- .../frontend/js/pages/profiling_runs.js | 26 ++++++++++++++++--- .../components/frontend/js/pages/test_runs.js | 23 +++++++++++++--- testgen/ui/navigation/router.py | 3 +++ testgen/ui/services/javascript_service.py | 1 - 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js index c434f37..6b98d38 100644 --- a/testgen/ui/components/frontend/js/pages/profiling_runs.js +++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js @@ -1,7 +1,25 @@ /** + * @typedef ProfilingRun + * @type {object} + * @property {string} profiling_run_id + * @property {number} start_time + * @property {string} table_groups_name + * @property {'Running'|'Complete'|'Error'|'Cancelled'} status + * @property {string} log_message + * @property {string} duration + * @property {string} process_id + * @property {string} schema_name + * @property {number} column_ct + * @property {number} table_ct + * @property {number} anomaly_ct + * @property {number} anomalies_definite_ct + * @property {number} anomalies_likely_ct + * 
@property {number} anomalies_possible_ct + * @property {number} anomalies_dismissed_ct + * * @typedef Properties * @type {object} - * @property {array} items + * @property {ProfilingRun[]} items */ import van from '../van.min.js'; import { Tooltip } from '../components/tooltip.js'; @@ -57,7 +75,7 @@ const ProfilingRuns = (/** @type Properties */ props) => { ); } -const ProfilingRunItem = (item, /** @type string[] */ columns) => { +const ProfilingRunItem = (/** @type ProfilingRun */ item, /** @type string[] */ columns) => { return div( { class: 'table-row flex-row' }, div( @@ -92,7 +110,7 @@ const ProfilingRunItem = (item, /** @type string[] */ columns) => { class: 'text-caption mt-1 mb-1', style: item.status === 'Complete' && !item.column_ct ? 'color: var(--red);' : '', }, - `${item.table_ct || 0} tables, ${item.column_ct || 0} columns`, + item.status === 'Complete' ? `${item.table_ct || 0} tables, ${item.column_ct || 0} columns` : null, ), item.column_ct ? Link({ label: 'View results', @@ -126,7 +144,7 @@ const ProfilingRunItem = (item, /** @type string[] */ columns) => { ); } -function ProfilingRunStatus(/** @type object */ item) { +function ProfilingRunStatus(/** @type ProfilingRun */ item) { const attributeMap = { Running: { label: 'Running', color: 'blue' }, Complete: { label: 'Completed', color: '' }, diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js index d100f91..c5656a6 100644 --- a/testgen/ui/components/frontend/js/pages/test_runs.js +++ b/testgen/ui/components/frontend/js/pages/test_runs.js @@ -1,7 +1,24 @@ /** + * @typedef TestRun + * @type {object} + * @property {string} test_run_id + * @property {number} test_starttime + * @property {string} table_groups_name + * @property {string} test_suite + * @property {'Running'|'Complete'|'Error'|'Cancelled'} status + * @property {string} log_message + * @property {string} duration + * @property {string} process_id + * @property {number} test_ct + * @property {number} passed_ct + * @property {number} warning_ct + * @property {number} failed_ct + * @property {number} error_ct + * @property {number} dismissed_ct + * * @typedef Properties * @type {object} - * @property {array} items + * @property {TestRun[]} items */ import van from '../van.min.js'; import { Tooltip } from '../components/tooltip.js'; @@ -53,7 +70,7 @@ const TestRuns = (/** @type Properties */ props) => { ); } -const TestRunItem = (item, /** @type string[] */ columns) => { +const TestRunItem = (/** @type TestRun */ item, /** @type string[] */ columns) => { return div( { class: 'table-row flex-row' }, div( @@ -102,7 +119,7 @@ const TestRunItem = (item, /** @type string[] */ columns) => { ); } -function TestRunStatus(/** @type object */ item) { +function TestRunStatus(/** @type TestRun */ item) { const attributeMap = { Running: { label: 'Running', color: 'blue' }, Complete: { label: 'Completed', color: '' }, diff --git a/testgen/ui/navigation/router.py b/testgen/ui/navigation/router.py index 011ebb8..3b812a3 100644 --- a/testgen/ui/navigation/router.py +++ b/testgen/ui/navigation/router.py @@ -37,6 +37,9 @@ def run(self, hide_sidebar=False) -> None: if not session.cookies_ready: session.cookies_ready = 1 session.page_pending_cookies = current_page + # Set this anyway so that sidebar displays initial selection correctly + session.current_page = current_page.url_path + st.rerun() # Sometimes the cookie is ready on the second rerun and other times only on the third -_- # so we have to make sure the page 
renders correctly in both cases diff --git a/testgen/ui/services/javascript_service.py b/testgen/ui/services/javascript_service.py index 7b4ea32..93eae90 100644 --- a/testgen/ui/services/javascript_service.py +++ b/testgen/ui/services/javascript_service.py @@ -38,7 +38,6 @@ def get_browser_locale_timezone(): return st_javascript( """await (async () => { const userTimezone = Intl.DateTimeFormat().resolvedOptions().timeZone; - console.log(userTimezone) return userTimezone })().then(returnValue => returnValue)""" ) From 9fa43f13be9df29fb0e12775d001fe9ff8ba5d5f Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 6 Nov 2024 17:12:23 -0500 Subject: [PATCH 57/91] misc(ui): improve data hierarchy query performance --- testgen/ui/views/data_hierarchy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testgen/ui/views/data_hierarchy.py b/testgen/ui/views/data_hierarchy.py index 59421b3..445f6c4 100644 --- a/testgen/ui/views/data_hierarchy.py +++ b/testgen/ui/views/data_hierarchy.py @@ -245,13 +245,13 @@ def get_selected_item(selected: str, table_group_id: str) -> dict | None: WITH latest_profile_dates AS ( SELECT column_name, table_name, - profiling_runs.table_groups_id, + profile_results.table_groups_id, MAX(profiling_starttime) AS profiling_starttime FROM {schema}.profile_results LEFT JOIN {schema}.profiling_runs ON ( profile_results.profile_run_id = profiling_runs.id ) - GROUP BY profiling_runs.table_groups_id, table_name, column_name + GROUP BY profile_results.table_groups_id, table_name, column_name ), latest_test_run_dates AS ( SELECT column_names, @@ -366,7 +366,7 @@ def get_selected_item(selected: str, table_group_id: str) -> dict | None: AND column_chars.column_name = profile_results.column_name ) WHERE column_id = '{item_id}' - AND column_chars.table_groups_id = '{table_group_id}';; + AND column_chars.table_groups_id = '{table_group_id}'; """ item_df = db.retrieve_data(sql) From fc6cb63662be60113e8f79250391643d95c4d237 Mon Sep 17 00:00:00 2001 From: Astor Date: Thu, 7 Nov 2024 16:04:37 -0300 Subject: [PATCH 58/91] fix(threshold error count fix): code fixes Refs: TG-806 --- testgen/ui/views/test_definitions.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index c0eaf09..58fc022 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -529,20 +529,19 @@ def show_test_form( if dynamic_attribute in ["custom_query"]: show_custom_query = True + elif dynamic_attribute in ["threshold"]: + test_definition[dynamic_attribute] = current_column.number_input( + label=actual_dynamic_attributes_labels, + value=value, + help=actual_dynamic_attributes_help, + ) else: - if "threshold" in dynamic_attribute: - test_definition[dynamic_attribute] = current_column.number_input( - label=actual_dynamic_attributes_labels, - value=value, - help=actual_dynamic_attributes_help, - ) - else: - test_definition[dynamic_attribute] = current_column.text_input( - label=actual_dynamic_attributes_labels, - max_chars=4000 if dynamic_attribute in ["match_column_names", "match_groupby_names", "groupby_names"] else 1000, - value=value, - help=actual_dynamic_attributes_help, - ) + test_definition[dynamic_attribute] = current_column.text_input( + label=actual_dynamic_attributes_labels, + max_chars=4000 if dynamic_attribute in ["match_column_names", "match_groupby_names", "groupby_names"] else 1000, + value=value, + 
help=actual_dynamic_attributes_help, + ) # Custom Query if show_custom_query: From 1113bcd516c2a5ea461662cde6cfcd55c43900c0 Mon Sep 17 00:00:00 2001 From: Astor Date: Thu, 7 Nov 2024 16:30:22 -0300 Subject: [PATCH 59/91] fix: threshold error count Refs: TG-806 --- testgen/ui/views/test_definitions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index 58fc022..d36465d 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -529,7 +529,7 @@ def show_test_form( if dynamic_attribute in ["custom_query"]: show_custom_query = True - elif dynamic_attribute in ["threshold"]: + elif dynamic_attribute in ["threshold_value"]: test_definition[dynamic_attribute] = current_column.number_input( label=actual_dynamic_attributes_labels, value=value, From 00692706bf78f9b1ca4b75e5fa9f7747032ac692 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 7 Nov 2024 14:37:45 -0500 Subject: [PATCH 60/91] ci(docker): add git to dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index e436ca4..cdab57c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM python:3.10-slim-bookworm AS build-image RUN mkdir -p /dk && \ apt-get update && \ - apt-get install -y gcc libpcre3 libpcre3-dev g++ + apt-get install -y gcc libpcre3 libpcre3-dev g++ git COPY ./pyproject.toml /tmp/dk/ RUN python3 -m pip install /tmp/dk --prefix=/dk From 776d2b3f09d6a93ed57f141879e1d518504cb053 Mon Sep 17 00:00:00 2001 From: Luis Trinidad Date: Thu, 7 Nov 2024 12:48:08 -0400 Subject: [PATCH 61/91] fix(profiling): add parenthesis to the formatted anomaly criteria --- testgen/template/profiling/profile_anomalies_screen_column.sql | 2 +- .../profiling/profile_anomalies_screen_multi_column.sql | 2 +- .../template/profiling/profile_anomalies_screen_variants.sql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/testgen/template/profiling/profile_anomalies_screen_column.sql b/testgen/template/profiling/profile_anomalies_screen_column.sql index e0d9e34..cb9c4c1 100644 --- a/testgen/template/profiling/profile_anomalies_screen_column.sql +++ b/testgen/template/profiling/profile_anomalies_screen_column.sql @@ -19,4 +19,4 @@ LEFT JOIN v_inactive_anomalies i AND '{ANOMALY_ID}' = i.anomaly_id) WHERE p.profile_run_id = '{PROFILE_RUN_ID}'::UUID AND i.anomaly_id IS NULL - AND {ANOMALY_CRITERIA}; + AND ({ANOMALY_CRITERIA}); diff --git a/testgen/template/profiling/profile_anomalies_screen_multi_column.sql b/testgen/template/profiling/profile_anomalies_screen_multi_column.sql index 7a61561..6451eaf 100644 --- a/testgen/template/profiling/profile_anomalies_screen_multi_column.sql +++ b/testgen/template/profiling/profile_anomalies_screen_multi_column.sql @@ -44,7 +44,7 @@ WITH mults AS ( SELECT p.project_code, AND '{ANOMALY_ID}' = i.anomaly_id) WHERE p.profile_run_id = '{PROFILE_RUN_ID}'::UUID AND i.anomaly_id IS NULL - AND {ANOMALY_CRITERIA} + AND ({ANOMALY_CRITERIA}) ) INSERT INTO profile_anomaly_results (project_code, table_groups_id, profile_run_id, anomaly_id, diff --git a/testgen/template/profiling/profile_anomalies_screen_variants.sql b/testgen/template/profiling/profile_anomalies_screen_variants.sql index cec9bdb..266e73e 100644 --- a/testgen/template/profiling/profile_anomalies_screen_variants.sql +++ b/testgen/template/profiling/profile_anomalies_screen_variants.sql @@ -22,7 +22,7 @@ WITH all_matches AND p.column_name = i.column_name 
AND '{ANOMALY_ID}' = i.anomaly_id) WHERE p.profile_run_id = '{PROFILE_RUN_ID}'::UUID - AND {ANOMALY_CRITERIA} + AND ({ANOMALY_CRITERIA}) AND p.top_freq_values > '' AND i.anomaly_id IS NULL AND fn_count_intersecting_items(LOWER(fn_extract_top_values(p.top_freq_values)), v.check_values, '|') > 1 From 9521759a3f7b3bb9ad150f21b7b159e6723d7ae3 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Mon, 11 Nov 2024 11:14:44 -0500 Subject: [PATCH 62/91] misc(pdf): Code review feedback --- testgen/ui/pdf/hygiene_issue_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py index 4c23ec6..b228231 100644 --- a/testgen/ui/pdf/hygiene_issue_report.py +++ b/testgen/ui/pdf/hygiene_issue_report.py @@ -85,7 +85,7 @@ def build_summary_table(document, hi_data): ( "Hygiene Issue", ( - Paragraph(f"{hi_data["anomaly_name"]}:", style=PARA_STYLE_CELL), + Paragraph(f"{hi_data['anomaly_name']}:", style=PARA_STYLE_CELL), Paragraph(hi_data["anomaly_description"], style=PARA_STYLE_CELL), ), None, From f5ba8790ef3dde24403ed93379e67b68cd8115fd Mon Sep 17 00:00:00 2001 From: "Chip.Bloche" Date: Fri, 8 Nov 2024 15:14:19 -0500 Subject: [PATCH 63/91] feat(cli): add scoring infrastructure and default score roll-ups --- .../queries/execute_cat_tests_query.py | 11 ++ testgen/commands/queries/profiling_query.py | 17 +- testgen/commands/run_execute_cat_tests.py | 6 +- testgen/commands/run_profiling_bridge.py | 29 ++- .../commands/run_test_parameter_validation.py | 12 +- .../020_create_standard_functions_sprocs.sql | 83 +++++++++ .../030_initialize_new_schema_structure.sql | 120 ++++++++----- .../050_populate_new_schema_metadata.sql | 165 +++++++++--------- .../dbsetup/060_create_standard_views.sql | 2 +- .../dbupgrade/0120_incremental_upgrade.sql | 133 ++++++++++++++ .../ex_finalize_test_run_results.sql | 66 +++++++ .../execution/ex_update_test_suite.sql | 13 ++ .../execution/test_scoring_rollup.sql | 123 +++++++++++++ .../project_profiling_query_mssql.yaml | 27 +++ .../project_profiling_query_postgresql.yaml | 27 +++ .../project_profiling_query_redshift.yaml | 27 +++ .../project_profiling_query_snowflake.yaml | 27 +++ .../project_profiling_query_trino.yaml | 27 +++ .../template/parms/parms_test_execution.sql | 1 + .../profiling/functional_datatype.sql | 1 + .../profiling/profile_anomaly_scoring.sql | 10 ++ .../profile_anomaly_scoring_rollup.sql | 109 ++++++++++++ .../profiling/profile_anomaly_types_get.sql | 2 +- .../ex_get_test_column_list_tg.sql | 25 ++- .../ex_write_test_val_errors.sql | 4 +- 25 files changed, 913 insertions(+), 154 deletions(-) create mode 100644 testgen/template/dbupgrade/0120_incremental_upgrade.sql create mode 100644 testgen/template/execution/ex_update_test_suite.sql create mode 100644 testgen/template/execution/test_scoring_rollup.sql create mode 100644 testgen/template/profiling/profile_anomaly_scoring.sql create mode 100644 testgen/template/profiling/profile_anomaly_scoring_rollup.sql diff --git a/testgen/commands/queries/execute_cat_tests_query.py b/testgen/commands/queries/execute_cat_tests_query.py index ac905d3..89e8ff8 100644 --- a/testgen/commands/queries/execute_cat_tests_query.py +++ b/testgen/commands/queries/execute_cat_tests_query.py @@ -12,6 +12,7 @@ class CCATExecutionSQL: test_suite = "" run_date = "" test_run_id = "" + table_groups_id = "" max_query_chars = "" exception_message = "" @@ -39,6 +40,7 @@ def _ReplaceParms(self, strInputString): strInputString = 
strInputString.replace("{PROJECT_CODE}", self.project_code) strInputString = strInputString.replace("{TEST_SUITE}", self.test_suite) strInputString = strInputString.replace("{TEST_SUITE_ID}", self.test_suite_id) + strInputString = strInputString.replace("{TABLE_GROUPS_ID}", self.table_groups_id) # NOTE: REPLACE_QC_SCHEMA is parm replaced to run build query: sets the actual value to replace. # DATA_QC_SCHEMA is parm in cat_test_conditions that build query replaces via SQL. strInputString = strInputString.replace("{REPLACE_QC_SCHEMA}", self.replace_qc_schema) @@ -99,3 +101,12 @@ def FinalizeTestResultsSQL(self): def PushTestRunStatusUpdateSQL(self): strQ = self._ReplaceParms(read_template_sql_file("ex_update_test_record_in_testrun_table.sql", "execution")) return strQ + + def FinalizeTestSuiteUpdateSQL(self): + strQ = self._ReplaceParms(read_template_sql_file("ex_update_test_suite.sql", "execution")) + return strQ + + + def TestScoringRollupSQL(self): + strQ = self._ReplaceParms(read_template_sql_file("test_scoring_rollup.sql", "execution")) + return strQ diff --git a/testgen/commands/queries/profiling_query.py b/testgen/commands/queries/profiling_query.py index 84cc50f..ed35c0c 100644 --- a/testgen/commands/queries/profiling_query.py +++ b/testgen/commands/queries/profiling_query.py @@ -144,11 +144,16 @@ def GetPIIFlagUpdateQuery(self): strQ = self.ReplaceParms(read_template_sql_file("pii_flag.sql", sub_directory="profiling")) return strQ - def GetAnomalyRefreshQuery(self): + def GetAnomalyStatsRefreshQuery(self): # Runs on DK Postgres Server strQ = self.ReplaceParms(read_template_sql_file("refresh_anomalies.sql", sub_directory="profiling")) return strQ + def GetAnomalyScoringRollupQuery(self): + # Runs on DK Postgres Server + strQ = self.ReplaceParms(read_template_sql_file("profile_anomaly_scoring_rollup.sql", sub_directory="profiling")) + return strQ + def GetAnomalyTestTypesQuery(self): # Runs on DK Postgres Server strQ = self.ReplaceParms(read_template_sql_file("profile_anomaly_types_get.sql", sub_directory="profiling")) @@ -178,6 +183,16 @@ def GetAnomalyTestQuery(self, dct_test_type): return strQ + def GetAnomalyScoringQuery(self, dct_test_type): + # Runs on DK Postgres Server + strQ = read_template_sql_file("profile_anomaly_scoring.sql", sub_directory="profiling") + if strQ: + strQ = strQ.replace("{PROFILE_RUN_ID}", self.profile_run_id) + strQ = strQ.replace("{ANOMALY_ID}", dct_test_type["id"]) + strQ = strQ.replace("{PREV_FORMULA}", dct_test_type["dq_score_prevalence_formula"]) + strQ = strQ.replace("{RISK}", dct_test_type["dq_score_risk_factor"]) + return strQ + def GetDataCharsRefreshQuery(self): # Runs on DK Postgres Server strQ = self.ReplaceParms( diff --git a/testgen/commands/run_execute_cat_tests.py b/testgen/commands/run_execute_cat_tests.py index 9ca8de5..23e20a5 100644 --- a/testgen/commands/run_execute_cat_tests.py +++ b/testgen/commands/run_execute_cat_tests.py @@ -61,7 +61,10 @@ def ParseCATResults(clsCATExecute): def FinalizeTestRun(clsCATExecute): - lstQueries = [clsCATExecute.FinalizeTestResultsSQL(), clsCATExecute.PushTestRunStatusUpdateSQL()] + lstQueries = [clsCATExecute.FinalizeTestResultsSQL(), + clsCATExecute.PushTestRunStatusUpdateSQL(), + clsCATExecute.FinalizeTestSuiteUpdateSQL(), + clsCATExecute.TestScoringRollupSQL()] RunActionQueryList(("DKTG"), lstQueries) @@ -80,6 +83,7 @@ def run_cat_test_queries( ) clsCATExecute.test_run_id = strTestRunID clsCATExecute.run_date = strTestTime + clsCATExecute.table_groups_id = dctParms["table_groups_id"] 
clsCATExecute.exception_message += error_msg # Set Project Connection Params in common.db_bridgers from retrieved params diff --git a/testgen/commands/run_profiling_bridge.py b/testgen/commands/run_profiling_bridge.py index c141c76..4dd42b3 100644 --- a/testgen/commands/run_profiling_bridge.py +++ b/testgen/commands/run_profiling_bridge.py @@ -29,10 +29,8 @@ def InitializeProfilingSQL(strProject, strSQLFlavor): return CProfilingSQL(strProject, strSQLFlavor) -def CompileAnomalyTestQueries(clsProfiling): - str_query = clsProfiling.GetAnomalyTestTypesQuery() - lst_tests = RetrieveDBResultsToDictList("DKTG", str_query) - +def CompileAnomalyTestQueries(clsProfiling, lst_tests): + # Get queries for each test lst_queries = [] for dct_test_type in lst_tests: str_query = clsProfiling.GetAnomalyTestQuery(dct_test_type) @@ -42,6 +40,18 @@ def CompileAnomalyTestQueries(clsProfiling): return lst_queries +def CompileAnomalyScoringQueries(clsProfiling, lst_tests): + # Get queries for each test + lst_queries = [] + for dct_test_type in lst_tests: + if dct_test_type["dq_score_prevalence_formula"]: + str_query = clsProfiling.GetAnomalyScoringQuery(dct_test_type) + if str_query: + lst_queries.append(str_query) + + return lst_queries + + def save_contingency_rules(df_merged, threshold_ratio): # Prep rows to save lst_rules = [] @@ -434,6 +444,7 @@ def run_profiling_queries(strTableGroupsID, spinner=None): LOG.info("CurrentStep: Generating profiling update queries") lstQueries = [] + lstAnomalyTypes = [] if lstUpdates: # Run single update query, then delete from staging @@ -451,9 +462,14 @@ def run_profiling_queries(strTableGroupsID, spinner=None): lstQueries.append(strQuery) strQuery = clsProfiling.GetPIIFlagUpdateQuery() lstQueries.append(strQuery) - lstQueries.extend(CompileAnomalyTestQueries(clsProfiling)) - strQuery = clsProfiling.GetAnomalyRefreshQuery() + + strQuery = clsProfiling.GetAnomalyTestTypesQuery() + lstAnomalyTypes = RetrieveDBResultsToDictList("DKTG", strQuery) + lstQueries.extend(CompileAnomalyTestQueries(clsProfiling, lstAnomalyTypes)) + lstQueries.extend(CompileAnomalyScoringQueries(clsProfiling, lstAnomalyTypes)) + strQuery = clsProfiling.GetAnomalyStatsRefreshQuery() lstQueries.append(strQuery) + # Always runs last strQuery = clsProfiling.GetDataCharsRefreshQuery() lstQueries.append(strQuery) @@ -475,6 +491,7 @@ def run_profiling_queries(strTableGroupsID, spinner=None): finally: LOG.info("Updating the profiling run record") lstProfileRunQuery = [clsProfiling.GetProfileRunInfoRecordUpdateQuery()] + lstProfileRunQuery.append(clsProfiling.GetAnomalyScoringRollupQuery()) RunActionQueryList("DKTG", lstProfileRunQuery) if booErrors: str_error_status = "with errors. Check log for details." 
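
The profiling and test-execution changes above feed per-issue prevalence (each anomaly/test type's dq_score_prevalence_formula, weighted by its dq_score_risk_factor) into roll-up templates (profile_anomaly_scoring_rollup.sql, test_scoring_rollup.sql) that populate dq_affected_data_points, dq_total_data_points, and the dq_score_* columns added below. A minimal Python sketch of that aggregation follows, assuming the roll-up is simply one minus the affected-to-total data-point ratio; the authoritative logic lives in the SQL templates, which are not reproduced in full in this patch excerpt.

    # Hypothetical helper, not part of the patch: illustrates how per-issue
    # prevalence could roll up into a 0-1 score. The column names referenced
    # in comments (dq_prevalence, dq_affected_data_points, dq_total_data_points)
    # come from the patch; the formula itself is an assumption for illustration.
    from dataclasses import dataclass

    @dataclass
    class IssuePrevalence:
        record_ct: int        # rows in the affected table
        dq_prevalence: float  # fraction of rows affected, 0.0 .. 1.0

    def rollup_dq_score(issues: list[IssuePrevalence], total_data_points: int) -> float:
        """Assumed roll-up: score = 1 - affected_data_points / total_data_points."""
        if total_data_points <= 0:
            return 1.0
        affected = sum(i.dq_prevalence * i.record_ct for i in issues)
        affected = min(affected, total_data_points)  # cap at 100% affected
        return 1.0 - (affected / total_data_points)

    # Example: a 1,000-row table with 5 columns gives 5,000 data points.
    # Two issues at 10% and 2% prevalence affect ~120 points -> score 0.976.
    print(rollup_dq_score([IssuePrevalence(1000, 0.10), IssuePrevalence(1000, 0.02)], 5000))
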
diff --git a/testgen/commands/run_test_parameter_validation.py b/testgen/commands/run_test_parameter_validation.py index 8e93148..f93ac32 100644 --- a/testgen/commands/run_test_parameter_validation.py +++ b/testgen/commands/run_test_parameter_validation.py @@ -65,8 +65,8 @@ def run_parameter_validation_queries( strSchemas = ", ".join([f"'{value}'" for value in setSchemas]) LOG.debug("Test column list successfully retrieved") - # Retrieve Project Column list - LOG.info("CurrentStep: Retrieve Test Columns for Validation") + # Retrieve Current Project Column list + LOG.info("CurrentStep: Retrieve Current Columns for Validation") clsExecute.test_schemas = strSchemas strProjectColumnList = clsExecute.GetProjectTestValidationColumns() if "where table_schema in ()" in strProjectColumnList: @@ -74,9 +74,9 @@ def run_parameter_validation_queries( lstProjectTestColumns = RetrieveDBResultsToDictList("PROJECT", strProjectColumnList) if len(lstProjectTestColumns) == 0: - LOG.info("Project Test Column list is empty") + LOG.info("Current Test Column list is empty") - LOG.debug("Project column list successfully received") + LOG.debug("Current column list successfully received") LOG.info("CurrentStep: Compare column sets") # load results into sets result_set1 = {col.lower() for col, _ in test_columns} @@ -86,7 +86,7 @@ def run_parameter_validation_queries( missing_columns = result_set1.difference(result_set2) if len(missing_columns) == 0: - LOG.info("No missing column in Project Column list.") + LOG.info("No missing column in Current Column list.") if missing_columns: LOG.debug("Test Columns are missing in target database: %s", ", ".join(missing_columns)) @@ -143,7 +143,7 @@ def run_parameter_validation_queries( # when run_parameter_validation_queries() is called from execute_tests_query.py: # we disable tests and write validation errors to test_results table. if booRunFromTestExec: - # Copy test results to DK DB, using temporary flagged -1 value to identify + # Copy test results to DK DB, using temporary flagged D value to identify LOG.info("CurrentStep: Saving error results for invalid tests") strReportValErrors = clsExecute.ReportTestValidationErrors() RunActionQueryList("DKTG", [strReportValErrors]) diff --git a/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql b/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql index f21925e..c0bad4d 100644 --- a/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +++ b/testgen/template/dbsetup/020_create_standard_functions_sprocs.sql @@ -177,3 +177,86 @@ FROM ( ) AS t WHERE trim(value) <> '' $$ LANGUAGE sql; + + +CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.fn_normal_cdf(z_score DOUBLE PRECISION) +RETURNS DOUBLE PRECISION AS +$$ +/* + This function calculates the cumulative distribution function (CDF) + for the standard normal distribution for a given Z-score using + the Abramowitz and Stegun approximation method. It returns the + probability that a standard normal variable is less than or equal + to the given Z-score. + + The approximation formula uses a series expansion to estimate the + CDF, which is accurate for most practical purposes. + + To estimate the count of observations that fall outside a certain Z-score + (both above and below), you can use the `normal_cdf()` function. 
For a + total number of observations N, the proportion of values outside the Z-score + is given by: 2 * (1 - normal_cdf(ABS(Z))) + + This gives the proportion of values greater than the positive Z-score and + less than the negative Z-score combined. To get the estimated count of + observations, multiply this proportion by N: N * 2 * (1 - normal_cdf(ABS(Z))) +*/ +DECLARE + t DOUBLE PRECISION; + cdf DOUBLE PRECISION; +BEGIN + t := 1.0 / (1.0 + 0.2316419 * ABS(z_score)); + + cdf := (1.0 / SQRT(2 * PI())) * EXP(-0.5 * z_score * z_score) * + (0.319381530 * t + - 0.356563782 * t * t + + 1.781477937 * t * t * t + - 1.821255978 * t * t * t * t + + 1.330274429 * t * t * t * t * t); + + IF z_score >= 0 THEN + RETURN 1.0 - cdf; + ELSE + RETURN cdf; + END IF; +END; +$$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION {SCHEMA_NAME}.fn_eval(expression TEXT) RETURNS FLOAT +AS +$$ +DECLARE + result FLOAT; + invalid_parts TEXT; +BEGIN + -- Check the modified expression for invalid characters, allowing colons + IF expression ~* E'[^0-9+\\-*/(),.\\sA-Z_:e\\\'"]' THEN + RAISE EXCEPTION 'Invalid characters detected in expression: %', expression; + END IF; + + -- Check for dangerous PostgreSQL-specific keywords + IF expression ~* E'\b(DROP|ALTER|INSERT|UPDATE|DELETE|TRUNCATE|GRANT|REVOKE|COPY|EXECUTE|CREATE|COMMENT|SECURITY|WITH|SET ROLE|SET SESSION|DO|CALL|--|/\\*|;|pg_read_file|pg_write_file|pg_terminate_backend)\b' THEN + RAISE EXCEPTION 'Invalid expression: dangerous statement detected'; + END IF; + + -- Remove all allowed tokens from the validation expression, treating 'FLOAT' as a keyword + invalid_parts := regexp_replace( + expression, + E'(\\mGREATEST|LEAST|ABS|FN_NORMAL_CDF|DATEDIFF|DAY|FLOAT)\\M|[0-9]+(\\.[0-9]+)?([eE][+-]?[0-9]+)?|[+\\-*/(),\\\'":]+|\\s+', + '', + 'gi' + ); + + -- If anything is left in the validation expression, it's invalid + IF invalid_parts <> '' THEN + RAISE EXCEPTION 'Invalid expression contains invalid tokens "%" in expression: %', invalid_parts, expression; + END IF; + + -- Use the original expression (with ::FLOAT) for execution + EXECUTE format('SELECT (%s)::FLOAT', expression) INTO result; + + RETURN result; +END; +$$ +LANGUAGE plpgsql; diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index 8c14348..4e6a7be 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -30,13 +30,13 @@ CREATE TABLE stg_functional_table_updates ( ); CREATE TABLE projects ( - id UUID DEFAULT gen_random_uuid(), - project_code VARCHAR(30) NOT NULL + id UUID DEFAULT gen_random_uuid(), + project_code VARCHAR(30) NOT NULL CONSTRAINT projects_project_code_pk PRIMARY KEY, - project_name VARCHAR(50), - effective_from_date DATE, - effective_thru_date DATE, + project_name VARCHAR(50), + effective_from_date DATE, + effective_thru_date DATE, observability_api_key TEXT, observability_api_url TEXT DEFAULT '' ); @@ -94,26 +94,32 @@ CREATE TABLE table_groups source_process VARCHAR(40), business_domain VARCHAR(40), stakeholder_group VARCHAR(40), - transform_level VARCHAR(40) + transform_level VARCHAR(40), + last_complete_profile_run_id UUID, + dq_score_profiling FLOAT, + dq_score_testing FLOAT ); CREATE TABLE profiling_runs ( - id UUID + id UUID CONSTRAINT pk_prun_id PRIMARY KEY, - project_code VARCHAR(30) NOT NULL, - connection_id BIGINT NOT NULL, - table_groups_id UUID NOT NULL, - profiling_starttime TIMESTAMP, - 
profiling_endtime TIMESTAMP, - status VARCHAR(100) DEFAULT 'Running', - log_message VARCHAR, - table_ct BIGINT, - column_ct BIGINT, - anomaly_ct BIGINT, - anomaly_table_ct BIGINT, - anomaly_column_ct BIGINT, - process_id INTEGER + project_code VARCHAR(30) NOT NULL, + connection_id BIGINT NOT NULL, + table_groups_id UUID NOT NULL, + profiling_starttime TIMESTAMP, + profiling_endtime TIMESTAMP, + status VARCHAR(100) DEFAULT 'Running', + log_message VARCHAR, + table_ct BIGINT, + column_ct BIGINT, + anomaly_ct BIGINT, + anomaly_table_ct BIGINT, + anomaly_column_ct BIGINT, + dq_affected_data_points BIGINT, + dq_total_data_points BIGINT, + dq_score_profiling FLOAT, + process_id INTEGER ); CREATE TABLE test_suites ( @@ -128,16 +134,12 @@ CREATE TABLE test_suites ( test_action VARCHAR(100), severity VARCHAR(10), export_to_observability VARCHAR(5) DEFAULT 'Y', --- email_list VARCHAR(200), --- email_slack VARCHAR(100), --- wiki_link VARCHAR(200), --- variation_link VARCHAR(200), --- wiki_page_id BIGINT, --- confluence_space VARCHAR(10), test_suite_schema VARCHAR(100), component_key VARCHAR(100), component_type VARCHAR(100), component_name VARCHAR(100), + last_complete_test_run_id UUID, + dq_score_exclude BOOLEAN default FALSE, CONSTRAINT test_suites_id_pk PRIMARY KEY (id) ); @@ -230,6 +232,10 @@ CREATE TABLE profile_results ( filled_value_ct BIGINT, min_text VARCHAR(1000), max_text VARCHAR(1000), + upper_case_ct BIGINT, + lower_case_ct BIGINT, + non_alpha_ct BIGINT, + mixed_case_ct BIGINT GENERATED ALWAYS AS ( value_ct - upper_case_ct - lower_case_ct - non_alpha_ct ) STORED, numeric_ct BIGINT, date_ct BIGINT, top_patterns VARCHAR(1000), @@ -249,9 +255,11 @@ CREATE TABLE profile_results ( before_1yr_date_ct BIGINT, before_5yr_date_ct BIGINT, before_20yr_date_ct BIGINT, + before_100yr_date_ct BIGINT, within_1yr_date_ct BIGINT, within_1mo_date_ct BIGINT, future_date_ct BIGINT, + distant_future_date_ct BIGINT, date_days_present BIGINT, date_weeks_present BIGINT, date_months_present BIGINT, @@ -275,13 +283,15 @@ CREATE TABLE profile_anomaly_types ( CONSTRAINT pk_anomaly_types_id PRIMARY KEY, anomaly_type VARCHAR(200) NOT NULL, - data_object VARCHAR(10), -- Table, Dates, Column + data_object VARCHAR(10), -- Column, Multi-Col, Dates, Variant anomaly_name VARCHAR(100), anomaly_description VARCHAR(500), anomaly_criteria VARCHAR(2000), detail_expression VARCHAR(2000), issue_likelihood VARCHAR(50), -- Potential, Likely, Certain - suggested_action VARCHAR(1000) -- Consider, Investigate, Correct + suggested_action VARCHAR(1000), + dq_score_prevalence_formula TEXT, + dq_score_risk_factor TEXT ); CREATE TABLE profile_anomaly_results ( @@ -298,7 +308,8 @@ CREATE TABLE profile_anomaly_results ( column_type VARCHAR(50), anomaly_id VARCHAR(10), detail VARCHAR, - disposition VARCHAR(20) -- Confirmed, Dismissed, Inactive + disposition VARCHAR(20), -- Confirmed, Dismissed, Inactive + dq_prevalence FLOAT ); @@ -350,7 +361,10 @@ CREATE TABLE data_table_chars ( drop_date TIMESTAMP, record_ct BIGINT, column_ct BIGINT, - data_point_ct BIGINT + data_point_ct BIGINT, + last_complete_profile_run_id UUID, + dq_score_profiling FLOAT, + dq_score_testing FLOAT ); CREATE TABLE data_column_chars ( @@ -384,7 +398,10 @@ CREATE TABLE data_column_chars ( fails_30_days_prior INTEGER, warnings_last_run INTEGER, warnings_7_days_prior INTEGER, - warnings_30_days_prior INTEGER + warnings_30_days_prior INTEGER, + last_complete_profile_run_id UUID, + dq_score_profiling FLOAT, + dq_score_testing FLOAT ); CREATE TABLE test_types ( @@ -399,6 +416,8 
@@ CREATE TABLE test_types ( measure_uom VARCHAR(100), measure_uom_description VARCHAR(200), selection_criteria TEXT, + dq_score_prevalence_formula TEXT, + dq_score_risk_factor TEXT, column_name_prompt TEXT, column_name_help TEXT, default_parm_columns TEXT, @@ -434,25 +453,28 @@ CREATE TABLE generation_sets ( ); CREATE TABLE test_runs ( - id UUID NOT NULL + id UUID NOT NULL CONSTRAINT test_runs_id_pk PRIMARY KEY, - test_suite_id UUID NOT NULL, - test_starttime TIMESTAMP, - test_endtime TIMESTAMP, - status VARCHAR(100) DEFAULT 'Running', - log_message TEXT, - duration VARCHAR(50), - test_ct INTEGER, - passed_ct INTEGER, - failed_ct INTEGER, - warning_ct INTEGER, - error_ct INTEGER, - table_ct INTEGER, - column_ct INTEGER, - column_failed_ct INTEGER, - column_warning_ct INTEGER, - process_id INTEGER, + test_suite_id UUID NOT NULL, + test_starttime TIMESTAMP, + test_endtime TIMESTAMP, + status VARCHAR(100) DEFAULT 'Running', + log_message TEXT, + duration VARCHAR(50), + test_ct INTEGER, + passed_ct INTEGER, + failed_ct INTEGER, + warning_ct INTEGER, + error_ct INTEGER, + table_ct INTEGER, + column_ct INTEGER, + column_failed_ct INTEGER, + column_warning_ct INTEGER, + dq_affected_data_points BIGINT, + dq_total_data_points BIGINT, + dq_score_test_run FLOAT, + process_id INTEGER, CONSTRAINT test_runs_test_suites_fk FOREIGN KEY (test_suite_id) REFERENCES test_suites ); @@ -488,6 +510,8 @@ CREATE TABLE test_results ( test_description VARCHAR(1000), test_run_id UUID NOT NULL, table_groups_id UUID, + dq_prevalence FLOAT, + dq_record_ct BIGINT, observability_status VARCHAR(10), CONSTRAINT test_results_test_suites_project_code_test_suite_fk FOREIGN KEY (test_suite_id) REFERENCES test_suites diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql index c4ea048..2524edc 100644 --- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql +++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql @@ -12,15 +12,16 @@ ALTER TABLE cat_test_conditions DROP CONSTRAINT cat_test_conditions_cat_tests_te TRUNCATE TABLE profile_anomaly_types; -INSERT INTO profile_anomaly_types (id, anomaly_type, data_object, anomaly_name, anomaly_description, anomaly_criteria, detail_expression, issue_likelihood, suggested_action) +INSERT INTO profile_anomaly_types + (id, anomaly_type, data_object, anomaly_name, anomaly_description, anomaly_criteria, detail_expression, issue_likelihood, suggested_action, dq_score_prevalence_formula, dq_score_risk_factor) VALUES ('1001', 'Suggested_Type', 'Column', 'Suggested Data Type', 'Data stored as text all meets criteria for a more suitable type. ', '(functional_data_type NOT IN (''Boolean'', ''Flag'') ) AND (column_type ILIKE ''%ch ar%'' OR column_type ILIKE ''text'') AND NOT (datatype_suggestion ILIKE ''%char%'' OR datatype_suggestion ILIKE ''text'')', 'p.datatype_suggestion::VARCHAR(200)', 'Likely', 'Consider changing the column data type to tighte -n controls over data ingested and to make values more efficient, consistent and suitable for downstream analysis.'), - ('1002', 'Non_Standard_Blanks', 'Column', 'Non-Standard Blank Values', 'Values representing missing data may be unexpected or inconsistent. 
Non-standard values may include empty strings as opposed to nulls, dummy entries such as "MISSING" or repeated characters that may have been used to bypass entry requirements, processing artifacts such as "NULL", or spreadsheet artifacts such as "NA", "ERROR".', '(p.filled_value_ct > 0 OR p.zero_length_ct > 0)', '''Filled Values: '' || p.filled_value_ct::VARCHAR || '', Empty String: '' || p.zero_length_ct::VARCHAR || '', Null: '' || p.null_value_ct::VARCHAR || '', Records: '' || p.record_ct::VARCHAR', 'Definite', 'Consider cleansing the column upon ingestion to replace all variants of missing data with a standard designation, like Null.'), - ('1003', 'Invalid_Zip_USA', 'Column', 'Invalid USA Zip Code Format', 'Some values present do not conform with the expected format of USA Zip Codes.', 'p.std_pattern_match = ''ZIP_USA'' AND (p.general_type <> ''A'' OR p.filled_value_ct > 0 OR p.min_length >= 1 AND p.min_length <= 4 OR p.max_length > 10)', 'CASE WHEN p.general_type = ''N'' THEN ''Type: '' || p.column_type || '', '' ELSE '''' END || ''Min Length: '' || p.min_length::VARCHAR || '', Max Length: '' || p.max_length::VARCHAR || '', Filled Values: '' || p.filled_value_ct::VARCHAR', 'Definite', 'Consider correcting invalid column values or changing them to indicate a missing value if corrections cannot be made.'), - ('1004', 'Multiple_Types_Minor', 'Multi-Col', 'Multiple Data Types per Column Name - Minor', 'Columns with the same name have the same general type across tables, but the types do not exactly match. Truncation issues may result if columns are commingled and assumed to be the same format.', 'm.general_type_ct = 1 AND m.type_ct > 1', '''Found '' || m.column_ct::VARCHAR || '' columns, '' || m.type_ct::VARCHAR(10) || '' types, '' || m.min_type || '' to '' || m.max_type', 'Possible', 'Consider changing the column data types to be fully consistent. This will tighten your standards at ingestion and assure that data is consistent between tables.'), - ('1005', 'Multiple_Types_Major', 'Multi-Col', 'Multiple Data Types per Column Name - Major', 'Columns with the same name have broadly different types across tables. Differences could be significant enough to cause errors in downstream analysis, extra steps resulting in divergent business logic and inconsistencies in results.', 'm.general_type_ct > 1', '''Found '' || m.column_ct::VARCHAR || '' columns, '' || m.type_ct::VARCHAR(10) || '' types, '' || m.min_type || '' to '' || m.max_type', 'Likely', 'Ideally, you should change the column data types to be fully consistent. If the data is meant to be different, you should change column names so downstream users aren''t led astray.'), - ('1006', 'No_Values', 'Column', 'No Column Values Present', 'This column is present in the table, but no values have been ingested or assigned in any records. This could indicate missing data or a processing error. Note that this considers dummy values and zero-length values as missing data. 
', '(p.null_value_ct + p.filled_value_ct + p.zero_length_ct) = p.record_ct', '''Null: '' || p.null_value_ct::VARCHAR(10) || '', Filled: '' || p.filled_value_ct::VARCHAR(10) || '', Zero Len: '' || p.zero_length_ct::VARCHAR(10)', 'Possible', 'Review your source data, ingestion process, and any processing steps that update this column.'), +n controls over data ingested and to make values more efficient, consistent and suitable for downstream analysis.', NULL, NULL), + ('1002', 'Non_Standard_Blanks', 'Column', 'Non-Standard Blank Values', 'Values representing missing data may be unexpected or inconsistent. Non-standard values may include empty strings as opposed to nulls, dummy entries such as "MISSING" or repeated characters that may have been used to bypass entry requirements, processing artifacts such as "NULL", or spreadsheet artifacts such as "NA", "ERROR".', '(p.filled_value_ct > 0 OR p.zero_length_ct > 0)', '''Filled Values: '' || p.filled_value_ct::VARCHAR || '', Empty String: '' || p.zero_length_ct::VARCHAR || '', Null: '' || p.null_value_ct::VARCHAR || '', Records: '' || p.record_ct::VARCHAR', 'Definite', 'Consider cleansing the column upon ingestion to replace all variants of missing data with a standard designation, like Null.', 'p.filled_value_ct::FLOAT/p.record_ct::FLOAT', '1.0'), + ('1003', 'Invalid_Zip_USA', 'Column', 'Invalid USA Zip Code Format', 'Some values present do not conform with the expected format of USA Zip Codes.', 'p.std_pattern_match = ''ZIP_USA'' AND (p.general_type <> ''A'' OR p.filled_value_ct > 0 OR p.min_length >= 1 AND p.min_length <= 4 OR p.max_length > 10)', 'CASE WHEN p.general_type = ''N'' THEN ''Type: '' || p.column_type || '', '' ELSE '''' END || ''Min Length: '' || p.min_length::VARCHAR || '', Max Length: '' || p.max_length::VARCHAR || '', Filled Values: '' || p.filled_value_ct::VARCHAR', 'Definite', 'Consider correcting invalid column values or changing them to indicate a missing value if corrections cannot be made.', NULL, '1.0'), + ('1004', 'Multiple_Types_Minor', 'Multi-Col', 'Multiple Data Types per Column Name - Minor', 'Columns with the same name have the same general type across tables, but the types do not exactly match. Truncation issues may result if columns are commingled and assumed to be the same format.', 'm.general_type_ct = 1 AND m.type_ct > 1', '''Found '' || m.column_ct::VARCHAR || '' columns, '' || m.type_ct::VARCHAR(10) || '' types, '' || m.min_type || '' to '' || m.max_type', 'Possible', 'Consider changing the column data types to be fully consistent. This will tighten your standards at ingestion and assure that data is consistent between tables.', NULL, NULL), + ('1005', 'Multiple_Types_Major', 'Multi-Col', 'Multiple Data Types per Column Name - Major', 'Columns with the same name have broadly different types across tables. Differences could be significant enough to cause errors in downstream analysis, extra steps resulting in divergent business logic and inconsistencies in results.', 'm.general_type_ct > 1', '''Found '' || m.column_ct::VARCHAR || '' columns, '' || m.type_ct::VARCHAR(10) || '' types, '' || m.min_type || '' to '' || m.max_type', 'Likely', 'Ideally, you should change the column data types to be fully consistent. If the data is meant to be different, you should change column names so downstream users aren''t led astray.', NULL, NULL), + ('1006', 'No_Values', 'Column', 'No Column Values Present', 'This column is present in the table, but no values have been ingested or assigned in any records. 
This could indicate missing data or a processing error. Note that this considers dummy values and zero-length values as missing data. ', '(p.null_value_ct + p.filled_value_ct + p.zero_length_ct) = p.record_ct', '''Null: '' || p.null_value_ct::VARCHAR(10) || '', Filled: '' || p.filled_value_ct::VARCHAR(10) || '', Zero Len: '' || p.zero_length_ct::VARCHAR(10)', 'Possible', 'Review your source data, ingestion process, and any processing steps that update this column.', '1.0', '0.33'), ('1007', 'Column_Pattern_Mismatch', 'Column', 'Pattern Inconsistency Within Column', 'Alpha-numeric string data within this column conforms to 2-4 different patterns, with 95% matching the first pattern. This could indicate data errors in the remaining values. ', 'p.general_type = ''A'' AND p.max_length > 3 AND p.value_ct > (p.numeric_ct + p.filled_value_ct) @@ -31,127 +32,121 @@ n controls over data ingested and to make values more efficient, consistent and AND SPLIT_PART(p.top_patterns, ''|'', 3)::NUMERIC / SPLIT_PART(p.top_patterns, ''|'', 1)::NUMERIC < 0.05) OR SPLIT_PART(p.top_patterns, ''|'', 3)::NUMERIC / SPLIT_PART(p.top_patterns, ''|'', 1)::NUMERIC < 0.1 - )', '''Patterns: '' || p.top_patterns', 'Likely', 'Review the values for any data that doesn''t conform to the most common pattern and correct any data errors.'), + )', '''Patterns: '' || p.top_patterns', 'Likely', 'Review the values for any data that doesn''t conform to the most common pattern and correct any data errors.', '(p.record_ct - SPLIT_PART(p.top_patterns, ''|'', 1)::INT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT', '0.66'), ('1008', 'Table_Pattern_Mismatch', 'Multi-Col', 'Pattern Inconsistency Across Tables', 'Alpha-numeric string data within this column matches a single pattern, but other columns with the same name have data that matches a different single pattern. Inconsistent formatting may contradict user assumptions and cause downstream errors, extra steps and inconsistent business logic.', 'p.general_type = ''A'' AND p.max_length > 3 AND p.value_ct > (p.numeric_ct + p.filled_value_ct) AND m.max_pattern_ct = 1 AND m.column_ct > 1 AND SPLIT_PART(p.top_patterns, ''|'', 2) <> SPLIT_PART(m.very_top_pattern, ''|'', 2) - AND SPLIT_PART(p.top_patterns, ''|'', 1)::NUMERIC / SPLIT_PART(m.very_top_pattern, ''|'', 1)::NUMERIC < 0.1', '''Patterns: '' || SPLIT_PART(p.top_patterns, ''|'', 2) || '', '' || SPLIT_PART(ltrim(m.very_top_pattern, ''0''), ''|'', 2)', 'Likely', 'Review the profiled patterns for the same column in other tables. You may want to add a hygiene step to your processing to make patterns consistent.'), - ('1009', 'Leading_Spaces', 'Column', 'Leading Spaces Found in Column Values', 'Spaces were found before data at the front of column string values. This likely contradicts user expectations and could be a sign of broader ingestion or processing errors.', 'p.lead_space_ct > 0', '''Cases Found: '' || p.lead_space_ct::VARCHAR(10)', 'Likely', 'Review your source data, ingestion process, and any processing steps that update this column.'), - ('1010', 'Quoted_Values', 'Column', 'Quoted Values Found in Column Values', 'Column values were found within quotes. 
This likely contradicts user expectations and could be a sign of broader ingestion or processing errors.', 'p.quoted_value_ct > 0', '''Cases Found: '' || p.quoted_value_ct::VARCHAR(10)', 'Likely', 'Review your source data, ingestion process, and any processing steps that update this column.'), + AND SPLIT_PART(p.top_patterns, ''|'', 1)::NUMERIC / SPLIT_PART(m.very_top_pattern, ''|'', 1)::NUMERIC < 0.1', '''Patterns: '' || SPLIT_PART(p.top_patterns, ''|'', 2) || '', '' || SPLIT_PART(ltrim(m.very_top_pattern, ''0''), ''|'', 2)', 'Likely', 'Review the profiled patterns for the same column in other tables. You may want to add a hygiene step to your processing to make patterns consistent.', NULL, NULL), + ('1009', 'Leading_Spaces', 'Column', 'Leading Spaces Found in Column Values', 'Spaces were found before data at the front of column string values. This likely contradicts user expectations and could be a sign of broader ingestion or processing errors.', 'p.lead_space_ct > 0', '''Cases Found: '' || p.lead_space_ct::VARCHAR(10)', 'Likely', 'Review your source data, ingestion process, and any processing steps that update this column.', 'p.lead_space_ct::FLOAT/NULLIF(p.record_ct, 0)::FLOAT', '0.66'), + ('1010', 'Quoted_Values', 'Column', 'Quoted Values Found in Column Values', 'Column values were found within quotes. This likely contradicts user expectations and could be a sign of broader ingestion or processing errors.', 'p.quoted_value_ct > 0', '''Cases Found: '' || p.quoted_value_ct::VARCHAR(10)', 'Likely', 'Review your source data, ingestion process, and any processing steps that update this column.', 'p.quoted_value_ct::FLOAT/NULLIF(p.record_ct, 0)::FLOAT', '0.66'), ('1011', 'Char_Column_Number_Values', 'Column', 'Character Column with Mostly Numeric Values', 'This column is defined as alpha, but more than 95% of its values are numeric. Numbers in alpha columns won''t sort correctly, and might contradict user expectations downstream. It''s also possible that more than one type of information is stored in the column, making it harder to retrieve.', 'p.general_type = ''A'' AND p.column_name NOT ILIKE ''%zip%'' AND p.functional_data_type NOT ILIKE ''id%'' AND p.value_ct > p.numeric_ct - AND p.numeric_ct::NUMERIC > (0.95 * p.value_ct::NUMERIC)', '''Numeric Ct: '' || p.numeric_ct || '' of '' || p.value_ct || '' (Numeric Percent: '' || ROUND(100.0 * p.numeric_ct::NUMERIC(18, 5) / p.value_ct::NUMERIC(18, 5), 2) || '' )''::VARCHAR(200)', 'Likely', 'Review your source data and ingestion process. Consider whether it might be better to store the numeric data in a numeric column. If the alpha data is significant, you could store it in a different column.'), + AND p.numeric_ct::NUMERIC > (0.95 * p.value_ct::NUMERIC)', '''Numeric Ct: '' || p.numeric_ct || '' of '' || p.value_ct || '' (Numeric Percent: '' || ROUND(100.0 * p.numeric_ct::NUMERIC(18, 5) / p.value_ct::NUMERIC(18, 5), 2) || '' )''::VARCHAR(200)', 'Likely', 'Review your source data and ingestion process. Consider whether it might be better to store the numeric data in a numeric column. If the alpha data is significant, you could store it in a different column.', 'p.numeric_ct::FLOAT/NULLIF(p.record_ct, 0)::FLOAT', '0.66'), ('1012', 'Char_Column_Date_Values', 'Column', 'Character Column with Mostly Date Values', 'This column is defined as alpha, but more than 95% of its values are dates. Dates in alpha columns might not sort correctly, and might contradict user expectations downstream. 
It''s also possible that more than one type of information is stored in the column, making it harder to retrieve. ', 'p.general_type = ''A'' AND p.value_ct > p.date_ct - AND p.date_ct::NUMERIC > (0.95 * p.value_ct::NUMERIC)', ''' Date Ct: '' || p.date_ct || '' of '' || p.value_ct || '' (Date Percent: '' || ROUND(100.0 * p.date_ct::NUMERIC(18, 5) / p.value_ct::NUMERIC(18, 5), 2) || '' )''::VARCHAR(200)' , 'Likely', 'Review your source data and ingestion process. Consider whether it might be better to store the date values as a date or datetime column. If the alpha data is also significant, you could store it in a different column.'), + AND p.date_ct::NUMERIC > (0.95 * p.value_ct::NUMERIC)', ''' Date Ct: '' || p.date_ct || '' of '' || p.value_ct || '' (Date Percent: '' || ROUND(100.0 * p.date_ct::NUMERIC(18, 5) / p.value_ct::NUMERIC(18, 5), 2) || '' )''::VARCHAR(200)', 'Likely', 'Review your source data and ingestion process. Consider whether it might be better to store the date values as a date or datetime column. If the alpha data is also significant, you could store it in a different column.', 'p.date_ct::FLOAT/NULLIF(p.record_ct, 0)::FLOAT', '0.66'), ('1013', 'Small Missing Value Ct', 'Column', 'Small Percentage of Missing Values Found', 'Under 3% of values in this column were found to be null, zero-length or dummy values, but values are not universally present. This could indicate unexpected missing values in a required column.', '(p.value_ct - p.zero_length_ct - p.filled_value_ct)::FLOAT / p.record_ct::FLOAT > 0.97 AND (p.value_ct - p.zero_length_ct - p.filled_value_ct) < p.record_ct', '(p.record_ct - (p.value_ct - p.zero_length_ct - p.filled_value_ct))::VARCHAR(20) || '' of '' || p.record_ct::VARCHAR(20) || '' blank values: '' || ROUND(100.0 * (p.record_ct - (p.value_ct - p.zero_length_ct - p.filled_value_ct))::NUMERIC(18, 5) - / NULLIF(p.value_ct, 0)::NUMERIC(18, 5), 2)::VARCHAR(40) || ''%''', 'Possible', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected, supplemented or excluded.'), + / NULLIF(p.value_ct, 0)::NUMERIC(18, 5), 2)::VARCHAR(40) || ''%''', 'Possible', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected, supplemented or excluded.', '(p.null_value_ct + filled_value_ct + zero_length_ct)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT', '0.33'), ('1014', 'Small Divergent Value Ct', 'Column', 'Small Percentage of Divergent Values Found', 'Under 3% of values in this column were found to be different from the most common value. 
This could indicate a data error.', '(100.0 * fn_parsefreq(p.top_freq_values, 1, 2)::FLOAT / p.value_ct::FLOAT) > 97::FLOAT AND (100.0 * fn_parsefreq(p.top_freq_values, 1, 2)::FLOAT / NULLIF(p.value_ct, 0)::FLOAT) < 100::FLOAT', '''Single Value Pct: '' || ROUND(100.0 * fn_parsefreq(p.top_freq_values, 1, 2)::FLOAT / NULLIF(p.value_ct, 0)::FLOAT)::VARCHAR(40) - || '', Value | Freq: '' || top_freq_values', 'Possible', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected.'), + || '', Value | Freq: '' || top_freq_values', 'Possible', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected.', '(p.record_ct - fn_parsefreq(p.top_freq_values, 1, 2)::INT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT', '0.33'), ('1015', 'Boolean_Value_Mismatch', 'Column', 'Unexpected Boolean Values Found', 'This column appears to contain boolean (True/False) data, but unexpected values were found. This could indicate inconsistent coding for the same intended values, potentially leading to downstream errors or inconsistent business logic. ', '(distinct_value_ct > 1 AND ((lower(top_freq_values) ILIKE ''| true |%'' OR lower(top_freq_values) ILIKE ''| false |%'') AND NOT (lower(top_freq_values) ILIKE ''%| true |%'' AND lower(top_freq_values) ILIKE ''%| false |%'')) OR ((lower(top_freq_values) ILIKE ''| yes |%'' OR lower(top_freq_values) ILIKE ''| no |%'' ) AND NOT (lower(top_freq_values) ILIKE ''%| yes |%'' AND lower(top_freq_values) ILIKE ''%| no |%'')) )', 'CASE WHEN p.top_freq_values IS NULL THEN ''Min: '' || p.min_text || '', Max: '' || p.max_text - ELSE ''Top Freq: '' || p.top_freq_values END', 'Likely', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected. '), + ELSE ''Top Freq: '' || p.top_freq_values END', 'Likely', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected. ', NULL, '0.66'), ('1016', 'Potential_Duplicates', 'Column', 'Potential Duplicate Values Found', 'This column is largely unique, but some duplicate values are present. This pattern is uncommon and could indicate inadvertant duplication. ', 'p.distinct_value_ct > 1000 - AND fn_parsefreq(p.top_freq_values, 1, 2)::BIGINT BETWEEN 2 AND 4', '''Top Freq: '' || p.top_freq_values', 'Possible', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected. '), + AND fn_parsefreq(p.top_freq_values, 1, 2)::BIGINT BETWEEN 2 AND 4', '''Top Freq: '' || p.top_freq_values', 'Possible', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected. ', '(p.value_ct - p.distinct_value_ct)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT', '0.33'), ('1017', 'Standardized_Value_Matches', 'Column', 'Similar Values Match When Standardized', 'When column values are standardized (removing spaces, single-quotes, periods and dashes), matching values are found in other records. This may indicate that formats should be further standardized to allow consistent comparisons for merges, joins and roll-ups. It could also indicate the presence of unintended duplicates.', 'p.general_type = ''A'' AND p.distinct_std_value_ct <> p.distinct_value_ct', '''Distinct Values: '' || p.distinct_value_ct::VARCHAR - || '', Standardized: '' || p.distinct_std_value_ct::VARCHAR', 'Likely', 'Review standardized vs. raw data values for all matches. 
Correct data if values should be consistent.'), + || '', Standardized: '' || p.distinct_std_value_ct::VARCHAR', 'Likely', 'Review standardized vs. raw data values for all matches. Correct data if values should be consistent.', '(p.distinct_value_ct - p.distinct_std_value_ct)::FLOAT/NULLIF(p.value_ct, 0)', '0.66'), ('1018', 'Unlikely_Date_Values', 'Column', 'Unlikely Dates out of Typical Range', 'Some date values in this column are earlier than 1900-01-01 or later than 30 years after Profiling date.', 'p.general_type = ''D'' AND (p.min_date BETWEEN ''0001-01-02''::DATE AND ''1900-01-01''::DATE - OR p.max_date > CURRENT_DATE + INTERVAL ''30 year'')', '''Date Range: '' || p.min_date::VARCHAR || '' thru '' || p.max_date::VARCHAR', 'Likely', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected or removed.'), - ('1019', 'Recency_One_Year', 'Dates', 'Recency - No Table Dates within 1 Year', 'Among all date columns present in the table, none fall inside of one year from Profile date.', 'MAX(p.max_date) < CURRENT_DATE - INTERVAL ''1 year''', '''Most Recent Date: '' || MAX(p.max_date)::VARCHAR', 'Possible', 'Review your source data and follow-up with data owners to determine whether dates in table should be more recent.'), - ('1020', 'Recency_Six_Months', 'Dates', 'Recency - No Table Dates within 6 Months', 'Among all date columns present in the table, the most recent date falls 6 months to 1 year back from Profile date. ', 'MAX(p.max_date) >= CURRENT_DATE - INTERVAL ''1 year'' AND MAX(p.max_date) < CURRENT_DATE - INTERVAL ''6 months''', '''Most Recent Date: '' || MAX(p.max_date)::VARCHAR', 'Possible', 'Review your source data and follow-up with data owners to determine whether dates in table should be more recent.'), + OR p.max_date > CURRENT_DATE + INTERVAL ''30 year'')', '''Date Range: '' || p.min_date::VARCHAR || '' thru '' || p.max_date::VARCHAR', 'Likely', 'Review your source data and follow-up with data owners to determine whether this data needs to be corrected or removed.', '(COALESCE(p.before_100yr_date_ct,0)+COALESCE(p.distant_future_date_ct, 0))::FLOAT/NULLIF(p.record_ct, 0)', '0.66'), + ('1019', 'Recency_One_Year', 'Dates', 'Recency - No Table Dates within 1 Year', 'Among all date columns present in the table, none fall inside of one year from Profile date.', 'MAX(p.max_date) < CURRENT_DATE - INTERVAL ''1 year''', '''Most Recent Date: '' || MAX(p.max_date)::VARCHAR', 'Possible', 'Review your source data and follow-up with data owners to determine whether dates in table should be more recent.', NULL, NULL), + ('1020', 'Recency_Six_Months', 'Dates', 'Recency - No Table Dates within 6 Months', 'Among all date columns present in the table, the most recent date falls 6 months to 1 year back from Profile date. ', 'MAX(p.max_date) >= CURRENT_DATE - INTERVAL ''1 year'' AND MAX(p.max_date) < CURRENT_DATE - INTERVAL ''6 months''', '''Most Recent Date: '' || MAX(p.max_date)::VARCHAR', 'Possible', 'Review your source data and follow-up with data owners to determine whether dates in table should be more recent.', NULL, NULL), ('1021', 'Unexpected US States', 'Column', 'Unexpected Column Contains US States', 'This column is not labeled as a state, but contains mostly US State abbreviations. 
This could indicate shifted or switched source data columns.', 'p.std_pattern_match = ''STATE_USA'' AND p.distinct_value_ct > 5 - AND NOT (p.column_name ILIKE ''%state%'' OR p.column_name ILIKE ''%_st'')', '''Value Range: '' || p.min_text || '' thru '' || max_text || CASE WHEN p.top_freq_values > '''' THEN ''Top Freq Values: '' || REPLACE(p.top_freq_values, CHR(10), '' ; '') ELSE '''' END ', 'Possible', 'Review your source data and follow-up with data owners to determine whether column should be populated with US states.'), + AND NOT (p.column_name ILIKE ''%state%'' OR p.column_name ILIKE ''%_st'')', '''Value Range: '' || p.min_text || '' thru '' || max_text || CASE WHEN p.top_freq_values > '''' THEN ''Top Freq Values: '' || REPLACE(p.top_freq_values, CHR(10), '' ; '') ELSE '''' END ', 'Possible', 'Review your source data and follow-up with data owners to determine whether column should be populated with US states.', NULL, '0.33'), ('1022', 'Unexpected Emails', 'Column', 'Unexpected Column Contains Emails', 'This column is not labeled as email, but contains mostly email addresses. This could indicate shifted or switched source data columns.', 'p.std_pattern_match = ''EMAIL'' - AND NOT (p.column_name ILIKE ''%email%'' OR p.column_name ILIKE ''%addr%'')', '''Value Range: '' || p.min_text || '' thru '' || max_text', 'Possible', 'Review your source data and follow-up with data owners to determine whether column should be populated with email addresses.'), - ('1023', 'Small_Numeric_Value_Ct', 'Column', 'Unexpected Numeric Values Found', - 'Under 3% of values in this column were found to be numeric. This could indicate a data error.', e'p.general_type = \'A\' + AND NOT (p.column_name ILIKE ''%email%'' OR p.column_name ILIKE ''%addr%'')', '''Value Range: '' || p.min_text || '' thru '' || max_text', 'Possible', 'Review your source data and follow-up with data owners to determine whether column should be populated with email addresses.', NULL, '0.33'), + ('1023', 'Small_Numeric_Value_Ct', 'Column', 'Unexpected Numeric Values Found', 'Under 3% of values in this column were found to be numeric. This could indicate a data error.', 'p.general_type = ''A'' AND p.numeric_ct::FLOAT/NULLIF(p.record_ct, 0)::FLOAT < 0.03 - AND p.numeric_ct > 0', - '''Numeric Ct: '' || p.numeric_ct || '' of '' || p.value_ct || '' (Numeric Percent: '' || ROUND(100.0 * p.numeric_ct::NUMERIC(18, 5)/NULLIF(p.value_ct, 0)::NUMERIC(18, 5), 2) || '' )''::VARCHAR(200)', - 'Likely', 'Review your source data and follow-up with data owners to determine whether numeric values are invalid entries here.'), - ('1024', 'Invalid_Zip3_USA', 'Column', 'Invalid USA ZIP-3 Format', 'The majority of values in this column are 3-digit zips, but divergent patterns were found. This could indicate an incorrect roll-up category or a PII concern.', 'p.distinct_pattern_ct > 1 + AND p.numeric_ct > 0', '''Numeric Ct: '' || p.numeric_ct || '' of '' || p.value_ct || '' (Numeric Percent: '' || ROUND(100.0 * p.numeric_ct::NUMERIC(18, 5)/NULLIF(p.value_ct, 0)::NUMERIC(18, 5), 2) || '' )''::VARCHAR(200)', 'Likely', 'Review your source data and follow-up with data owners to determine whether numeric values are invalid entries here.', 'p.numeric_ct::FLOAT/NULLIF(p.record_ct, 0)::FLOAT', '0.66'), + ('1024', 'Invalid_Zip3_USA', 'Column', 'Invalid USA ZIP-3 Format', 'The majority of values in this column are 3-digit zips, but divergent patterns were found. 
This could indicate an incorrect roll-up category or a PII concern.', 'p.distinct_pattern_ct > 1 AND (p.column_name ilike ''%zip%'' OR p.column_name ILIKE ''%postal%'') AND SPLIT_PART(p.top_patterns, '' | '', 2) = ''NNN'' - AND SPLIT_PART(p.top_patterns, '' | '', 1)::FLOAT/NULLIF(value_ct, 0)::FLOAT > 0.50', '''Pattern: '' || p.top_patterns', 'Definite', 'Review your source data, ingestion process, and any processing steps that update this column.'), - ('1025', 'Delimited_Data_Embedded', 'Column', 'Delimited Data Embedded in Column', 'Delimited data, separated by a common delimiter (comma, tab, pipe or caret) is present in over 80% of column values. This could indicate data that was incorrectly ingested, or data that would be better represented in parsed form.', 'p.std_pattern_match = ''DELIMITED_DATA''', 'CASE WHEN p.top_freq_values IS NULL THEN ''Min: '' || p.min_text || '', Max: '' || p.max_text ELSE ''Top Freq: '' || p.top_freq_values END', 'Likely', 'Review your source data and follow-up with data consumers to determine the most useful representation of this data.'), - ('1026', 'Char_Column_Number_Units', 'Column', 'Character Column with Numbers and Units', - 'This column is defined as alpha, but values include numbers with percents or common units. Embedded measures in alpha columns are harder to access, won''t sort correctly, and might contradict user expectations downstream. Consider parsing into numeric and UOM columns to improve usability.', - 'p.includes_digit_ct::FLOAT/NULLIF(p.value_ct, 0)::FLOAT > 0.5 AND TRIM(fn_parsefreq(p.top_freq_values, 1, 1)) ~ ''(?i)^[0-9]+(\.[0-9]+)? ?(%|lb|oz|kg|g|mg|km|m|cm|mm|mi|ft|in)$''', - '''Top Freq: '' || p.top_freq_values', 'Possible', - 'Review your source data and ingestion process. Consider whether it might be better to parse the numeric and unit data and store in separate columns.'), - ('1027', 'Variant_Coded_Values', 'Variant', 'Variant Codings for Same Values', 'This column contains more than one common variants that represent a single value or state. This can occur when data is integrated from multiple sources with different standards, or when free entry is permitted without validation. The variations can cause confusion and error for downstream data users and multiple versions of the truth. ', 'p.distinct_value_ct <= 20', '''Variants Found: '' || intersect_list', 'Definite', 'Review your source data and ingestion process. Consider cleansing this data to standardize on a single set of definitive codes.'), - ('1100', 'Potential_PII', 'Column', 'Personally Identifiable Information', 'This column contains data that could be Personally Identifiable Information (PII)', 'p.pii_flag > ''''', '''Risk: '' || CASE LEFT(p.pii_flag, 1) WHEN ''A'' THEN ''HIGH'' WHEN ''B'' THEN ''MODERATE'' WHEN ''C'' THEN ''LOW'' END || '', PII Type: '' || SUBSTRING(p.pii_flag, 3)', 'Potential PII', 'PII may require steps to ensure data security and compliance with relevant privacy regulations and legal requirements. You may have to classify and inventory PII, implement appropriate access controls, encrypt data, and monitor for unauthorized access. Your organization might be required to update privacy policies and train staff on data protection practices. 
Note that PII that is lower-risk in isolation might be high-risk in conjunction with other data.'); + AND SPLIT_PART(p.top_patterns, '' | '', 1)::FLOAT/NULLIF(value_ct, 0)::FLOAT > 0.50', '''Pattern: '' || p.top_patterns', 'Definite', 'Review your source data, ingestion process, and any processing steps that update this column.', '(NULLIF(p.record_ct, 0)::INT - SPLIT_PART(p.top_patterns, '' | '', 1)::INT)::FLOAT/NULLIF(p.record_ct, 0)::FLOAT', '1'), + ('1025', 'Delimited_Data_Embedded', 'Column', 'Delimited Data Embedded in Column', 'Delimited data, separated by a common delimiter (comma, tab, pipe or caret) is present in over 80% of column values. This could indicate data that was incorrectly ingested, or data that would be better represented in parsed form.', 'p.std_pattern_match = ''DELIMITED_DATA''', 'CASE WHEN p.top_freq_values IS NULL THEN ''Min: '' || p.min_text || '', Max: '' || p.max_text ELSE ''Top Freq: '' || p.top_freq_values END', 'Likely', 'Review your source data and follow-up with data consumers to determine the most useful representation of this data.', NULL, '0.66'), + ('1026', 'Char_Column_Number_Units', 'Column', 'Character Column with Numbers and Units', 'This column is defined as alpha, but values include numbers with percents or common units. Embedded measures in alpha columns are harder to access, won''t sort correctly, and might contradict user expectations downstream. Consider parsing into numeric and UOM columns to improve usability.', 'p.includes_digit_ct::FLOAT/NULLIF(p.value_ct, 0)::FLOAT > 0.5 AND TRIM(fn_parsefreq(p.top_freq_values, 1, 1)) ~ ''(?i)^[0-9]+(\.[0-9]+)? ?(%|lb|oz|kg|g|mg|km|m|cm|mm|mi|ft|in)$''', '''Top Freq: '' || p.top_freq_values', 'Possible', 'Review your source data and ingestion process. Consider whether it might be better to parse the numeric and unit data and store in separate columns.', NULL, '0.33'), + ('1027', 'Variant_Coded_Values', 'Variant', 'Variant Codings for Same Values', 'This column contains more than one common variants that represent a single value or state. This can occur when data is integrated from multiple sources with different standards, or when free entry is permitted without validation. The variations can cause confusion and error for downstream data users and multiple versions of the truth. ', 'p.distinct_value_ct <= 20', '''Variants Found: '' || intersect_list', 'Definite', 'Review your source data and ingestion process. Consider cleansing this data to standardize on a single set of definitive codes.', NULL, NULL), + ('1100', 'Potential_PII', 'Column', 'Personally Identifiable Information', 'This column contains data that could be Personally Identifiable Information (PII)', 'p.pii_flag > ''''', '''Risk: '' || CASE LEFT(p.pii_flag, 1) WHEN ''A'' THEN ''HIGH'' WHEN ''B'' THEN ''MODERATE'' WHEN ''C'' THEN ''LOW'' END || '', PII Type: '' || SUBSTRING(p.pii_flag, 3)', 'Potential PII', 'PII may require steps to ensure data security and compliance with relevant privacy regulations and legal requirements. You may have to classify and inventory PII, implement appropriate access controls, encrypt data, and monitor for unauthorized access. Your organization might be required to update privacy policies and train staff on data protection practices. 
Note that PII that is lower-risk in isolation might be high-risk in conjunction with other data.', NULL, 'CASE LEFT(p.pii_flag, 1) WHEN ''A'' THEN 1 WHEN ''B'' THEN 0.66 WHEN ''C'' THEN 0.33 END') +; TRUNCATE TABLE test_types; INSERT INTO test_types - (id, test_type, test_name_short, test_name_long, test_description, except_message, measure_uom, measure_uom_description, selection_criteria, column_name_prompt, column_name_help, default_parm_columns, default_parm_values, default_parm_prompts, default_parm_help, default_severity, run_type, test_scope, dq_dimension, health_dimension, threshold_description, usage_notes, active) -VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count consistent', 'Tests that the maximum count of characters in a column value has not dropped vs. baseline data', 'Maximum length of values has dropped from prior expected length.', 'Values over max', NULL, 'general_type =''A'' AND max_length > 0 AND ( (min_length = avg_length AND max_length = avg_length) OR (numeric_ct <> value_ct ) ) AND functional_table_type NOT LIKE ''%window%'' /* The conditions below are to eliminate overlap with : LOV_Match (excluded selection criteria for this test_type), Pattern_Match (excluded selection criteria for this test_type), Constant (excluded functional_data_type Constant and Boolean) */ AND ( (distinct_value_ct NOT BETWEEN 2 AND 10 AND functional_data_type NOT IN ( ''Constant'', ''Boolean'') ) AND NOT ( fn_charcount(top_patterns, E'' \| '' ) = 1 AND fn_charcount(top_patterns, E'' \| '' ) IS NOT NULL AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > ''''))', NULL, NULL, 'threshold_value', 'max_length', 'Maximum String Length at Baseline', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Maximum length expected', 'Alpha Truncation tests that the longest text value in a column hasn''t become shorter than the longest value at baseline. This could indicate a problem in a cumulative dataset, where prior values should still exist unchanged. A failure here would suggest that some process changed data that you would still expect to be present and matching its value when the column was profiled. This test would not be appropriate for an incremental or windowed dataset.', 'Y'), - ('1005', 'Avg_Shift', 'Average Shift', 'Column mean is consistent with reference', 'Tests for statistically-significant shift in mean value for column from average calculated at baseline.', 'Standardized difference between averages is over the selected threshold level.', 'Difference Measure', 'Cohen''s D Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'general_type=''N'' AND distinct_value_ct > 10 AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%''', NULL, NULL, 'baseline_value_ct,baseline_avg,baseline_sd,threshold_value', 'value_ct,avg_value,stdev_value,0.5::VARCHAR', 'Value Ct at Baseline,Mean at Baseline,Std Deviation at Baseline,Threshold Difference Measure ', NULL, 'Warning', 'CAT', 'column', 'Consistency', 'Data Drift', 'Standardized Difference Measure', 'Average Shift tests that the average of a numeric column has not significantly changed since baseline, when profiling was done. A significant shift may indicate errors in processing, differences in source data, or valid changes that may nevertheless impact assumptions in downstream data products. The test uses Cohen''s D, a statistical technique to identify significant shifts in a value. 
Cohen''s D measures the difference between the two averages, reporting results on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. Depending on your data, some difference may be expected, so it''s reasonable to adjust the threshold value that triggers test failure. This test works well for measures, or even for identifiers if you expect them to increment consistently. You may want to periodically adjust the expected threshold, or even the expected average value if you expect shifting over time. Consider this test along with Variability Increase. If variability rises too, process or measurement flaws could be at work. If variability remains consistent, the issue is more likely to be with the source data itself. ', 'Y'), - ('1007', 'Constant', 'Constant Match', 'All column values match constant value', 'Tests that all values in the column match the constant value identified in baseline data', 'A constant value is expected for this column.', 'Mismatched values', NULL, 'TEMPLATE', NULL, NULL, 'baseline_value,threshold_value', NULL, 'Constant Value at Baseline,Threshold Error Count', 'The single, unchanging value of the column, per baseline|The number of errors that are acceptable before test fails.', 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Count of records with unexpected values', 'Constant Match tests that a single value determined to be a constant in baseline profiling is still the only value for the column that appears in subsequent versions of the dataset. Sometimes new data or business knowledge may reveal that the value is not a constant at all, even though only one value was present at profiling. In this case, you will want to disable this test. Alternatively, you can use the Value Match test to provide a limited number of valid values for the column.', 'Y'), - ('1009', 'Daily_Record_Ct', 'Daily Records', 'All dates present within date range', 'Tests for presence of every calendar date within min/max date range, per baseline data', 'Not every date value between min and max dates is present, unlike at baseline.', 'Missing dates', NULL, 'general_type= ''D'' AND date_days_present > 21 AND date_days_present - (DATEDIFF(''day'', ''1800-01-05''::DATE, max_date) - DATEDIFF(''day'', ''1800-01-05''::DATE, min_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Calendar Days', NULL, 'Warning', 'CAT', 'column', 'Completeness', 'Volume', 'Missing calendar days within min/max range', 'Daily Records tests that at least one record is present for every day within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each day. A failure here would suggest missing records for the number of days identified without data. You can adjust the threshold to accept a number of days that you know legitimately have no records. 
', 'Y'), - ('1011', 'Dec_Trunc', 'Decimal Truncation', 'Sum of fractional values at or above reference', 'Tests for decimal truncation by confirming that the sum of fractional values in data is no less than the sum at baseline', 'The sum of fractional values is under baseline, which may indicate decimal truncation', 'Fractional sum', 'The sum of all decimal values from all data for this column', 'fractional_sum IS NOT NULL AND functional_table_type LIKE''%cumulative%''', NULL, NULL, 'threshold_value', 'ROUND(fractional_sum, 0)', 'Sum of Fractional Values at Baseline', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Minimum expected sum of all fractional values', 'Decimal Truncation tests that the fractional (decimal) part of a numeric column has not been truncated since Baseline. This works by summing all the fractional values after the decimal point and confirming that the total is at least equal to the fractional total at baseline. This could indicate a problem in a cumulative dataset, where prior values should still exist unchanged. A failure here would suggest that some process changed data that you would still expect to be present and matching its value when the column was profiled. This test would not be appropriate for an incremental or windowed dataset.', 'Y'), - ('1012', 'Distinct_Date_Ct', 'Date Count', 'Count of distinct dates at or above reference', 'Tests that the count of distinct dates referenced in the column has not dropped vs. baseline data', 'Drop in count of unique dates recorded in column.', 'Unique dates', 'Count of unique dates in transactional date column', 'general_type=''D'' and date_days_present IS NOT NULL AND functional_table_type NOT LIKE ''%window%''', NULL, NULL, 'baseline_value,threshold_value', 'date_days_present,date_days_present', 'Distinct Date Count at Baseline,Min Expected Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Minimum distinct date count expected', 'Date Count tests that the count of distinct dates present in the column has not dropped since baseline. The test is relevant for cumulative datasets, where old records are retained. A failure here would indicate missing records, which could be caused by a processing error or changed upstream data sources.', 'Y'), - ('1013', 'Distinct_Value_Ct', 'Value Count', 'Count of distinct values has not dropped', 'Tests that the count of unique values in the column has not changed from baseline.', 'Count of unique values in column has changed from baseline.', 'Unique Values', NULL, 'distinct_value_ct between 2 and 10 AND value_ct > 0 AND NOT (coalesce(top_freq_values,'''') > '''' AND distinct_value_ct BETWEEN 2 and 10) AND NOT (lower(functional_data_type) LIKE ''%sequence%'' OR lower(functional_data_type) LIKE ''%measurement%'' OR functional_data_type LIKE ''%date%'' OR general_type = ''D'')', NULL, NULL, 'baseline_value_ct,threshold_value', 'distinct_value_ct,distinct_value_ct', 'Distinct Value Count at Baseline,Min Expected Value Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected distinct value count', 'Value Count tests that the count of unique values present in the column has not dropped since baseline. The test is relevant for cumulative datasets, where old records are retained, or for any dataset where you would expect a set number of distinct values should be present. 
A failure here would indicate missing records or a change in categories or value assignment.', 'Y'), - ('1014', 'Email_Format', 'Email Format', 'Email is correctly formatted', 'Tests that non-blank, non-empty email addresses match the standard format', 'Invalid email address formats found.', 'Invalid emails', 'Number of emails that do not match standard format', 'std_pattern_match=''EMAIL''', NULL, NULL, 'threshold_value', '0', 'Maximum Invalid Email Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of invalid email addresses', NULL, 'Y'), - ('1015', 'Future_Date', 'Past Dates', 'Latest date is prior to test run date', 'Tests that the maximum date referenced in the column is no greater than the test date, consistent with baseline data', 'Future date found when absent in baseline data.', 'Future dates', NULL, 'general_type=''D''AND future_date_ct = 0', NULL, NULL, 'threshold_value', '0', 'Maximum Future Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected count of future dates', NULL, 'Y'), - ('1016', 'Future_Date_1Y', 'Future Year', 'Future dates within year of test run date', 'Tests that the maximum date referenced in the column is no greater than one year beyond the test date, consistent with baseline data', 'Future date beyond one-year found when absent in baseline.', 'Future dates post 1 year', NULL, 'general_type=''D''AND future_date_ct > 0 AND max_date <=''{AS_OF_DATE}''::DATE + INTERVAL''365 DAYS''', NULL, NULL, 'threshold_value', '0', 'Maximum Post 1-Year Future Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected count of future dates beyond one year', 'Future Year looks for date values in the column that extend beyond one year after the test date. This would be appropriate for transactional dates where you would expect to find dates in the near future, but not beyond one year ahead. Errors could indicate invalid entries or possibly dummy dates representing blank values.', 'Y'), - ('1017', 'Incr_Avg_Shift', 'New Shift', 'New record mean is consistent with reference', 'Tests for statistically-significant shift in mean of new values for column compared to average calculated at baseline.', 'Significant shift in average of new values vs. baseline avg', 'Z-score of mean shift', 'Absolute Z-score (number of SD''s outside mean) of prior avg - incremental avg', 'general_type=''N'' AND distinct_value_ct > 10 AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%''', NULL, NULL, 'baseline_value_ct,baseline_sum,baseline_avg,baseline_sd,threshold_value', 'value_ct,(avg_value * value_ct)::FLOAT,avg_value,stdev_value,2', 'Value Count at Baseline,Sum at Baseline,Mean Value at Baseline,Std Deviation at Baseline,Threshold Max Z-Score', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Maximum Z-Score (number of SD''s beyond mean) expected', 'This is a more sensitive test than Average Shift, because it calculates an incremental difference in the average of new values compared to the average of values at baseline. This is appropriate for a cumulative dataset only, because it calculates the average of new entries based on the assumption that the count and average of records present at baseline are still present at the time of the test. This test compares the mean of new values with the standard deviation of the baseline average to calculate a Z-score. If the new mean falls outside the Z-score threshold, a shift is detected. 
Potential Z-score thresholds may range from 0 to 3, depending on the sensitivity you prefer. A failed test could indicate a quality issue or a legitimate shift in new data that should be noted and assessed by business users. Consider this test along with Variability Increase. If variability rises too, process, methodology or measurement flaws could be at issue. If variability remains consistent, the problem is more likely to be with the source data itself.', 'Y'), - ('1018', 'LOV_All', 'Value Match All', 'List of expected values all present in column', 'Tests that all values match a pipe-delimited list of expected values and that all expected values are present', 'Column values found don''t exactly match the expected list of values', 'Values found', NULL, NULL, NULL, NULL, 'threshold_value', NULL, 'List of Expected Values', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'List of values expected, in form (''Val1'',''Val2)', 'This is a more restrictive form of Value Match, testing that all values in the dataset match the list provided, and also that all values present in the list appear at least once in the dataset. This would be appropriate for tables where all category values in the column are represented at least once.', 'Y'), - ('1019', 'LOV_Match', 'Value Match', 'All column values present in expected list', 'Tests that all values in the column match the list-of-values identified in baseline data.', 'Values not matching expected List-of-Values from baseline.', 'Non-matching records', NULL, 'top_freq_values > '''' AND distinct_value_ct BETWEEN 2 and 10 AND NOT (functional_data_type LIKE ''%date%'' OR lower(datatype_suggestion) LIKE ''%date%'' OR general_type = ''D'' OR lower(column_name) IN (''file_name'', ''filename''))', NULL, NULL, 'baseline_value,threshold_value', '''('' || SUBSTRING( CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 2) > '''' THEN '','''''' || TRIM( REPLACE ( SPLIT_PART(top_freq_values, ''|'' , 2), '''''''' , '''''''''''' ) ) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 4) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 4), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 6) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 6), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 8) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 8), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 10) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 10), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 12) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 12), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 14) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 14), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 16) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 16), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 18) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 18), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 20) > '''' 
THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 20), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END, 2, 999) || '')'',0', 'List of Expected Values,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'List of values expected, in form (''Val1'',''Val2)', 'This tests that all values in the column match the hard-coded list provided. This is relevant when the list of allowable values is small and not expected to change often. Even if new values might occasionally be added, this test is useful for downstream data products to provide warning that assumptions and logic may need to change.', 'Y'), - ('1020', 'Min_Date', 'Minimum Date', 'All dates on or after set minimum', 'Tests that the earliest date referenced in the column is no earlier than baseline data', 'The earliest date value found is before the earliest value at baseline.', 'Dates prior to limit', NULL, 'general_type=''D''and min_date IS NOT NULL AND distinct_value_ct > 1', NULL, NULL, 'baseline_value,threshold_value', 'min_date,0', 'Minimum Date at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of dates prior to minimum', 'This test is appropriate for a cumulative dataset only, because it assumes all prior values are still present. It''s appropriate where new records are added with more recent dates, but old dates dates do not change.', 'Y'), - ('1021', 'Min_Val', 'Minimum Value', 'All values at or above set minimum', 'Tests that the minimum value present in the column is no lower than the minimum value in baseline data', 'Minimum column value less than baseline.', 'Values under limit', NULL, 'general_type=''N''and min_value IS NOT NULL AND (distinct_value_ct >= 2 OR (distinct_value_ct=2 and min_value<>0 and max_value<>1))', NULL, NULL, 'baseline_value,threshold_value', 'min_value,0', 'Minimum Value at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of values under limit', 'This test is appropriate for a cumulative dataset only, assuming all prior values are still present. It is also appropriate for any measure that has an absolute, definable minimum value, or a heuristic that makes senes for valid data.', 'Y'), - ('1022', 'Missing_Pct', 'Percent Missing', 'Consistent ratio of missing values', 'Tests for statistically-significant shift in percentage of missing values in column vs. baseline data', 'Significant shift in percent of missing values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'record_ct <> value_ct', NULL, NULL, 'baseline_ct,baseline_value_ct,threshold_value', 'record_ct,value_ct,2::VARCHAR(10)', 'Baseline Record Count,Baseline Value Count,Standardized Difference Measure', NULL, 'Warning', 'CAT', 'column', 'Completeness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'This test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. An uptick in missing data may indicate a collection issue at the source. A larger change may indicate a processing failure. A drop in missing data may also be significant, if it affects assumptions built into analytic products downstream. 
You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y'), - ('1023', 'Monthly_Rec_Ct', 'Monthly Records', 'At least one date per month present within date range', 'Tests for presence of at least one date per calendar month within min/max date range, per baseline data', 'At least one date per month expected in min/max date range.', 'Missing months', 'Calendar months without date values present', 'general_type= ''D'' AND date_days_present > 1 AND date_months_present > 2 AND date_months_present - (datediff( ''MON'' , min_date, max_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', NULL, NULL, 'threshold_value', '0', 'Threshold Count of Months without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar months without dates present', 'Monthly Records tests that at least one record is present for every calendar month within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each month. A failure here would suggest missing records for the number of months identified without data. You can adjust the threshold to accept a number of month that you know legitimately have no records.', 'Y'), - ('1024', 'Outlier_Pct_Above', 'Outliers Above', 'Consistent outlier counts over 2 SD above mean', 'Tests that percent of outliers over 2 SD above Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD above the mean is greater than expected threshold.', 'Pct records over limit', NULL, 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over upper 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations above the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. 
', 'Y'), - ('1025', 'Outlier_Pct_Below', 'Outliers Below', 'Consistent outlier counts under 2 SD below mean', 'Tests that percent of outliers over 2 SD below Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD below the mean is greater than expected threshold.', 'Pct records under limit', NULL, 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over lower 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations below the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y'), - ('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, 'fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', NULL, NULL, 'baseline_value,threshold_value', 'trim(REPLACE(REPLACE(REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. 
The expected threshold is the number of records that fail to match the defined pattern.', 'Y'), - ('1028', 'Recency', 'Recency', 'Latest date within expected range of test date', 'Tests that the latest date in column is within a set number of days of the test date', 'Most recent date value not within expected days of test date.', 'Days before test', 'Number of days that most recent date precedes the date of test', 'general_type= ''D'' AND max_date <= run_date AND NOT column_name IN ( ''filedate'' , ''file_date'' ) AND NOT functional_data_type IN (''Future Date'', ''Schedule Date'') AND DATEDIFF( ''DAY'' , max_date, run_date) <= 62', NULL, NULL, 'threshold_value', 'CASE WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 3 THEN DATEDIFF(''DAY'', max_date, run_date) + 3 WHEN DATEDIFF(''DAY'', max_date, run_date) <= 7 then DATEDIFF(''DAY'', max_date, run_date) + 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 7.0) * 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) > 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 30.0) * 30 END', 'Threshold Maximum Days before Test', NULL, 'Warning', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected maximum count of days preceding test date', 'This test evaluates recency based on the latest referenced dates in the column. The test is appropriate for transactional dates and timestamps. The test can be especially valuable because timely data deliveries themselves may not assure that the most recent data is present. You can adjust the expected threshold to the maximum number of days that you expect the data to age before the dataset is refreshed. ', 'Y'), - ('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y'), - ('1033', 'Street_Addr_Pattern', 'Street Address', 'Enough street address entries match defined pattern', 'Tests for percent of records matching standard street address pattern.', 'Percent of values matching standard street address format is under expected threshold.', 'Percent matches', 'Percent of records that match street address pattern', '(std_pattern_match=''STREET_ADDR'') AND (avg_length <> round(avg_length)) AND (avg_embedded_spaces BETWEEN 2 AND 6) AND (avg_length < 35)', NULL, NULL, 'threshold_value', '75', 'Threshold Pct that Match Address Pattern', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected percent of records that match standard street address pattern', 'The street address pattern used in this test should match the vast majority of USA addresses. 
You can adjust the threshold percent of matches based on the results you are getting -- you may well want to tighten it to make the test more sensitive to invalid entries.', 'Y'), - ('1034', 'Unique', 'Unique Values', 'Each column value is unique', 'Tests that no values for the column are repeated in multiple records.', 'Column values should be unique per row.', 'Duplicate values', 'Count of non-unique values', 'record_ct > 500 and record_ct = distinct_value_ct and value_ct > 0', NULL, NULL, 'threshold_value', '0', 'Threshold Duplicate Value Count', NULL, 'Fail', 'CAT', 'column', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate values', 'This test is ideal when the database itself does not enforce a primary key constraint on the table. It serves as an independent check on uniqueness. If''s also useful when there are a small number of exceptions to uniqueness, which can be reflected in the expected threshold count of duplicates.', 'Y'), - ('1035', 'Unique_Pct', 'Percent Unique', 'Consistent ratio of unique values', 'Tests for statistically-significant shift in percentage of unique values vs. baseline data.', 'Significant shift in percent of unique values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'distinct_value_ct > 10', NULL, NULL, 'baseline_value_ct,baseline_unique_ct,threshold_value', 'value_ct,distinct_value_ct,0.5', 'Value Count at Baseline,Distinct Value Count at Baseline,Standardized Difference Measure (0 to 1)', NULL, 'Warning', 'CAT', 'column', 'Uniqueness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'You can think of this as a test of similarity that measures whether the percentage of unique values is consistent with the percentage at baseline. A significant change might indicate duplication or a telling shift in cardinality between entities. The test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. 
You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y'), - ('1036', 'US_State', 'US State', 'Column value is two-letter US state code', 'Tests that the recorded column value is a valid US state.', 'Column Value is not a valid US state.', 'Not US States', 'Values that doo not match 2-character US state abbreviations.', 'general_type= ''A'' AND column_name ILIKE ''%state%'' AND distinct_value_ct < 70 AND max_length = 2', NULL, NULL, 'threshold_value', '0', 'Threshold Count not Matching State Abbreviations', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of values that are not US state abbreviations', 'This test validates entries against a fixed list of two-character US state codes and related Armed Forces codes.', 'Y'), - ('1037', 'Weekly_Rec_Ct', 'Weekly Records', 'At least one date per week present within date range', 'Tests for presence of at least one date per calendar week within min/max date range, per baseline data', 'At least one date per week expected in min/max date range.', 'Missing weeks', 'Calendar weeks without date values present', 'general_type= ''D'' AND date_days_present > 1 AND date_weeks_present > 3 AND date_weeks_present - (DATEDIFF(''week'', ''1800-01-05''::DATE, max_date) - DATEDIFF(''week'', ''1800-01-05''::DATE, min_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', NULL, NULL, 'threshold_value', '0', 'Threshold Weeks without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar weeks without dates present', 'Weekly Records tests that at least one record is present for every calendar week within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each week. A failure here would suggest missing records for the number of weeks identified without data. You can adjust the threshold to accept a number of weeks that you know legitimately have no records.', 'Y'), - ('1040', 'Variability_Increase', 'Variability Increase', 'Variability has increased above threshold', 'Tests that the spread or dispersion of column values has increased significantly over baseline, indicating a drop in stability of the measure.', 'The Standard Deviation of the measure has increased beyond the defined threshold. This could signal a change in a process or a data quality issue.', 'Pct SD shift', 'Percent of baseline Standard Deviation', 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', NULL, NULL, 'baseline_sd,threshold_value', 'stdev_value,120', 'Std Deviation at Baseline,Expected Maximum Percent', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct of baseline Standard Deviation (SD)', 'This test looks for percent shifts in standard deviation as a measure of the stability of a measure over time. A significant change could indicate that new values are erroneous, or that the cohort being evaluated is significantly different from baseline. An increase in particular could mark new problems in measurement, a more heterogeneous cohort, or that significant outliers have been introduced. 
Consider this test along with Average Shift and New Shift. If the average shifts as well, there may be a fundamental shift in the dataset or process used to collect the data point. This might suggest a data shift that should be noted and assessed by business users. If the average does not shift, this may point to a data quality or data collection problem. ', 'Y'), - ('1041', 'Variability_Decrease', 'Variability Decrease', 'Variability has decreased below threshold', 'Tests that the spread or dispersion of column values has decreased significantly over baseline, indicating a shift in stability of the measure. This could signal a change in a process or a data quality issue.', 'The Standard Deviation of the measure has decreased below the defined threshold. This could signal a change in a process or a data quality issue.', 'Pct SD shift', 'Percent of baseline Standard Deviation', 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', NULL, NULL, 'baseline_sd,threshold_value', 'stdev_value, 80', 'Std Deviation at Baseline,Expected Minimum Percent', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected minimum pct of baseline Standard Deviation (SD)', 'This test looks for percent shifts in standard deviation as a measure of the stability of a measure over time. A significant change could indicate that new values are erroneous, or that the cohort being evaluated is significantly different from baseline. A decrease in particular could indicate an improved process, better precision in measurement, the elimination of outliers, or a more homogeneous cohort. 
', 'Y'), - ('1042', 'Valid_Month', 'Valid Month', 'Valid calendar month in expected format', 'Tests for the presence of a valid representation of a calendar month consistent with the format at baseline.', 'Column values are not a valid representation of a calendar month consistent with the format at baseline.', 'Invalid months', NULL, 'functional_data_type = ''Period Month''', NULL, NULL, 'threshold_value,baseline_value', '0,CASE WHEN max_length > 3 AND initcap(min_text) = min_text THEN ''''''January'''',''''February'''',''''March'''',''''April'''',''''May'''',''''June'''',''''July'''',''''August'''',''''September'''',''''October'''',''''November'''',''''December'''''' WHEN max_length > 3 AND upper(min_text) = min_text THEN ''''''JANUARY'''',''''FEBRUARY'''',''''MARCH'''',''''APRIL'''',''''MAY'''',''''JUNE'''',''''JULY'''',''''AUGUST'''',''''SEPTEMBER'''',''''OCTOBER'''',''''NOVEMBER'''',''''DECEMBER'''''' WHEN max_length > 3 AND lower(min_text) = min_text THEN ''''''january'''',''''february'''',''''march'''',''''april'''',''''may'''',''''june'''',''''july'''',''''august'''',''''september'''',''''october'''',''''november'''',''''december'''''' WHEN max_length = 3 AND initcap(min_text) = min_text THEN ''''''Jan'''',''''Feb'''',''''Mar'''',''''Apr'''',''''May'''',''''Jun'''',''''Jul'''',''''Aug'''',''''Sep'''',''''Oct'''',''''Nov'''',''''Dec'''''' WHEN max_length = 3 AND upper(min_text) = min_text THEN ''''''JAN'''',''''FEB'''',''''MAR'''',''''APR'''',''''MAY'''',''''JUN'''',''''JUL'''',''''AUG'''',''''SEP'''',''''OCT'''',''''NOV'''',''''DEC'''''' WHEN max_length = 3 AND lower(min_text) = min_text THEN ''''''jan'''',''''feb'''',''''mar'''',''''apr'''',''''may'''',''''jun'''',''''jul'''',''''aug'''',''''sep'''',''''oct'''',''''nov'''',''''dec'''''' WHEN max_length = 2 AND min_text = ''01'' THEN ''''''01'''',''''02'''',''''03'''',''''04'''',''''05'''',''''06'''',''''07'''',''''08'''',''''09'''',''''10'''',''''11'''',''''12'''''' WHEN max_length = 2 AND min_text = ''1'' THEN ''''''1'''',''''2'''',''''3'''',''''4'''',''''5'''',''''6'''',''''7'''',''''8'''',''''9'''',''''10'''',''''11'''',''''12'''''' WHEN min_value = 1 THEN ''1,2,3,4,5,6,7,8,9,10,11,12'' ELSE ''NULL'' END', 'Threshold Invalid Months,Valid Month List', 'The acceptable number of records with invalid months present.|List of valid month values for this field, in quotes if field is numeric, separated by commas.', 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of invalid months', NULL, 'N'), - ('1043', 'Valid_Characters', 'Valid Characters', 'Column contains no invalid characters', 'Tests for the presence of non-printing characters, leading spaces, or surrounding quotes.', 'Invalid characters, such as non-printing characters, leading spaces, or surrounding quotes, were found.', 'Invalid records', 'Expected count of values with invalid characters', 'general_type = ''A''', NULL, NULL, 'threshold_value', '0', NULL, 'The acceptable number of records with invalid character values present.', 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Value Count', 'This test looks for the presence of non-printing ASCII characters that are considered non-standard in basic text processing. It also identifies leading spaces and values enclosed in quotes. 
Values that fail this test may be artifacts of data conversion, or just more difficult to process or analyze downstream.', 'N'), - ('1044', 'Valid_US_Zip', 'Valid US Zip', 'Valid USA Postal Codes', 'Tests that postal codes match the 5 or 9 digit standard US format', 'Invalid US Zip Code formats found.', 'Invalid Zip Codes', 'Expected count of values with invalid Zip Codes', 'functional_data_type = ''Zip''', NULL, NULL, 'threshold_value', '0', NULL, NULL, 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Value Count', NULL, 'Y'), - ('1045', 'Valid_US_Zip3', 'Valid US Zip-3 ', 'Valid USA Zip-3 Prefix', 'Tests that postal codes match the 3 digit format of a regional prefix.', 'Invalid 3-digit US Zip Code regional prefix formats found.', 'Invalid Zip-3 Prefix', 'Expected count of values with invalid Zip-3 Prefix Codes', 'functional_data_type = ''Zip3''', NULL, NULL, 'threshold_value', '0', NULL, NULL, 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Zip3 Count', 'This test looks for the presence of values that fail to match the three-digit numeric code expected for US Zip Code regional prefixes. These prefixes are often used to roll up Zip Code data to a regional level, and may be critical to anonymize detailed data and protect PID. Depending on your needs and regulatory requirements, longer zip codes could place PID at risk.', 'Y'), - ('1006', 'Condition_Flag', 'Custom Condition', 'Column values match pre-defined condition', 'Tests that each record in the table matches a pre-defined, custom condition', 'Value(s) found not matching defined condition.', 'Values Failing', NULL, NULL, 'Test Focus', 'Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. Example: `Quantity Consistency` if you are testing that quantity ordered matches quantity shipped.', 'threshold_value,custom_query', NULL, 'Threshold Error Count,Custom SQL Expression (TRUE on error)', 'The number of errors that are acceptable before test fails.|Expression should evaluate to TRUE to register an error or FALSE if no error. An expression can reference only columns in the selected table.', 'Fail', 'CAT', 'custom', 'Validity', 'Schema Drift', 'Count of records that don''t meet test condition', 'Custom Condition is a business-rule test for a user-defined error condition based on the value of one or more columns. The condition is applied to each record within the table, and the count of records failing the condition is added up. If that count exceeds a threshold of errors, the test as a whole is failed. This test is ideal for error conditions that TestGen cannot automatically infer, and any condition that involves the values of more than one column in the same record. Performance of this test is fast, since it is performed together with other aggregate tests. 
Interpretation is based on the user-defined meaning of the test.', 'Y'), + (id, test_type, test_name_short, test_name_long, test_description, except_message, measure_uom, measure_uom_description, selection_criteria, dq_score_prevalence_formula, dq_score_risk_factor, column_name_prompt, column_name_help, default_parm_columns, default_parm_values, default_parm_prompts, default_parm_help, default_severity, run_type, test_scope, dq_dimension, health_dimension, threshold_description, usage_notes, active) +VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count consistent', 'Tests that the maximum count of characters in a column value has not dropped vs. baseline data', 'Maximum length of values has dropped from prior expected length.', 'Values over max', NULL, 'general_type =''A'' AND max_length > 0 AND ( (min_length = avg_length AND max_length = avg_length) OR (numeric_ct <> value_ct ) ) AND functional_table_type NOT LIKE ''%window%'' /* The conditions below are to eliminate overlap with : LOV_Match (excluded selection criteria for this test_type), Pattern_Match (excluded selection criteria for this test_type), Constant (excluded functional_data_type Constant and Boolean) */ AND ( (distinct_value_ct NOT BETWEEN 2 AND 10 AND functional_data_type NOT IN ( ''Constant'', ''Boolean'') ) AND NOT ( fn_charcount(top_patterns, E'' \| '' ) = 1 AND fn_charcount(top_patterns, E'' \| '' ) IS NOT NULL AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > ''''))', '{VALUE_CT}::FLOAT * (FN_NORMAL_CDF(({MAX_LENGTH}::FLOAT - {AVG_LENGTH}::FLOAT) / ({MAX_LENGTH}::FLOAT / 3)) - FN_NORMAL_CDF(({RESULT_MEASURE}::FLOAT - {AVG_LENGTH}::FLOAT) / ({MAX_LENGTH}::FLOAT / 3)) ) /{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', 'max_length', 'Maximum String Length at Baseline', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Maximum length expected', 'Alpha Truncation tests that the longest text value in a column hasn''t become shorter than the longest value at baseline. This could indicate a problem in a cumulative dataset, where prior values should still exist unchanged. A failure here would suggest that some process changed data that you would still expect to be present and matching its value when the column was profiled. This test would not be appropriate for an incremental or windowed dataset.', 'Y'), + ('1005', 'Avg_Shift', 'Average Shift', 'Column mean is consistent with reference', 'Tests for statistically-significant shift in mean value for column from average calculated at baseline.', 'Standardized difference between averages is over the selected threshold level.', 'Difference Measure', 'Cohen''s D Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'general_type=''N'' AND distinct_value_ct > 10 AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%''', 'LEAST({RESULT_MEASURE}, 1.0)', '0.75', NULL, NULL, 'baseline_value_ct,baseline_avg,baseline_sd,threshold_value', 'value_ct,avg_value,stdev_value,0.5::VARCHAR', 'Value Ct at Baseline,Mean at Baseline,Std Deviation at Baseline,Threshold Difference Measure ', NULL, 'Warning', 'CAT', 'column', 'Consistency', 'Data Drift', 'Standardized Difference Measure', 'Average Shift tests that the average of a numeric column has not significantly changed since baseline, when profiling was done. 
A significant shift may indicate errors in processing, differences in source data, or valid changes that may nevertheless impact assumptions in downstream data products. The test uses Cohen''s D, a statistical technique to identify significant shifts in a value. Cohen''s D measures the difference between the two averages, reporting results on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. Depending on your data, some difference may be expected, so it''s reasonable to adjust the threshold value that triggers test failure. This test works well for measures, or even for identifiers if you expect them to increment consistently. You may want to periodically adjust the expected threshold, or even the expected average value if you expect shifting over time. Consider this test along with Variability Increase. If variability rises too, process or measurement flaws could be at work. If variability remains consistent, the issue is more likely to be with the source data itself. ', 'Y'), + ('1007', 'Constant', 'Constant Match', 'All column values match constant value', 'Tests that all values in the column match the constant value identified in baseline data', 'A constant value is expected for this column.', 'Mismatched values', NULL, 'TEMPLATE', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value,threshold_value', NULL, 'Constant Value at Baseline,Threshold Error Count', 'The single, unchanging value of the column, per baseline|The number of errors that are acceptable before test fails.', 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Count of records with unexpected values', 'Constant Match tests that a single value determined to be a constant in baseline profiling is still the only value for the column that appears in subsequent versions of the dataset. Sometimes new data or business knowledge may reveal that the value is not a constant at all, even though only one value was present at profiling. In this case, you will want to disable this test. Alternatively, you can use the Value Match test to provide a limited number of valid values for the column.', 'Y'), + ('1009', 'Daily_Record_Ct', 'Daily Records', 'All dates present within date range', 'Tests for presence of every calendar date within min/max date range, per baseline data', 'Not every date value between min and max dates is present, unlike at baseline.', 'Missing dates', NULL, 'general_type= ''D'' AND date_days_present > 21 AND date_days_present - (DATEDIFF(''day'', ''1800-01-05''::DATE, max_date) - DATEDIFF(''day'', ''1800-01-05''::DATE, min_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/{DATE_DAYS_PRESENT}::FLOAT/{RECORD_CT}::FLOAT', '0.75', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Calendar Days', NULL, 'Warning', 'CAT', 'column', 'Completeness', 'Volume', 'Missing calendar days within min/max range', 'Daily Records tests that at least one record is present for every day within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each day. A failure here would suggest missing records for the number of days identified without data. You can adjust the threshold to accept a number of days that you know legitimately have no records. 
', 'Y'), + ('1011', 'Dec_Trunc', 'Decimal Truncation', 'Sum of fractional values at or above reference', 'Tests for decimal truncation by confirming that the sum of fractional values in data is no less than the sum at baseline', 'The sum of fractional values is under baseline, which may indicate decimal truncation', 'Fractional sum', 'The sum of all decimal values from all data for this column', 'fractional_sum IS NOT NULL AND functional_table_type LIKE''%cumulative%''', '1', '1.0', NULL, NULL, 'threshold_value', 'ROUND(fractional_sum, 0)', 'Sum of Fractional Values at Baseline', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Minimum expected sum of all fractional values', 'Decimal Truncation tests that the fractional (decimal) part of a numeric column has not been truncated since Baseline. This works by summing all the fractional values after the decimal point and confirming that the total is at least equal to the fractional total at baseline. This could indicate a problem in a cumulative dataset, where prior values should still exist unchanged. A failure here would suggest that some process changed data that you would still expect to be present and matching its value when the column was profiled. This test would not be appropriate for an incremental or windowed dataset.', 'Y'), + ('1012', 'Distinct_Date_Ct', 'Date Count', 'Count of distinct dates at or above reference', 'Tests that the count of distinct dates referenced in the column has not dropped vs. baseline data', 'Drop in count of unique dates recorded in column.', 'Unique dates', 'Count of unique dates in transactional date column', 'general_type=''D'' and date_days_present IS NOT NULL AND functional_table_type NOT LIKE ''%window%''', '(({RECORD_CT}-{PRO_RECORD_CT})::FLOAT*{DISTINCT_VALUE_CT}::FLOAT/{PRO_RECORD_CT}::FLOAT)/{PRO_RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'date_days_present,date_days_present', 'Distinct Date Count at Baseline,Min Expected Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Minimum distinct date count expected', 'Date Count tests that the count of distinct dates present in the column has not dropped since baseline. The test is relevant for cumulative datasets, where old records are retained. A failure here would indicate missing records, which could be caused by a processing error or changed upstream data sources.', 'Y'), + ('1013', 'Distinct_Value_Ct', 'Value Count', 'Count of distinct values has not dropped', 'Tests that the count of unique values in the column has not changed from baseline.', 'Count of unique values in column has changed from baseline.', 'Unique Values', NULL, 'distinct_value_ct between 2 and 10 AND value_ct > 0 AND NOT (coalesce(top_freq_values,'''') > '''' AND distinct_value_ct BETWEEN 2 and 10) AND NOT (lower(functional_data_type) LIKE ''%sequence%'' OR lower(functional_data_type) LIKE ''%measurement%'' OR functional_data_type LIKE ''%date%'' OR general_type = ''D'')', 'ABS({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/{DISTINCT_VALUE_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value_ct,threshold_value', 'distinct_value_ct,distinct_value_ct', 'Distinct Value Count at Baseline,Min Expected Value Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected distinct value count', 'Value Count tests that the count of unique values present in the column has not dropped since baseline. 
The test is relevant for cumulative datasets, where old records are retained, or for any dataset where you would expect a set number of distinct values should be present. A failure here would indicate missing records or a change in categories or value assignment.', 'Y'), + ('1014', 'Email_Format', 'Email Format', 'Email is correctly formatted', 'Tests that non-blank, non-empty email addresses match the standard format', 'Invalid email address formats found.', 'Invalid emails', 'Number of emails that do not match standard format', 'std_pattern_match=''EMAIL''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Maximum Invalid Email Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of invalid email addresses', NULL, 'Y'), + ('1015', 'Future_Date', 'Past Dates', 'Latest date is prior to test run date', 'Tests that the maximum date referenced in the column is no greater than the test date, consistent with baseline data', 'Future date found when absent in baseline data.', 'Future dates', NULL, 'general_type=''D''AND future_date_ct = 0', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Maximum Future Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected count of future dates', NULL, 'Y'), + ('1016', 'Future_Date_1Y', 'Future Year', 'Future dates within year of test run date', 'Tests that the maximum date referenced in the column is no greater than one year beyond the test date, consistent with baseline data', 'Future date beyond one-year found when absent in baseline.', 'Future dates post 1 year', NULL, 'general_type=''D''AND future_date_ct > 0 AND max_date <=''{AS_OF_DATE}''::DATE + INTERVAL''365 DAYS''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Maximum Post 1-Year Future Date Count', NULL, 'Fail', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected count of future dates beyond one year', 'Future Year looks for date values in the column that extend beyond one year after the test date. This would be appropriate for transactional dates where you would expect to find dates in the near future, but not beyond one year ahead. Errors could indicate invalid entries or possibly dummy dates representing blank values.', 'Y'), + ('1017', 'Incr_Avg_Shift', 'New Shift', 'New record mean is consistent with reference', 'Tests for statistically-significant shift in mean of new values for column compared to average calculated at baseline.', 'Significant shift in average of new values vs. 
baseline avg', 'Z-score of mean shift', 'Absolute Z-score (number of SD''s outside mean) of prior avg - incremental avg', 'general_type=''N'' AND distinct_value_ct > 10 AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%''', '{RECORD_CT}::FLOAT*(1-FN_NORMAL_CDF({RESULT_MEASURE}::FLOAT))/{RECORD_CT}::FLOAT', '0.75', NULL, NULL, 'baseline_value_ct,baseline_sum,baseline_avg,baseline_sd,threshold_value', 'value_ct,(avg_value * value_ct)::FLOAT,avg_value,stdev_value,2', 'Value Count at Baseline,Sum at Baseline,Mean Value at Baseline,Std Deviation at Baseline,Threshold Max Z-Score', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Maximum Z-Score (number of SD''s beyond mean) expected', 'This is a more sensitive test than Average Shift, because it calculates an incremental difference in the average of new values compared to the average of values at baseline. This is appropriate for a cumulative dataset only, because it calculates the average of new entries based on the assumption that the count and average of records present at baseline are still present at the time of the test. This test compares the mean of new values with the standard deviation of the baseline average to calculate a Z-score. If the new mean falls outside the Z-score threshold, a shift is detected. Potential Z-score thresholds may range from 0 to 3, depending on the sensitivity you prefer. A failed test could indicate a quality issue or a legitimate shift in new data that should be noted and assessed by business users. Consider this test along with Variability Increase. If variability rises too, process, methodology or measurement flaws could be at issue. If variability remains consistent, the problem is more likely to be with the source data itself.', 'Y'), + ('1018', 'LOV_All', 'Value Match All', 'List of expected values all present in column', 'Tests that all values match a pipe-delimited list of expected values and that all expected values are present', 'Column values found don''t exactly match the expected list of values', 'Values found', NULL, NULL, '1', '1.0', NULL, NULL, 'threshold_value', NULL, 'List of Expected Values', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'List of values expected, in form (''Val1'',''Val2)', 'This is a more restrictive form of Value Match, testing that all values in the dataset match the list provided, and also that all values present in the list appear at least once in the dataset. 
This would be appropriate for tables where all category values in the column are represented at least once.', 'Y'), + ('1019', 'LOV_Match', 'Value Match', 'All column values present in expected list', 'Tests that all values in the column match the list-of-values identified in baseline data.', 'Values not matching expected List-of-Values from baseline.', 'Non-matching records', NULL, 'top_freq_values > '''' AND distinct_value_ct BETWEEN 2 and 10 AND NOT (functional_data_type LIKE ''%date%'' OR lower(datatype_suggestion) LIKE ''%date%'' OR general_type = ''D'' OR lower(column_name) IN (''file_name'', ''filename''))', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value,threshold_value', '''('' || SUBSTRING( CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 2) > '''' THEN '','''''' || TRIM( REPLACE ( SPLIT_PART(top_freq_values, ''|'' , 2), '''''''' , '''''''''''' ) ) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 4) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 4), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 6) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 6), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 8) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 8), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 10) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 10), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 12) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 12), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 14) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 14), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 16) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 16), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 18) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 18), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END || CASE WHEN SPLIT_PART(top_freq_values, ''|'' , 20) > '''' THEN '','''''' || TRIM(REPLACE(SPLIT_PART(top_freq_values, ''|'' , 20), '''''''' , '''''''''''' )) || '''''''' ELSE '''' END, 2, 999) || '')'',0', 'List of Expected Values,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'List of values expected, in form (''Val1'',''Val2)', 'This tests that all values in the column match the hard-coded list provided. This is relevant when the list of allowable values is small and not expected to change often. 
Even if new values might occasionally be added, this test is useful for downstream data products to provide warning that assumptions and logic may need to change.', 'Y'), + ('1020', 'Min_Date', 'Minimum Date', 'All dates on or after set minimum', 'Tests that the earliest date referenced in the column is no earlier than baseline data', 'The earliest date value found is before the earliest value at baseline.', 'Dates prior to limit', NULL, 'general_type=''D''and min_date IS NOT NULL AND distinct_value_ct > 1', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'min_date,0', 'Minimum Date at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of dates prior to minimum', 'This test is appropriate for a cumulative dataset only, because it assumes all prior values are still present. It''s appropriate where new records are added with more recent dates, but old dates do not change.', 'Y'), + ('1021', 'Min_Val', 'Minimum Value', 'All values at or above set minimum', 'Tests that the minimum value present in the column is no lower than the minimum value in baseline data', 'Minimum column value less than baseline.', 'Values under limit', NULL, 'general_type=''N''and min_value IS NOT NULL AND (distinct_value_ct >= 2 OR (distinct_value_ct=2 and min_value<>0 and max_value<>1))', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'min_value,0', 'Minimum Value at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of values under limit', 'This test is appropriate for a cumulative dataset only, assuming all prior values are still present. It is also appropriate for any measure that has an absolute, definable minimum value, or a heuristic that makes sense for valid data.', 'Y'), + ('1022', 'Missing_Pct', 'Percent Missing', 'Consistent ratio of missing values', 'Tests for statistically-significant shift in percentage of missing values in column vs. baseline data', 'Significant shift in percent of missing values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'record_ct <> value_ct', 'LEAST({RESULT_MEASURE}, 1.0)', '0.75', NULL, NULL, 'baseline_ct,baseline_value_ct,threshold_value', 'record_ct,value_ct,2::VARCHAR(10)', 'Baseline Record Count,Baseline Value Count,Standardized Difference Measure', NULL, 'Warning', 'CAT', 'column', 'Completeness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'This test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. An uptick in missing data may indicate a collection issue at the source. A larger change may indicate a processing failure. A drop in missing data may also be significant, if it affects assumptions built into analytic products downstream.
You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y'), + ('1023', 'Monthly_Rec_Ct', 'Monthly Records', 'At least one date per month present within date range', 'Tests for presence of at least one date per calendar month within min/max date range, per baseline data', 'At least one date per month expected in min/max date range.', 'Missing months', 'Calendar months without date values present', 'general_type= ''D'' AND date_days_present > 1 AND date_months_present > 2 AND date_months_present - (datediff( ''MON'' , min_date, max_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/{DATE_MONTHS_PRESENT}::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Count of Months without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar months without dates present', 'Monthly Records tests that at least one record is present for every calendar month within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each month. A failure here would suggest missing records for the number of months identified without data. You can adjust the threshold to accept a number of months that you know legitimately have no records.', 'Y'), + ('1024', 'Outlier_Pct_Above', 'Outliers Above', 'Consistent outlier counts over 2 SD above mean', 'Tests that percent of outliers over 2 SD above Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD above the mean is greater than expected threshold.', 'Pct records over limit', NULL, 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over upper 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations above the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers.
', 'Y'), + ('1025', 'Outlier_Pct_Below', 'Outliers Below', 'Consistent outlier counts under 2 SD below mean', 'Tests that percent of outliers over 2 SD below Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD below the mean is greater than expected threshold.', 'Pct records under limit', NULL, 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over lower 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations below the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y'), + ('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, 'fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'trim(REPLACE(REPLACE(REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. 
The expected threshold is the number of records that fail to match the defined pattern.', 'Y'), + ('1028', 'Recency', 'Recency', 'Latest date within expected range of test date', 'Tests that the latest date in column is within a set number of days of the test date', 'Most recent date value not within expected days of test date.', 'Days before test', 'Number of days that most recent date precedes the date of test', 'general_type= ''D'' AND max_date <= run_date AND NOT column_name IN ( ''filedate'' , ''file_date'' ) AND NOT functional_data_type IN (''Future Date'', ''Schedule Date'') AND DATEDIFF( ''DAY'' , max_date, run_date) <= 62', '(ABS({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/(1.0+DATEDIFF(''DAY'', ''{MIN_DATE}'', ''{MAX_DATE}''))::FLOAT)/{RECORD_CT}::FLOAT', '0.75', NULL, NULL, 'threshold_value', 'CASE WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 3 THEN DATEDIFF(''DAY'', max_date, run_date) + 3 WHEN DATEDIFF(''DAY'', max_date, run_date) <= 7 then DATEDIFF(''DAY'', max_date, run_date) + 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 7.0) * 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) > 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 30.0) * 30 END', 'Threshold Maximum Days before Test', NULL, 'Warning', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected maximum count of days preceding test date', 'This test evaluates recency based on the latest referenced dates in the column. The test is appropriate for transactional dates and timestamps. The test can be especially valuable because timely data deliveries themselves may not assure that the most recent data is present. You can adjust the expected threshold to the maximum number of days that you expect the data to age before the dataset is refreshed. ', 'Y'), + ('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y'), + ('1033', 'Street_Addr_Pattern', 'Street Address', 'Enough street address entries match defined pattern', 'Tests for percent of records matching standard street address pattern.', 'Percent of values matching standard street address format is under expected threshold.', 'Percent matches', 'Percent of records that match street address pattern', '(std_pattern_match=''STREET_ADDR'') AND (avg_length <> round(avg_length)) AND (avg_embedded_spaces BETWEEN 2 AND 6) AND (avg_length < 35)', '({VALUE_CT}::FLOAT * ({RESULT_MEASURE}::FLOAT - {THRESHOLD_VALUE}::FLOAT)/100.0)/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '75', 'Threshold Pct that Match Address Pattern', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected percent of records that match standard street address pattern', 'The street address pattern used in this test should match the vast majority of USA addresses. 
You can adjust the threshold percent of matches based on the results you are getting -- you may well want to tighten it to make the test more sensitive to invalid entries.', 'Y'), + ('1034', 'Unique', 'Unique Values', 'Each column value is unique', 'Tests that no values for the column are repeated in multiple records.', 'Column values should be unique per row.', 'Duplicate values', 'Count of non-unique values', 'record_ct > 500 and record_ct = distinct_value_ct and value_ct > 0', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Duplicate Value Count', NULL, 'Fail', 'CAT', 'column', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate values', 'This test is ideal when the database itself does not enforce a primary key constraint on the table. It serves as an independent check on uniqueness. It''s also useful when there are a small number of exceptions to uniqueness, which can be reflected in the expected threshold count of duplicates.', 'Y'), + ('1035', 'Unique_Pct', 'Percent Unique', 'Consistent ratio of unique values', 'Tests for statistically-significant shift in percentage of unique values vs. baseline data.', 'Significant shift in percent of unique values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'distinct_value_ct > 10', 'LEAST({RESULT_MEASURE}, 1.0)', '0.75', NULL, NULL, 'baseline_value_ct,baseline_unique_ct,threshold_value', 'value_ct,distinct_value_ct,0.5', 'Value Count at Baseline,Distinct Value Count at Baseline,Standardized Difference Measure (0 to 1)', NULL, 'Warning', 'CAT', 'column', 'Uniqueness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'You can think of this as a test of similarity that measures whether the percentage of unique values is consistent with the percentage at baseline. A significant change might indicate duplication or a telling shift in cardinality between entities. The test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. 
You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y'), + ('1036', 'US_State', 'US State', 'Column value is two-letter US state code', 'Tests that the recorded column value is a valid US state.', 'Column Value is not a valid US state.', 'Not US States', 'Values that do not match 2-character US state abbreviations.', 'general_type= ''A'' AND column_name ILIKE ''%state%'' AND distinct_value_ct < 70 AND max_length = 2', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Count not Matching State Abbreviations', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of values that are not US state abbreviations', 'This test validates entries against a fixed list of two-character US state codes and related Armed Forces codes.', 'Y'), + ('1037', 'Weekly_Rec_Ct', 'Weekly Records', 'At least one date per week present within date range', 'Tests for presence of at least one date per calendar week within min/max date range, per baseline data', 'At least one date per week expected in min/max date range.', 'Missing weeks', 'Calendar weeks without date values present', 'general_type= ''D'' AND date_days_present > 1 AND date_weeks_present > 3 AND date_weeks_present - (DATEDIFF(''week'', ''1800-01-05''::DATE, max_date) - DATEDIFF(''week'', ''1800-01-05''::DATE, min_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/{DATE_WEEKS_PRESENT}::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Weeks without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar weeks without dates present', 'Weekly Records tests that at least one record is present for every calendar week within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each week. A failure here would suggest missing records for the number of weeks identified without data. You can adjust the threshold to accept a number of weeks that you know legitimately have no records.', 'Y'), + ('1040', 'Variability_Increase', 'Variability Increase', 'Variability has increased above threshold', 'Tests that the spread or dispersion of column values has increased significantly over baseline, indicating a drop in stability of the measure.', 'The Standard Deviation of the measure has increased beyond the defined threshold. This could signal a change in a process or a data quality issue.', 'Pct SD shift', 'Percent of baseline Standard Deviation', 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', '1', '0.75', NULL, NULL, 'baseline_sd,threshold_value', 'stdev_value,120', 'Std Deviation at Baseline,Expected Maximum Percent', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct of baseline Standard Deviation (SD)', 'This test looks for percent shifts in standard deviation as a measure of the stability of a measure over time. 
A significant change could indicate that new values are erroneous, or that the cohort being evaluated is significantly different from baseline. An increase in particular could mark new problems in measurement, a more heterogeneous cohort, or that significant outliers have been introduced. Consider this test along with Average Shift and New Shift. If the average shifts as well, there may be a fundamental shift in the dataset or process used to collect the data point. This might suggest a data shift that should be noted and assessed by business users. If the average does not shift, this may point to a data quality or data collection problem. ', 'Y'), + ('1041', 'Variability_Decrease', 'Variability Decrease', 'Variability has decreased below threshold', 'Tests that the spread or dispersion of column values has decreased significantly over baseline, indicating a shift in stability of the measure. This could signal a change in a process or a data quality issue.', 'The Standard Deviation of the measure has decreased below the defined threshold. This could signal a change in a process or a data quality issue.', 'Pct SD shift', 'Percent of baseline Standard Deviation', 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', '1', '0.75', NULL, NULL, 'baseline_sd,threshold_value', 'stdev_value, 80', 'Std Deviation at Baseline,Expected Minimum Percent', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected minimum pct of baseline Standard Deviation (SD)', 'This test looks for percent shifts in standard deviation as a measure of the stability of a measure over time. A significant change could indicate that new values are erroneous, or that the cohort being evaluated is significantly different from baseline. A decrease in particular could indicate an improved process, better precision in measurement, the elimination of outliers, or a more homogeneous cohort. 
', 'Y'), + ('1042', 'Valid_Month', 'Valid Month', 'Valid calendar month in expected format', 'Tests for the presence of a valid representation of a calendar month consistent with the format at baseline.', 'Column values are not a valid representation of a calendar month consistent with the format at baseline.', 'Invalid months', NULL, 'functional_data_type = ''Period Month''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value,baseline_value', '0,CASE WHEN max_length > 3 AND initcap(min_text) = min_text THEN ''''''January'''',''''February'''',''''March'''',''''April'''',''''May'''',''''June'''',''''July'''',''''August'''',''''September'''',''''October'''',''''November'''',''''December'''''' WHEN max_length > 3 AND upper(min_text) = min_text THEN ''''''JANUARY'''',''''FEBRUARY'''',''''MARCH'''',''''APRIL'''',''''MAY'''',''''JUNE'''',''''JULY'''',''''AUGUST'''',''''SEPTEMBER'''',''''OCTOBER'''',''''NOVEMBER'''',''''DECEMBER'''''' WHEN max_length > 3 AND lower(min_text) = min_text THEN ''''''january'''',''''february'''',''''march'''',''''april'''',''''may'''',''''june'''',''''july'''',''''august'''',''''september'''',''''october'''',''''november'''',''''december'''''' WHEN max_length = 3 AND initcap(min_text) = min_text THEN ''''''Jan'''',''''Feb'''',''''Mar'''',''''Apr'''',''''May'''',''''Jun'''',''''Jul'''',''''Aug'''',''''Sep'''',''''Oct'''',''''Nov'''',''''Dec'''''' WHEN max_length = 3 AND upper(min_text) = min_text THEN ''''''JAN'''',''''FEB'''',''''MAR'''',''''APR'''',''''MAY'''',''''JUN'''',''''JUL'''',''''AUG'''',''''SEP'''',''''OCT'''',''''NOV'''',''''DEC'''''' WHEN max_length = 3 AND lower(min_text) = min_text THEN ''''''jan'''',''''feb'''',''''mar'''',''''apr'''',''''may'''',''''jun'''',''''jul'''',''''aug'''',''''sep'''',''''oct'''',''''nov'''',''''dec'''''' WHEN max_length = 2 AND min_text = ''01'' THEN ''''''01'''',''''02'''',''''03'''',''''04'''',''''05'''',''''06'''',''''07'''',''''08'''',''''09'''',''''10'''',''''11'''',''''12'''''' WHEN max_length = 2 AND min_text = ''1'' THEN ''''''1'''',''''2'''',''''3'''',''''4'''',''''5'''',''''6'''',''''7'''',''''8'''',''''9'''',''''10'''',''''11'''',''''12'''''' WHEN min_value = 1 THEN ''1,2,3,4,5,6,7,8,9,10,11,12'' ELSE ''NULL'' END', 'Threshold Invalid Months,Valid Month List', 'The acceptable number of records with invalid months present.|List of valid month values for this field, in quotes if field is numeric, separated by commas.', 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of invalid months', NULL, 'N'), + ('1043', 'Valid_Characters', 'Valid Characters', 'Column contains no invalid characters', 'Tests for the presence of non-printing characters, leading spaces, or surrounding quotes.', 'Invalid characters, such as non-printing characters, leading spaces, or surrounding quotes, were found.', 'Invalid records', 'Expected count of values with invalid characters', 'general_type = ''A''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '0.75', NULL, NULL, 'threshold_value', '0', NULL, 'The acceptable number of records with invalid character values present.', 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Value Count', 'This test looks for the presence of non-printing ASCII characters that are considered non-standard in basic text processing. It also identifies leading spaces and values enclosed in quotes. 
Values that fail this test may be artifacts of data conversion, or just more difficult to process or analyze downstream.', 'N'), + ('1044', 'Valid_US_Zip', 'Valid US Zip', 'Valid USA Postal Codes', 'Tests that postal codes match the 5 or 9 digit standard US format', 'Invalid US Zip Code formats found.', 'Invalid Zip Codes', 'Expected count of values with invalid Zip Codes', 'functional_data_type = ''Zip''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '0.75', NULL, NULL, 'threshold_value', '0', NULL, NULL, 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Value Count', NULL, 'Y'), + ('1045', 'Valid_US_Zip3', 'Valid US Zip-3 ', 'Valid USA Zip-3 Prefix', 'Tests that postal codes match the 3 digit format of a regional prefix.', 'Invalid 3-digit US Zip Code regional prefix formats found.', 'Invalid Zip-3 Prefix', 'Expected count of values with invalid Zip-3 Prefix Codes', 'functional_data_type = ''Zip3''', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '0.75', NULL, NULL, 'threshold_value', '0', NULL, NULL, 'Warning', 'CAT', 'column', 'Validity', 'Schema Drift', 'Threshold Invalid Zip3 Count', 'This test looks for the presence of values that fail to match the three-digit numeric code expected for US Zip Code regional prefixes. These prefixes are often used to roll up Zip Code data to a regional level, and may be critical to anonymize detailed data and protect PID. Depending on your needs and regulatory requirements, longer zip codes could place PID at risk.', 'Y'), + ('1006', 'Condition_Flag', 'Custom Condition', 'Column values match pre-defined condition', 'Tests that each record in the table matches a pre-defined, custom condition', 'Value(s) found not matching defined condition.', 'Values Failing', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', 'Test Focus', 'Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. Example: `Quantity Consistency` if you are testing that quantity ordered matches quantity shipped.', 'threshold_value,custom_query', NULL, 'Threshold Error Count,Custom SQL Expression (TRUE on error)', 'The number of errors that are acceptable before test fails.|Expression should evaluate to TRUE to register an error or FALSE if no error. An expression can reference only columns in the selected table.', 'Fail', 'CAT', 'custom', 'Validity', 'Schema Drift', 'Count of records that don''t meet test condition', 'Custom Condition is a business-rule test for a user-defined error condition based on the value of one or more columns. The condition is applied to each record within the table, and the count of records failing the condition is added up. If that count exceeds a threshold of errors, the test as a whole is failed. This test is ideal for error conditions that TestGen cannot automatically infer, and any condition that involves the values of more than one column in the same record. Performance of this test is fast, since it is performed together with other aggregate tests. 
Interpretation is based on the user-defined meaning of the test.', 'Y'), - ('1031', 'Row_Ct', 'Row Count', 'Number of rows is at or above threshold', 'Tests that the count of records has not decreased from the baseline count.', 'Row count less than baseline count.', 'Row count', NULL, 'TEMPLATE', NULL, NULL, 'threshold_value', NULL, 'Threshold Minimum Record Count', NULL, 'Fail', 'CAT', 'table', 'Completeness', 'Volume', 'Expected minimum row count', 'Because this tests the row count against a constant minimum threshold, it''s appropriate for any dataset, as long as the number of rows doesn''t radically change from refresh to refresh. But it''s not responsive to change over time. You may want to adjust the threshold periodically if you are dealing with a cumulative dataset.', 'Y'), - ('1032', 'Row_Ct_Pct', 'Row Range', 'Number of rows within percent range of threshold', 'Tests that the count of records is within a percentage above or below the baseline count.', 'Row Count is outside of threshold percent of baseline count.', 'Percent of baseline', 'Row count percent above or below baseline', 'TEMPLATE', NULL, NULL, 'baseline_ct,threshold_value', NULL, 'Baseline Record Count,Threshold Pct Above or Below Baseline', NULL, 'Fail', 'CAT', 'table', 'Completeness', 'Volume', 'Expected percent window below or above baseline', 'This test is better than Row Count for an incremental or windowed dataset where you would expect the row count to range within a percentage of baseline.', 'Y'), + ('1031', 'Row_Ct', 'Row Count', 'Number of rows is at or above threshold', 'Tests that the count of records has not decreased from the baseline count.', 'Row count less than baseline count.', 'Row count', NULL, 'TEMPLATE', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{THRESHOLD_VALUE}::FLOAT', '1.0', NULL, NULL, 'threshold_value', NULL, 'Threshold Minimum Record Count', NULL, 'Fail', 'CAT', 'table', 'Completeness', 'Volume', 'Expected minimum row count', 'Because this tests the row count against a constant minimum threshold, it''s appropriate for any dataset, as long as the number of rows doesn''t radically change from refresh to refresh. But it''s not responsive to change over time. You may want to adjust the threshold periodically if you are dealing with a cumulative dataset.', 'Y'), + ('1032', 'Row_Ct_Pct', 'Row Range', 'Number of rows within percent range of threshold', 'Tests that the count of records is within a percentage above or below the baseline count.', 'Row Count is outside of threshold percent of baseline count.', 'Percent of baseline', 'Row count percent above or below baseline', 'TEMPLATE', '(100.0 - {RESULT_MEASURE}::FLOAT)/100.0', '1.0', NULL, NULL, 'baseline_ct,threshold_value', NULL, 'Baseline Record Count,Threshold Pct Above or Below Baseline', NULL, 'Fail', 'CAT', 'table', 'Completeness', 'Volume', 'Expected percent window below or above baseline', 'This test is better than Row Count for an incremental or windowed dataset where you would expect the row count to range within a percentage of baseline.', 'Y'), - ('1008', 'CUSTOM', 'Custom Test', 'Custom-defined business rule', 'Custom SQL Query Test', 'Errors were detected according to test definition.', 'Errors found', 'Count of errors identified by query', NULL, 'Test Focus', 'Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. 
Example: `Order Total Matches Detail` if you are testing that the total in one table matches the sum of lines in another.', 'custom_query', NULL, 'Custom SQL Query Returning Error Records', 'Query should return records indicating one or more errors. The test passes if no records are returned. Results of the query will be shown when you click `Review Source Data` for a failed test, so be sure to include enough data in your results to follow-up. \n\nA query can refer to any tables in the database. You must hard-code the schema or use `{DATA_SCHEMA}` to represent the schema defined for the Table Group.', 'Fail', 'QUERY', 'custom', 'Accuracy', 'Data Drift', 'Expected count of errors found by custom query', 'This business-rule test is highly flexible, covering any error state that can be expressed by a SQL query against one or more tables in the database. In operation, the user-defined query is embedded within a parent query returning the count of error rows identified. Any row returned by the query is interpreted as a single error condition in the test. Note that this query is run independently of other tests, and that performance will be slower, depending in large part on the efficiency of the query you write. Interpretation is based on the user-defined meaning of the test. Your query might be written to return errors in individual rows identified by joining tables. Or it might return an error based on a multi-column aggregate condition returning a single row if an error is found. This query is run separately when you click `Review Source Data` from Test Results, so be sure to include enough data in your results to follow-up. Interpretation is based on the user-defined meaning of the test.', 'Y'), + ('1008', 'CUSTOM', 'Custom Test', 'Custom-defined business rule', 'Custom SQL Query Test', 'Errors were detected according to test definition.', 'Errors found', 'Count of errors identified by query', NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', 'Test Focus', 'Specify a brief descriptor of the focus of this test that is unique within this Test Suite for the Table and Test Type. This distinguishes this test from others of the same type on the same table. Example: `Order Total Matches Detail` if you are testing that the total in one table matches the sum of lines in another.', 'custom_query', NULL, 'Custom SQL Query Returning Error Records', 'Query should return records indicating one or more errors. The test passes if no records are returned. Results of the query will be shown when you click `Review Source Data` for a failed test, so be sure to include enough data in your results to follow-up. \n\nA query can refer to any tables in the database. You must hard-code the schema or use `{DATA_SCHEMA}` to represent the schema defined for the Table Group.', 'Fail', 'QUERY', 'custom', 'Accuracy', 'Data Drift', 'Expected count of errors found by custom query', 'This business-rule test is highly flexible, covering any error state that can be expressed by a SQL query against one or more tables in the database. In operation, the user-defined query is embedded within a parent query returning the count of error rows identified. Any row returned by the query is interpreted as a single error condition in the test. Note that this query is run independently of other tests, and that performance will be slower, depending in large part on the efficiency of the query you write. Interpretation is based on the user-defined meaning of the test. 
Your query might be written to return errors in individual rows identified by joining tables. Or it might return an error based on a multi-column aggregate condition returning a single row if an error is found. This query is run separately when you click `Review Source Data` from Test Results, so be sure to include enough data in your results to follow-up. Interpretation is based on the user-defined meaning of the test.', 'Y'), - ('1500', 'Aggregate_Balance', 'Aggregate Balance', 'Aggregate values per group match reference', 'Tests for exact match in aggregate values for each set of column values vs. reference dataset', 'Aggregate measure per set of column values does not exactly match reference dataset.', 'Mismatched measures', NULL, NULL, 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of matching table|Matching table name|Agregate column expression in matching table: one of `SUM([column_name])` or `COUNT([column_name])`|Condition defining a subset of records in matching table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in matching table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in matching table (e.g. HAVING clause) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of group totals not matching aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations across two different tables. Both tables must be accessible at the same time. It''s ideal for confirming that two datasets exactly match -- that the sum of a measure or count of a value hasn''t changed or shifted between categories. Use this test to compare a raw and processed version of the same dataset, or to confirm that an aggregated table exactly matches the detail table that it''s built from. An error here means that one or more value combinations fail to match. 
New categories or combinations will cause failure.', 'Y'), - ('1501', 'Aggregate_Minimum', 'Aggregate Minimum', 'Aggregate values per group are at or above reference', 'Tests that aggregate values for each set of column values are at least the same as reference dataset', 'Aggregate measure per set of column values is not at least the same as reference dataset.', 'Mismatched measures', NULL, NULL, 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of reference table|Reference table name|Aggregate column expression in reference table (e.g. `SUM(sales)`)|Condition defining a subset of records in reference table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in reference table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in reference table (e.g. HAVING clause) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals below aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations, but requires a match or increase in the aggregate value, rather than an exact match, across two different tables. Both tables must be accessible at the same time. Use this to confirm that aggregate values have not dropped for any set of categories, even if some values may rise. This test is useful to compare an older and newer version of a cumulative dataset. An error here means that one or more values per category set fail to match or exceed the prior dataset. New categories or combinations are allowed (but can be restricted independently with a Combo_Match test). Both tables must be present to run this test.', 'Y'), - ('1502', 'Combo_Match', 'Reference Match', 'Column values or combinations found in reference', 'Tests for the presence of one or a set of column values in a reference table', 'Column value combinations are not found in reference table values.', 'Missing values', NULL, NULL, 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. \n\nDo not use continuous mesurements here. Do not use numeric values unless they represent discrete categories.', 'subset_condition,having_condition,match_schema_name,match_table_name,match_groupby_names,match_subset_condition,match_having_condition', NULL, 'Record Subset Condition,Group Subset Condition,Reference Schema Name,Reference Table Name,Matching Columns,Matching Record Subset Condition,Matching Group Subset Condition', 'Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL|Condition based on aggregate expression used to exclude value combinations in source table, written like a condition within a SQL HAVING clause (e.g. 
`SUM(sales) < 100`) - OPTIONAL|Schema location of matching table|Matching table name|Column Names in reference table used to validate source table values (separated by commas)|Condition defining a subset of records in reference table to match against, written like a condition within a SQL WHERE clause - OPTIONAL|Condition based on aggregate expression used to exclude value combinations in reference table, written like a condition within a SQL HAVING clause (e.g. `SUM(sales) < 100`) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Validity', 'Schema Drift', 'Expected count of non-matching value combinations', 'This test verifies that values, or combinations of values, that are present in the main table are also found in a reference table. This is a useful test for referential integrity between fact and dimension tables. You can also use it to confirm the validity of a code or category, or of combinations of values that should only be found together within each record, such as product/size/color. An error here means that one or more category combinations in the main table are not found in the reference table. Both tables must be present to run this test.', 'Y'), - ('1503', 'Distribution_Shift', 'Distribution Shift', 'Probability distribution consistent with reference', 'Tests the closeness of match between two distributions of aggregate measures across combinations of column values, using Jensen-Shannon Divergence test', 'Divergence between two distributions exceeds specified threshold.', 'Divergence level (0-1)', 'Jensen-Shannon Divergence, from 0 (identical distributions), to 1.0 (max divergence)', NULL, 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous mesurements here. Do not use numeric values unless they represent discrete categories.', 'subset_condition,match_schema_name,match_table_name,match_groupby_names,match_subset_condition', NULL, 'Record Subset Condition,Reference Schema Name,Reference Table Name,Matching Columns to Compare,Matching Record Subset Condition', 'Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL|Schema location of matching table|Matching table name|Column Names in reference table used to compare counts with source table values (separated by commas)|Condition defining a subset of records in reference table to match against, written like a condition within a SQL WHERE clause - OPTIONAL', 'Warning', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected maximum divergence level between 0 and 1', 'This test measures the similarity of two sets of counts per categories, by using their proportional counts as probability distributions. Using Jensen-Shannon divergence, a measure of relative entropy or difference between two distributions, the test assigns a score ranging from 0, meaning that the distributions are identical, to 1, meaning that the distributions are completely unrelated. This test can be used to compare datasets that may not match exactly, but should have similar distributions. For example, it is a useful sanity check for data from different sources that you would expect to have a consistent spread, such as shipment of building materials per state and construction projects by state. Scores can be compared over time even if the distributions are not identical -- a dataset can be expected to maintain a comparable divergence score with a reference dataset over time. 
Both tables must be present to run this test.', 'Y'), - ('1508', 'Timeframe_Combo_Gain', 'Timeframe No Drops', 'Latest timeframe has at least all value combinations from prior period', 'Tests that column values in most recent time-window include at least same as prior time window', 'Column values in most recent time-window don''t include all values in prior window.', 'Mismatched values', NULL, NULL, 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Make sure not to use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', 'The date column used to define the time windows. This must be a DATE or DATETIME type.|Length in days of the time window. The test will compare the most recent period of days to the prior period of the same duration.|Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of missing value combinations', 'This test checks a single transactional table to verify that categorical values or combinations that are present in the most recent time window you define include at least all those found in the prior time window of the same duration. Missing values in the latest time window will trigger the test to fail. New values are permitted. Use this test to confirm that codes or categories are not lost across successive time periods in a transactional table.', 'Y'), - ('1509', 'Timeframe_Combo_Match', 'Timeframe Match', 'Column value combinations from latest timeframe same as prior period', 'Tests for presence of same column values in most recent time-window vs. prior time window', 'Column values don''t match in most recent time-windows.', 'Mismatched values', NULL, NULL, 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', NULL, 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of non-matching value combinations', 'This test checks a single transactional table (such as a fact table) to verify that categorical values or combinations that are present in the most recent time window you define match those found in the prior time window of the same duration. New or missing values in the latest time window will trigger the test to fail. Use this test to confirm the consistency in the occurrence of codes or categories across successive time periods in a transactional table.', 'Y'), + ('1500', 'Aggregate_Balance', 'Aggregate Balance', 'Aggregate values per group match reference', 'Tests for exact match in aggregate values for each set of column values vs. 
reference dataset', 'Aggregate measure per set of column values does not exactly match reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of matching table|Matching table name|Agregate column expression in matching table: one of `SUM([column_name])` or `COUNT([column_name])`|Condition defining a subset of records in matching table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in matching table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in matching table (e.g. HAVING clause) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of group totals not matching aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations across two different tables. Both tables must be accessible at the same time. It''s ideal for confirming that two datasets exactly match -- that the sum of a measure or count of a value hasn''t changed or shifted between categories. Use this test to compare a raw and processed version of the same dataset, or to confirm that an aggregated table exactly matches the detail table that it''s built from. An error here means that one or more value combinations fail to match. New categories or combinations will cause failure.', 'Y'), + ('1501', 'Aggregate_Minimum', 'Aggregate Minimum', 'Aggregate values per group are at or above reference', 'Tests that aggregate values for each set of column values are at least the same as reference dataset', 'Aggregate measure per set of column values is not at least the same as reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_schema_name,match_table_name,match_column_names,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'Record Subset Condition,Grouping Columns,Group Subset Condition,Matching Schema Name,Matching Table Name,Matching Aggregate Expression,Matching Record Subset Condition,Matching Grouping Columns,Matching Group Subset Condition', 'Condition defining a subset of records in main table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in main table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in main table (e.g. HAVING clause) - OPTIONAL|Schema location of reference table|Reference table name|Aggregate column expression in reference table (e.g. 
`SUM(sales)`)|Condition defining a subset of records in reference table, written like a condition within a SQL WHERE clause - OPTIONAL|Category columns in reference table separated by commas (e.g. GROUP BY columns)|Condition defining a subset of aggregate records in reference table (e.g. HAVING clause) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals below aggregate value', 'This test compares sums or counts of a column rolled up to one or more category combinations, but requires a match or increase in the aggregate value, rather than an exact match, across two different tables. Both tables must be accessible at the same time. Use this to confirm that aggregate values have not dropped for any set of categories, even if some values may rise. This test is useful to compare an older and newer version of a cumulative dataset. An error here means that one or more values per category set fail to match or exceed the prior dataset. New categories or combinations are allowed (but can be restricted independently with a Combo_Match test). Both tables must be present to run this test.', 'Y'), + ('1502', 'Combo_Match', 'Reference Match', 'Column values or combinations found in reference', 'Tests for the presence of one or a set of column values in a reference table', 'Column value combinations are not found in reference table values.', 'Missing values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. \n\nDo not use continuous mesurements here. Do not use numeric values unless they represent discrete categories.', 'subset_condition,having_condition,match_schema_name,match_table_name,match_groupby_names,match_subset_condition,match_having_condition', NULL, 'Record Subset Condition,Group Subset Condition,Reference Schema Name,Reference Table Name,Matching Columns,Matching Record Subset Condition,Matching Group Subset Condition', 'Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL|Condition based on aggregate expression used to exclude value combinations in source table, written like a condition within a SQL HAVING clause (e.g. `SUM(sales) < 100`) - OPTIONAL|Schema location of matching table|Matching table name|Column Names in reference table used to validate source table values (separated by commas)|Condition defining a subset of records in reference table to match against, written like a condition within a SQL WHERE clause - OPTIONAL|Condition based on aggregate expression used to exclude value combinations in reference table, written like a condition within a SQL HAVING clause (e.g. `SUM(sales) < 100`) - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Validity', 'Schema Drift', 'Expected count of non-matching value combinations', 'This test verifies that values, or combinations of values, that are present in the main table are also found in a reference table. This is a useful test for referential integrity between fact and dimension tables. You can also use it to confirm the validity of a code or category, or of combinations of values that should only be found together within each record, such as product/size/color. An error here means that one or more category combinations in the main table are not found in the reference table. 
Both tables must be present to run this test.', 'Y'), + ('1503', 'Distribution_Shift', 'Distribution Shift', 'Probability distribution consistent with reference', 'Tests the closeness of match between two distributions of aggregate measures across combinations of column values, using Jensen-Shannon Divergence test', 'Divergence between two distributions exceeds specified threshold.', 'Divergence level (0-1)', 'Jensen-Shannon Divergence, from 0 (identical distributions), to 1.0 (max divergence)', NULL, '1', '0.75', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous mesurements here. Do not use numeric values unless they represent discrete categories.', 'subset_condition,match_schema_name,match_table_name,match_groupby_names,match_subset_condition', NULL, 'Record Subset Condition,Reference Schema Name,Reference Table Name,Matching Columns to Compare,Matching Record Subset Condition', 'Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL|Schema location of matching table|Matching table name|Column Names in reference table used to compare counts with source table values (separated by commas)|Condition defining a subset of records in reference table to match against, written like a condition within a SQL WHERE clause - OPTIONAL', 'Warning', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected maximum divergence level between 0 and 1', 'This test measures the similarity of two sets of counts per categories, by using their proportional counts as probability distributions. Using Jensen-Shannon divergence, a measure of relative entropy or difference between two distributions, the test assigns a score ranging from 0, meaning that the distributions are identical, to 1, meaning that the distributions are completely unrelated. This test can be used to compare datasets that may not match exactly, but should have similar distributions. For example, it is a useful sanity check for data from different sources that you would expect to have a consistent spread, such as shipment of building materials per state and construction projects by state. Scores can be compared over time even if the distributions are not identical -- a dataset can be expected to maintain a comparable divergence score with a reference dataset over time. Both tables must be present to run this test.', 'Y'), + ('1508', 'Timeframe_Combo_Gain', 'Timeframe No Drops', 'Latest timeframe has at least all value combinations from prior period', 'Tests that column values in most recent time-window include at least same as prior time window', 'Column values in most recent time-window don''t include all values in prior window.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Make sure not to use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', 'The date column used to define the time windows. This must be a DATE or DATETIME type.|Length in days of the time window. 
The test will compare the most recent period of days to the prior period of the same duration.|Condition defining a subset of records in main table to evaluate, written like a condition within a SQL WHERE clause - OPTIONAL', 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of missing value combinations', 'This test checks a single transactional table to verify that categorical values or combinations that are present in the most recent time window you define include at least all those found in the prior time window of the same duration. Missing values in the latest time window will trigger the test to fail. New values are permitted. Use this test to confirm that codes or categories are not lost across successive time periods in a transactional table.', 'Y'), + ('1509', 'Timeframe_Combo_Match', 'Timeframe Match', 'Column value combinations from latest timeframe same as prior period', 'Tests for presence of same column values in most recent time-window vs. prior time window', 'Column values don''t match in most recent time-windows.', 'Mismatched values', NULL, NULL, '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', 'Categorical Column List', 'Specify one or more Categorical columns, separated by commas. Do not use continuous measurements here. Do not use numeric values unless they represent discrete categories.', 'window_date_column,window_days,subset_condition', NULL, 'Date Column for Time Windows,Time Window in Days,Record Subset Condition', NULL, 'Fail', 'QUERY', 'referential', 'Consistency', 'Data Drift', 'Expected count of non-matching value combinations', 'This test checks a single transactional table (such as a fact table) to verify that categorical values or combinations that are present in the most recent time window you define match those found in the prior time window of the same duration. New or missing values in the latest time window will trigger the test to fail. 
Use this test to confirm the consistency in the occurrence of codes or categories across successive time periods in a transactional table.', 'Y'), - ('1504', 'Aggregate_Pct_Above', 'Aggregate Pct Above', 'Aggregate values per group exceed reference', 'Tests that aggregate values for each set of column values exceed values for reference dataset', 'Aggregate measure per set of column values fails to exceed the reference dataset.', 'Mismatched measures', NULL, NULL, 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_column_names,match_schema_name,match_table_name,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'TODO Fill in default_parm_prompts match_schema_name,TODO Fill in default_parm_prompts match_table_name,TODO Fill in default_parm_prompts match_column_names,TODO Fill in default_parm_prompts match_subset_condition,TODO Fill in default_parm_prompts match_groupby_names,TODO Fill in default_parm_prompts match_having_condition,TODO Fill in default_parm_prompts subset_condition,TODO Fill in default_parm_prompts groupby_names,TODO Fill in default_parm_prompts having_condition', NULL, 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals with not exceeding aggregate measure', NULL, 'N'), - ('1505', 'Aggregate_Pct_Within', 'Aggregate Pct Within', 'Aggregate values per group exceed reference', 'Tests that aggregate values for each set of column values exceed values for reference dataset', 'Aggregate measure per set of column values fails to exceed the reference dataset.', 'Mismatched measures', NULL, NULL, 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_column_names,match_schema_name,match_table_name,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'TODO Fill in default_parm_prompts match_schema_name,TODO Fill in default_parm_prompts match_table_name,TODO Fill in default_parm_prompts match_column_names,TODO Fill in default_parm_prompts match_subset_condition,TODO Fill in default_parm_prompts match_groupby_names,TODO Fill in default_parm_prompts match_having_condition,TODO Fill in default_parm_prompts subset_condition,TODO Fill in default_parm_prompts groupby_names,TODO Fill in default_parm_prompts having_condition', NULL, 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals with not exceeding aggregate measure', NULL, 'N'), - ('1506', 'Aggregate_Increase', 'Aggregate Increase', 'Aggregate values per group exceed reference', 'Tests that aggregate values for each set of column values exceed values for reference dataset', 'Aggregate measure per set of column values fails to exceed the reference dataset.', 'Mismatched measures', NULL, NULL, 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_column_names,match_schema_name,match_table_name,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'TODO Fill in default_parm_prompts match_schema_name,TODO Fill in default_parm_prompts match_table_name,TODO Fill in default_parm_prompts match_column_names,TODO Fill in default_parm_prompts match_subset_condition,TODO Fill in default_parm_prompts match_groupby_names,TODO Fill in default_parm_prompts 
match_having_condition,TODO Fill in default_parm_prompts subset_condition,TODO Fill in default_parm_prompts groupby_names,TODO Fill in default_parm_prompts having_condition', NULL, 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals below reference value', NULL, 'N') + ('1504', 'Aggregate_Pct_Above', 'Aggregate Pct Above', 'Aggregate values per group exceed reference', 'Tests that aggregate values for each set of column values exceed values for reference dataset', 'Aggregate measure per set of column values fails to exceed the reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_column_names,match_schema_name,match_table_name,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'TODO Fill in default_parm_prompts match_schema_name,TODO Fill in default_parm_prompts match_table_name,TODO Fill in default_parm_prompts match_column_names,TODO Fill in default_parm_prompts match_subset_condition,TODO Fill in default_parm_prompts match_groupby_names,TODO Fill in default_parm_prompts match_having_condition,TODO Fill in default_parm_prompts subset_condition,TODO Fill in default_parm_prompts groupby_names,TODO Fill in default_parm_prompts having_condition', NULL, 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals with not exceeding aggregate measure', NULL, 'N'), + ('1505', 'Aggregate_Pct_Within', 'Aggregate Pct Within', 'Aggregate values per group exceed reference', 'Tests that aggregate values for each set of column values exceed values for reference dataset', 'Aggregate measure per set of column values fails to exceed the reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_column_names,match_schema_name,match_table_name,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'TODO Fill in default_parm_prompts match_schema_name,TODO Fill in default_parm_prompts match_table_name,TODO Fill in default_parm_prompts match_column_names,TODO Fill in default_parm_prompts match_subset_condition,TODO Fill in default_parm_prompts match_groupby_names,TODO Fill in default_parm_prompts match_having_condition,TODO Fill in default_parm_prompts subset_condition,TODO Fill in default_parm_prompts groupby_names,TODO Fill in default_parm_prompts having_condition', NULL, 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals with not exceeding aggregate measure', NULL, 'N'), + ('1506', 'Aggregate_Increase', 'Aggregate Increase', 'Aggregate values per group exceed reference', 'Tests that aggregate values for each set of column values exceed values for reference dataset', 'Aggregate measure per set of column values fails to exceed the reference dataset.', 'Mismatched measures', NULL, NULL, '1', '1.0', 'Aggregate Expression', 'Specify an aggregate column expression: one of `SUM([column_name])` or `COUNT([column_name])`', 'subset_condition,groupby_names,having_condition,match_column_names,match_schema_name,match_table_name,match_subset_condition,match_groupby_names,match_having_condition', NULL, 'TODO Fill in default_parm_prompts match_schema_name,TODO Fill in default_parm_prompts 
match_table_name,TODO Fill in default_parm_prompts match_column_names,TODO Fill in default_parm_prompts match_subset_condition,TODO Fill in default_parm_prompts match_groupby_names,TODO Fill in default_parm_prompts match_having_condition,TODO Fill in default_parm_prompts subset_condition,TODO Fill in default_parm_prompts groupby_names,TODO Fill in default_parm_prompts having_condition', NULL, 'Fail', 'QUERY', 'referential', 'Accuracy', 'Data Drift', 'Expected count of group totals below reference value', NULL, 'N') ; diff --git a/testgen/template/dbsetup/060_create_standard_views.sql b/testgen/template/dbsetup/060_create_standard_views.sql index 9ec8331..fbbf2f1 100644 --- a/testgen/template/dbsetup/060_create_standard_views.sql +++ b/testgen/template/dbsetup/060_create_standard_views.sql @@ -133,7 +133,7 @@ SELECT p.project_name, ELSE 'Passed' END as disposition, r.result_code as passed_ct, - (1 - r.result_code)::INTEGER as exception_ct, + (1 - COALESCE(r.result_code, 0))::INTEGER as exception_ct, CASE WHEN result_status = 'Warning' AND result_message NOT ILIKE 'Inactivated%' THEN 1 diff --git a/testgen/template/dbupgrade/0120_incremental_upgrade.sql b/testgen/template/dbupgrade/0120_incremental_upgrade.sql new file mode 100644 index 0000000..0081e19 --- /dev/null +++ b/testgen/template/dbupgrade/0120_incremental_upgrade.sql @@ -0,0 +1,133 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +ALTER TABLE test_types + ADD COLUMN dq_score_prevalence_formula TEXT, + ADD COLUMN dq_score_risk_factor TEXT; + +ALTER TABLE test_suites + ADD COLUMN last_complete_test_run_id UUID, + ADD COLUMN dq_score_exclude BOOLEAN default FALSE; + +ALTER TABLE profile_anomaly_results + ADD COLUMN dq_prevalence FLOAT; + +ALTER TABLE profiling_runs + ADD COLUMN dq_affected_data_points BIGINT, + ADD COLUMN dq_total_data_points BIGINT, + ADD COLUMN dq_score_profiling FLOAT; + +ALTER TABLE test_results + ADD COLUMN dq_prevalence FLOAT, + ADD COLUMN dq_record_ct BIGINT; + +ALTER TABLE test_runs + ADD COLUMN dq_affected_data_points BIGINT, + ADD COLUMN dq_total_data_points BIGINT, + ADD COLUMN dq_score_test_run FLOAT; + +ALTER TABLE table_groups + ADD COLUMN last_complete_profile_run_id UUID, + ADD COLUMN dq_score_profiling FLOAT, + ADD COLUMN dq_score_testing FLOAT; + +ALTER TABLE data_table_chars + ADD COLUMN last_complete_profile_run_id UUID, + ADD COLUMN dq_score_profiling FLOAT, + ADD COLUMN dq_score_testing FLOAT; + +ALTER TABLE data_column_chars + ADD COLUMN last_complete_profile_run_id UUID, + ADD COLUMN dq_score_profiling FLOAT, + ADD COLUMN dq_score_testing FLOAT; + + +ALTER TABLE profile_results + ADD COLUMN upper_case_ct BIGINT, + ADD COLUMN lower_case_ct BIGINT, + ADD COLUMN non_alpha_ct BIGINT, + ADD COLUMN mixed_case_ct BIGINT GENERATED ALWAYS AS ( value_ct - upper_case_ct - lower_case_ct - non_alpha_ct ) STORED, + ADD COLUMN before_100yr_date_ct BIGINT, + ADD COLUMN distant_future_date_ct BIGINT; + + +CREATE OR REPLACE FUNCTION fn_normal_cdf(z_score DOUBLE PRECISION) +RETURNS DOUBLE PRECISION AS +$$ +/* + This function calculates the cumulative distribution function (CDF) + for the standard normal distribution for a given Z-score using + the Abramowitz and Stegun approximation method. It returns the + probability that a standard normal variable is less than or equal + to the given Z-score. + + The approximation formula uses a series expansion to estimate the + CDF, which is accurate for most practical purposes. 
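+
+    As a quick sanity check (illustrative, approximate values), the function
+    reproduces familiar points of the standard normal CDF:
+      fn_normal_cdf(0)     -> ~0.5
+      fn_normal_cdf(1.96)  -> ~0.975
+      fn_normal_cdf(-1.96) -> ~0.025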
+ + To estimate the count of observations that fall outside a certain Z-score + (both above and below), you can use the `normal_cdf()` function. For a + total number of observations N, the proportion of values outside the Z-score + is given by: 2 * (1 - normal_cdf(ABS(Z))) + + This gives the proportion of values greater than the positive Z-score and + less than the negative Z-score combined. To get the estimated count of + observations, multiply this proportion by N: N * 2 * (1 - normal_cdf(ABS(Z))) +*/ +DECLARE + t DOUBLE PRECISION; + cdf DOUBLE PRECISION; +BEGIN + t := 1.0 / (1.0 + 0.2316419 * ABS(z_score)); + + cdf := (1.0 / SQRT(2 * PI())) * EXP(-0.5 * z_score * z_score) * + (0.319381530 * t + - 0.356563782 * t * t + + 1.781477937 * t * t * t + - 1.821255978 * t * t * t * t + + 1.330274429 * t * t * t * t * t); + + IF z_score >= 0 THEN + RETURN 1.0 - cdf; + ELSE + RETURN cdf; + END IF; +END; +$$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION fn_eval(expression TEXT) RETURNS FLOAT +AS +$$ +DECLARE + result FLOAT; + invalid_parts TEXT; +BEGIN + -- Check the modified expression for invalid characters, allowing colons + IF expression ~* E'[^0-9+\\-*/(),.\\sA-Z_:e\\\'"]' THEN + RAISE EXCEPTION 'Invalid characters detected in expression: %', expression; + END IF; + + -- Check for dangerous PostgreSQL-specific keywords + IF expression ~* E'\b(DROP|ALTER|INSERT|UPDATE|DELETE|TRUNCATE|GRANT|REVOKE|COPY|EXECUTE|CREATE|COMMENT|SECURITY|WITH|SET ROLE|SET SESSION|DO|CALL|--|/\\*|;|pg_read_file|pg_write_file|pg_terminate_backend)\b' THEN + RAISE EXCEPTION 'Invalid expression: dangerous statement detected'; + END IF; + + -- Remove all allowed tokens from the validation expression, treating 'FLOAT' as a keyword + invalid_parts := regexp_replace( + expression, + E'(\\mGREATEST|LEAST|ABS|FN_NORMAL_CDF|DATEDIFF|DAY|FLOAT)\\M|[0-9]+(\\.[0-9]+)?([eE][+-]?[0-9]+)?|[+\\-*/(),\\\'":]+|\\s+', + '', + 'gi' + ); + + -- If anything is left in the validation expression, it's invalid + IF invalid_parts <> '' THEN + RAISE EXCEPTION 'Invalid tokens "%" in expression: %', invalid_parts, expression; + END IF; + + -- Use the original expression (with ::FLOAT) for execution + EXECUTE format('SELECT (%s)::FLOAT', expression) INTO result; + + RETURN result; +END; +$$ +LANGUAGE plpgsql; diff --git a/testgen/template/execution/ex_finalize_test_run_results.sql b/testgen/template/execution/ex_finalize_test_run_results.sql index e4d1d6e..c9f187c 100644 --- a/testgen/template/execution/ex_finalize_test_run_results.sql +++ b/testgen/template/execution/ex_finalize_test_run_results.sql @@ -3,6 +3,7 @@ UPDATE test_results severity = COALESCE(d.severity, s.severity, tt.default_severity), threshold_value = COALESCE(r.threshold_value, d.threshold_value), result_status = CASE + WHEN r.result_status = 'Error' THEN 'Error' WHEN r.result_code = 1 THEN 'Passed' WHEN r.result_code = 0 AND COALESCE(d.severity, s.severity, tt.default_severity) = 'Warning' THEN 'Warning' @@ -31,3 +32,68 @@ INNER JOIN test_definitions d ON r.test_definition_id = d.id INNER JOIN test_types tt ON r.test_type = tt.test_type WHERE r.test_run_id = '{TEST_RUN_ID}' AND test_results.id = r.id; + +-- ============================================================================== +-- | Data Quality Scoring +-- | - Prevalence % * dq_score_risk_factor = calculated prevalence % +-- | - Save with total datapoints (record count). 
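+-- |   (illustrative numbers: a raw prevalence of 0.05 with a dq_score_risk_factor
+-- |    of 0.75 is saved as a calculated prevalence of 0.0375)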
+-- | - When scoring, calculate SUM(calculated prevalence * record count) +-- | / SUM(record count) +-- ============================================================================== + +-- UPDATE prevalence to zero for all passed or excluded tests +UPDATE test_results + SET dq_record_ct = tc.record_ct, + dq_prevalence = 0 + FROM test_results r +INNER JOIN data_table_chars tc + ON (r.table_groups_id = tc.table_groups_id + AND r.table_name ILIKE tc.table_name) + WHERE r.test_run_id = '{TEST_RUN_ID}'::UUID + AND ( r.result_code = 1 + OR r.disposition IN ('Dismissed', 'Inactive') ) + AND test_results.id = r.id; + +-- UPDATE TO calculated prevalence for all fails/warnings - result_code = 0 +WITH result_calc + AS ( SELECT r.id, + tt.dq_score_risk_factor::FLOAT as risk_calc, + REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( + REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( + tt.dq_score_prevalence_formula, + '{RESULT_MEASURE}', COALESCE(r.result_measure::VARCHAR, '')), + '{THRESHOLD_VALUE}', COALESCE(r.threshold_value::VARCHAR, '')), + + '{PRO_RECORD_CT}', COALESCE(p.record_ct::VARCHAR, '')), + '{DATE_DAYS_PRESENT}', COALESCE(p.date_days_present::VARCHAR, '')), + '{DATE_MONTHS_PRESENT}', COALESCE(p.date_months_present::VARCHAR, '')), + '{DATE_WEEKS_PRESENT}', COALESCE(p.date_weeks_present::VARCHAR, '')), + '{MIN_DATE}', COALESCE(p.min_date::VARCHAR, '')), + '{MAX_DATE}', COALESCE(p.max_date::VARCHAR, '')), + '{DISTINCT_VALUE_CT}', COALESCE(p.distinct_value_ct::VARCHAR, '')), + '{VALUE_CT}', COALESCE(p.value_ct::VARCHAR, '')), + '{MAX_LENGTH}', COALESCE(p.max_length::VARCHAR, '')), + '{AVG_LENGTH}', COALESCE(p.avg_length::VARCHAR, '')), + + '{RECORD_CT}', COALESCE(r.dq_record_ct::VARCHAR, tc.record_ct::VARCHAR, '')) + as built_score_prevalance_formula, + COALESCE(r.dq_record_ct, tc.record_ct) as dq_record_ct + FROM test_results r + INNER JOIN test_types tt + ON r.test_type = tt.test_type + LEFT JOIN v_latest_profile_results p + ON (r.table_groups_id = p.table_groups_id + AND r.table_name = p.table_name + AND r.column_names = p.column_name) + LEFT JOIN data_table_chars tc + ON (r.table_groups_id = tc.table_groups_id + AND r.table_name ILIKE tc.table_name) + WHERE r.test_run_id = '{TEST_RUN_ID}'::UUID + AND result_code = 0 + AND NOT COALESCE(disposition, '') IN ('Dismissed', 'Inactive') ) +UPDATE test_results + SET dq_record_ct = c.dq_record_ct, + dq_prevalence = risk_calc * fn_eval(c.built_score_prevalance_formula) + FROM result_calc c + WHERE test_results.id = c.id; + diff --git a/testgen/template/execution/ex_update_test_suite.sql b/testgen/template/execution/ex_update_test_suite.sql new file mode 100644 index 0000000..68283f1 --- /dev/null +++ b/testgen/template/execution/ex_update_test_suite.sql @@ -0,0 +1,13 @@ +WITH last_run + AS (SELECT test_suite_id, MAX(test_starttime) as max_starttime + FROM test_runs + WHERE test_suite_id = '{TEST_SUITE_ID}' + AND status = 'Complete' + GROUP BY test_suite_id) +UPDATE test_suites + SET last_complete_test_run_id = r.id + FROM test_runs r +INNER JOIN last_run l + ON (r.test_suite_id = l.test_suite_id + AND r.test_starttime = l.max_starttime) + WHERE test_suites.id = r.test_suite_id; \ No newline at end of file diff --git a/testgen/template/execution/test_scoring_rollup.sql b/testgen/template/execution/test_scoring_rollup.sql new file mode 100644 index 0000000..30c2798 --- /dev/null +++ b/testgen/template/execution/test_scoring_rollup.sql @@ -0,0 +1,123 @@ +-- Roll up scoring to test run +WITH score_detail + AS (SELECT tr.test_run_id, 
tr.table_name, tr.column_names, + MAX(tr.dq_record_ct) as row_ct, + SUM(COALESCE(tr.dq_prevalence * tr.dq_record_ct, 0)) as affected_data_points + FROM test_results tr + INNER JOIN test_runs r + ON tr.test_run_id = r.id + WHERE tr.test_run_id = '{TEST_RUN_ID}' + AND COALESCE(tr.disposition, 'Confirmed') = 'Confirmed' + GROUP BY tr.test_run_id, tr.table_name, tr.column_names ), +score_calc + AS ( SELECT test_run_id, + SUM(affected_data_points) as sum_affected_data_points, + SUM(row_ct) as sum_data_points + FROM score_detail + GROUP BY test_run_id ) +UPDATE test_runs + SET dq_affected_data_points = sum_affected_data_points, + dq_total_data_points = sum_data_points, + dq_score_test_run = 100.0 - sum_affected_data_points / sum_data_points + FROM score_calc + WHERE test_runs.id = score_calc.test_run_id; + + + +-- Roll up scores from latest Test Runs per Test Suite to Table Group +WITH last_test_date + AS (SELECT r.test_suite_id, MAX(r.test_starttime) as last_test_run_date + FROM test_runs r + WHERE r.status = 'Complete' + GROUP BY r.test_suite_id), +score_calc + AS (SELECT ts.table_groups_id, + SUM(run.dq_affected_data_points) as sum_affected_data_points, + SUM(run.dq_total_data_points) as sum_data_points + FROM test_runs run + INNER JOIN test_suites ts + ON (run.test_suite_id = ts.id) + INNER JOIN last_test_date lp + ON (run.test_suite_id = lp.test_suite_id + AND run.test_starttime = lp.last_test_run_date) + WHERE ts.table_groups_id = '{TABLE_GROUPS_ID}' + AND ts.dq_score_exclude = FALSE + GROUP BY ts.table_groups_id) +UPDATE table_groups + SET dq_score_testing = 100.0 - s.sum_affected_data_points::FLOAT / s.sum_data_points::FLOAT + FROM score_calc s + WHERE table_groups.id = s.table_groups_id; + +-- Roll up latest scores to data_column_chars +WITH last_test_date + AS (SELECT r.test_suite_id, MAX(r.test_starttime) as last_test_run_date + FROM test_runs r + WHERE r.status = 'Complete' + GROUP BY r.test_suite_id), +score_calc + AS (SELECT dcc.column_id, + -- Use AVG instead of MAX because column counts may differ by test_run + AVG(tr.dq_record_ct) as row_ct, + -- Use SUM to combine impact of all fails per column + SUM(COALESCE(tr.dq_prevalence * tr.dq_record_ct, 0)) as affected_data_points + FROM test_results tr + INNER JOIN test_runs r + ON tr.test_run_id = r.id + INNER JOIN last_test_date lp + ON (r.test_suite_id = lp.test_suite_id + AND r.test_starttime = lp.last_test_run_date) + INNER JOIN test_suites ts + ON (r.test_suite_id = ts.id) + INNER JOIN data_column_chars dcc + ON (ts.table_groups_id = dcc.table_groups_id + AND tr.table_name = dcc.table_name + AND tr.column_names = dcc.column_name) + WHERE ts.table_groups_id = '{TABLE_GROUPS_ID}' + AND ts.dq_score_exclude = FALSE + AND COALESCE(tr.disposition, 'Confirmed') = 'Confirmed' + GROUP BY dcc.column_id ) +UPDATE data_column_chars + SET dq_score_testing = 100.0 - affected_data_points / row_ct + FROM score_calc s + WHERE data_column_chars.column_id = s.column_id; + + + +-- Roll up latest scores to data_table_chars +WITH last_test_date + AS (SELECT r.test_suite_id, MAX(r.test_starttime) as last_test_run_date + FROM test_runs r + WHERE r.status = 'Complete' + GROUP BY r.test_suite_id), +score_detail + AS (SELECT dcc.table_id, dcc.column_id, + -- Use AVG instead of MAX because column counts may differ by test_run + AVG(tr.dq_record_ct) as row_ct, + -- Use SUM to combine impact of all fails per column + SUM(COALESCE(tr.dq_prevalence * tr.dq_record_ct, 0)) as affected_data_points + FROM test_results tr + INNER JOIN test_runs r + ON tr.test_run_id 
= r.id + INNER JOIN last_test_date lp + ON (r.test_suite_id = lp.test_suite_id + AND r.test_starttime = lp.last_test_run_date) + INNER JOIN test_suites ts + ON (r.test_suite_id = ts.id) + INNER JOIN data_column_chars dcc + ON (ts.table_groups_id = dcc.table_groups_id + AND tr.table_name = dcc.table_name + AND tr.column_names = dcc.column_name) + WHERE ts.table_groups_id = '{TABLE_GROUPS_ID}' + AND ts.dq_score_exclude = FALSE + AND COALESCE(tr.disposition, 'Confirmed') = 'Confirmed' + GROUP BY table_id, dcc.column_id ), +score_calc + AS (SELECT table_id, + SUM(affected_data_points) as sum_affected_data_points, + SUM(row_ct) as sum_data_points + FROM score_detail + GROUP BY table_id) +UPDATE data_table_chars + SET dq_score_testing = 100.0 - sum_affected_data_points / sum_data_points + FROM score_calc s + WHERE data_table_chars.table_id = s.table_id; \ No newline at end of file diff --git a/testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml b/testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml index 5c5e433..8ca20a1 100644 --- a/testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +++ b/testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml @@ -57,6 +57,20 @@ strTemplate05_A: COUNT(DISTINCT UPPER(REPLACE(TRANSLATE("{COL_NAME}",' '''',.-', END ) AS filled_value_ct, LEFT(MIN(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS min_text, LEFT(MAX(NULLIF("{COL_NAME}", '') COLLATE Latin1_General_BIN ), 100) AS max_text, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 0 + WHEN TRANSLATE("{COL_NAME}", 'abcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS upper_case_ct, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 0 + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', ' ') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS lower_case_ct, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', ' ') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS non_alpha_ct, SUM({{DKFN_IS_NUM;;LEFT("{COL_NAME}", 31)}} ) AS numeric_ct, SUM({{DKFN_IS_DATE;;LEFT("{COL_NAME}", 26)}} ) AS date_ct, CASE @@ -107,6 +121,9 @@ strTemplate05_else: NULL as distinct_std_value_ct, NULL as filled_value_ct, NULL as min_text, NULL as max_text, + NULL as upper_case_ct, + NULL as lower_case_ct, + NULL as non_alpha_ct, NULL as numeric_ct, NULL as date_ct, NULL as std_pattern_match, @@ -175,6 +192,10 @@ strTemplate11_D: CASE WHEN DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1 ELSE 0 END) AS before_20yr_date_ct, + SUM(CASE + WHEN DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}') > 1200 THEN 1 + ELSE 0 + END) AS before_100yr_date_ct, SUM(CASE WHEN DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 ELSE 0 @@ -186,6 +207,10 @@ strTemplate11_D: CASE SUM(CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 ELSE 0 END) AS future_date_ct, + SUM(CASE + WHEN DATEDIFF(month, '{RUN_DATE}', "{COL_NAME}") > 240 THEN 1 + ELSE 0 + END) AS distant_future_date_ct, COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present, COUNT(DISTINCT DATEDIFF(week, "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present, COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present, @@ -195,9 +220,11 @@ strTemplate11_else: NULL as min_date, NULL as before_1yr_date_ct, NULL as before_5yr_date_ct, NULL as 
before_20yr_date_ct, + NULL AS before_100yr_date_ct, NULL as within_1yr_date_ct, NULL as within_1mo_date_ct, NULL as future_date_ct, + NULL as distant_future_date_ct, NULL as date_days_present, NULL as date_weeks_present, NULL as date_months_present, diff --git a/testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml b/testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml index e32c609..746c25f 100644 --- a/testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +++ b/testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml @@ -51,6 +51,20 @@ strTemplate05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) a END ) AS filled_value_ct, LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text, LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 0 + WHEN TRANSLATE("{COL_NAME}", 'abcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS upper_case_ct, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 0 + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS lower_case_ct, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS non_alpha_ct, SUM({{DKFN_IS_NUM;;LEFT("{COL_NAME}", 31)}} ) AS numeric_ct, SUM({{DKFN_IS_DATE;;LEFT("{COL_NAME}", 26)}} ) AS date_ct, CASE @@ -84,6 +98,9 @@ strTemplate05_else: NULL as distinct_std_value_ct, NULL as filled_value_ct, NULL as min_text, NULL as max_text, + NULL as upper_case_ct, + NULL as lower_case_ct, + NULL as non_alpha_ct, NULL as numeric_ct, NULL as date_ct, NULL as std_pattern_match, @@ -153,6 +170,10 @@ strTemplate11_D: CASE WHEN {{DKFN_DATEDIFF_MONTH;;"{COL_NAME}";;'{RUN_DATE}'}} > 240 THEN 1 ELSE 0 END) AS before_20yr_date_ct, + SUM(CASE + WHEN {{DKFN_DATEDIFF_MONTH;;"{COL_NAME}";;'{RUN_DATE}'}} > 1200 THEN 1 + ELSE 0 + END) AS before_100yr_date_ct, SUM(CASE WHEN {{DKFN_DATEDIFF_DAY;;"{COL_NAME}";;'{RUN_DATE}'}} BETWEEN 0 AND 365 THEN 1 ELSE 0 @@ -164,6 +185,10 @@ strTemplate11_D: CASE SUM(CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 ELSE 0 END) AS future_date_ct, + SUM(CASE + WHEN {{DKFN_DATEDIFF_MONTH;;'{RUN_DATE}';;"{COL_NAME}"}} > 240 THEN 1 + ELSE 0 + END) AS distant_future_date_ct, COUNT(DISTINCT {{DKFN_DATEDIFF_DAY;;"{COL_NAME}";;'{RUN_DATE}'}} ) as date_days_present, COUNT(DISTINCT {{DKFN_DATEDIFF_WEEK;;"{COL_NAME}";;'{RUN_DATE}'}} ) as date_weeks_present, COUNT(DISTINCT {{DKFN_DATEDIFF_MONTH;;"{COL_NAME}";;'{RUN_DATE}'}} ) as date_months_present, @@ -174,9 +199,11 @@ strTemplate11_else: NULL as min_date, NULL as before_1yr_date_ct, NULL as before_5yr_date_ct, NULL as before_20yr_date_ct, + NULL AS before_100yr_date_ct, NULL as within_1yr_date_ct, NULL as within_1mo_date_ct, NULL as future_date_ct, + NULL as distant_future_date_ct, NULL as date_days_present, NULL as date_weeks_present, NULL as date_months_present, diff --git a/testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml b/testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml index b876a4d..e54bdf4 100644 --- a/testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +++ 
b/testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml @@ -51,6 +51,20 @@ strTemplate05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) a END ) AS filled_value_ct, LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text, LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 0 + WHEN TRANSLATE("{COL_NAME}", 'abcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS upper_case_ct, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 0 + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS lower_case_ct, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS non_alpha_ct, SUM({{DKFN_IS_NUM;;LEFT("{COL_NAME}", 31)}} ) AS numeric_ct, SUM({{DKFN_IS_DATE;;LEFT("{COL_NAME}", 26)}} ) AS date_ct, CASE @@ -84,6 +98,9 @@ strTemplate05_else: NULL as distinct_std_value_ct, NULL as filled_value_ct, NULL as min_text, NULL as max_text, + NULL as upper_case_ct, + NULL as lower_case_ct, + NULL as non_alpha_ct, NULL as numeric_ct, NULL as date_ct, NULL as std_pattern_match, @@ -150,6 +167,10 @@ strTemplate11_D: CASE WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1 ELSE 0 END) AS before_20yr_date_ct, + SUM(CASE + WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 1200 THEN 1 + ELSE 0 + END) AS before_100yr_date_ct, SUM(CASE WHEN DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 ELSE 0 @@ -161,6 +182,10 @@ strTemplate11_D: CASE SUM(CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 ELSE 0 END) AS future_date_ct, + SUM(CASE + WHEN DATEDIFF('MON', '{RUN_DATE}', "{COL_NAME}") > 240 THEN 1 + ELSE 0 + END) AS distant_future_date_ct, COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present, COUNT(DISTINCT DATEDIFF(week, "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present, COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present, @@ -170,9 +195,11 @@ strTemplate11_else: NULL as min_date, NULL as before_1yr_date_ct, NULL as before_5yr_date_ct, NULL as before_20yr_date_ct, + NULL AS before_100yr_date_ct, NULL as within_1yr_date_ct, NULL as within_1mo_date_ct, NULL as future_date_ct, + NULL as distant_future_date_ct, NULL as date_days_present, NULL as date_weeks_present, NULL as date_months_present, diff --git a/testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml b/testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml index 4538d10..f0a784f 100644 --- a/testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +++ b/testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml @@ -52,6 +52,20 @@ strTemplate05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) a END ) AS filled_value_ct, LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text, LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 0 + WHEN TRANSLATE("{COL_NAME}", 'abcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS upper_case_ct, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = 
"{COL_NAME}" THEN 0 + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS lower_case_ct, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS non_alpha_ct, SUM({{DKFN_IS_NUM;;LEFT("{COL_NAME}", 31)}} ) AS numeric_ct, SUM({{DKFN_IS_DATE;;LEFT("{COL_NAME}", 26)}} ) AS date_ct, CASE @@ -85,6 +99,9 @@ strTemplate05_else: NULL as distinct_std_value_ct, NULL as filled_value_ct, NULL as min_text, NULL as max_text, + NULL as upper_case_ct, + NULL as lower_case_ct, + NULL as non_alpha_ct, NULL as numeric_ct, NULL as date_ct, NULL as std_pattern_match, @@ -149,6 +166,10 @@ strTemplate11_D: GREATEST(MIN("{COL_NAME}"), '0001-01-01') as min_date, WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1 ELSE 0 END) AS before_20yr_date_ct, + SUM(CASE + WHEN DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 1200 THEN 1 + ELSE 0 + END) AS before_100yr_date_ct, SUM(CASE WHEN DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 ELSE 0 @@ -160,6 +181,10 @@ strTemplate11_D: GREATEST(MIN("{COL_NAME}"), '0001-01-01') as min_date, SUM(CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 ELSE 0 END) AS future_date_ct, + SUM(CASE + WHEN DATEDIFF('MON', '{RUN_DATE}', "{COL_NAME}") > 240 THEN 1 + ELSE 0 + END) AS distant_future_date_ct, COUNT(DISTINCT DATEDIFF(day, "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present, COUNT(DISTINCT DATEDIFF(week, "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present, COUNT(DISTINCT DATEDIFF(month, "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present, @@ -169,9 +194,11 @@ strTemplate11_else: NULL as min_date, NULL as before_1yr_date_ct, NULL as before_5yr_date_ct, NULL as before_20yr_date_ct, + NULL AS before_100yr_date_ct, NULL as within_1yr_date_ct, NULL as within_1mo_date_ct, NULL as future_date_ct, + NULL as distant_future_date_ct, NULL as date_days_present, NULL as date_weeks_present, NULL as date_months_present, diff --git a/testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml b/testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml index 0968a2d..87b216f 100644 --- a/testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +++ b/testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml @@ -51,6 +51,20 @@ strTemplate05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) a END ) AS filled_value_ct, SUBSTRING(MIN(NULLIF("{COL_NAME}", '')), 1, 100) AS min_text, SUBSTRING(MAX(NULLIF("{COL_NAME}", '')), 1, 100) AS max_text, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 0 + WHEN TRANSLATE("{COL_NAME}", 'abcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS upper_case_ct, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 0 + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS lower_case_ct, + SUM(CASE + WHEN TRANSLATE("{COL_NAME}", 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', '') = "{COL_NAME}" THEN 1 + ELSE 0 + END) AS non_alpha_ct, SUM(fndk_isnum(SUBSTRING("{COL_NAME}", 1, 31))) AS numeric_ct, SUM(fndk_isdate(SUBSTRING("{COL_NAME}", 1, 26))) AS date_ct, CASE @@ -84,6 +98,9 @@ strTemplate05_else: NULL as distinct_std_value_ct, NULL as filled_value_ct, NULL as min_text, NULL as max_text, + NULL as upper_case_ct, + NULL 
as lower_case_ct, + NULL as non_alpha_ct, NULL as numeric_ct, NULL as date_ct, NULL as std_pattern_match, @@ -148,6 +165,10 @@ strTemplate11_D: CASE WHEN DATE_DIFF('MONTH', TIMESTAMP "{COL_NAME}", TIMESTAMP '{RUN_DATE}') > 240 THEN 1 ELSE 0 END) AS before_20yr_date_ct, + SUM(CASE + WHEN DATE_DIFF('MONTH', TIMESTAMP "{COL_NAME}", TIMESTAMP '{RUN_DATE}') > 1200 THEN 1 + ELSE 0 + END) AS before_100yr_date_ct, SUM(CASE WHEN DATE_DIFF('DAY', TIMESTAMP "{COL_NAME}", TIMESTAMP '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1 ELSE 0 @@ -159,6 +180,10 @@ strTemplate11_D: CASE SUM(CASE WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 ELSE 0 END) AS future_date_ct, + SUM(CASE + WHEN DATE_DIFF('MONTH', TIMESTAMP '{RUN_DATE}', TIMESTAMP "{COL_NAME}") > 240 THEN 1 + ELSE 0 + END) AS distant_future_date_ct, COUNT(DISTINCT DATE_DIFF('day', TIMESTAMP "{COL_NAME}", TIMESTAMP '{RUN_DATE}' ) ) as date_days_present, COUNT(DISTINCT DATE_DIFF('week', TIMESTAMP "{COL_NAME}", TIMESTAMP '{RUN_DATE}' ) ) as date_weeks_present, COUNT(DISTINCT DATE_DIFF('month', TIMESTAMP "{COL_NAME}", TIMESTAMP '{RUN_DATE}' ) ) as date_months_present, @@ -168,9 +193,11 @@ strTemplate11_else: NULL as min_date, NULL as before_1yr_date_ct, NULL as before_5yr_date_ct, NULL as before_20yr_date_ct, + NULL AS before_100yr_date_ct, NULL as within_1yr_date_ct, NULL as within_1mo_date_ct, NULL as future_date_ct, + NULL as distant_future_date_ct, NULL as date_days_present, NULL as date_weeks_present, NULL as date_months_present, diff --git a/testgen/template/parms/parms_test_execution.sql b/testgen/template/parms/parms_test_execution.sql index 204b49c..d39b644 100644 --- a/testgen/template/parms/parms_test_execution.sql +++ b/testgen/template/parms/parms_test_execution.sql @@ -1,6 +1,7 @@ SELECT ts.project_code, ts.connection_id::VARCHAR, ts.id::VARCHAR as test_suite_id, + ts.table_groups_id::VARCHAR, tg.table_group_schema, cc.sql_flavor, cc.project_host, diff --git a/testgen/template/profiling/functional_datatype.sql b/testgen/template/profiling/functional_datatype.sql index a74cfb4..b7822a5 100644 --- a/testgen/template/profiling/functional_datatype.sql +++ b/testgen/template/profiling/functional_datatype.sql @@ -232,6 +232,7 @@ WHERE profile_run_id = '{PROFILE_RUN_ID}' UPDATE profile_results SET functional_data_type = CASE WHEN (std_pattern_match = 'ZIP_USA' AND (column_name ILIKE '%zip%' OR column_name ILIKE '%postal%')) + OR (lower(column_name) IN ('ZIP_CODE', 'ZIP')) THEN 'Zip' WHEN std_pattern_match = 'EMAIL' THEN 'Email' diff --git a/testgen/template/profiling/profile_anomaly_scoring.sql b/testgen/template/profiling/profile_anomaly_scoring.sql new file mode 100644 index 0000000..9511c12 --- /dev/null +++ b/testgen/template/profiling/profile_anomaly_scoring.sql @@ -0,0 +1,10 @@ +UPDATE profile_anomaly_results r + SET dq_prevalence = ({PREV_FORMULA}) * {RISK} + FROM profile_anomaly_results r2 +INNER JOIN profile_results p + ON (r2.profile_run_id = p.profile_run_id + AND r2.table_name = p.table_name + AND r2.column_name = p.column_name) + WHERE r.profile_run_id = '{PROFILE_RUN_ID}'::UUID + AND r2.anomaly_id = '{ANOMALY_ID}' + AND r.id = r2.id; \ No newline at end of file diff --git a/testgen/template/profiling/profile_anomaly_scoring_rollup.sql b/testgen/template/profiling/profile_anomaly_scoring_rollup.sql new file mode 100644 index 0000000..9c7047b --- /dev/null +++ b/testgen/template/profiling/profile_anomaly_scoring_rollup.sql @@ -0,0 +1,109 @@ +-- Roll up scoring to profiling run +WITH score_detail + AS (SELECT pr.profile_run_id, pr.table_name, 
pr.column_name, + MAX(pr.record_ct) as row_ct, + SUM(COALESCE(p.dq_prevalence * pr.record_ct, 0)) as affected_data_points + FROM profile_results pr + INNER JOIN profiling_runs r + ON (pr.profile_run_id = r.id) + LEFT JOIN profile_anomaly_results p + ON (pr.profile_run_id = p.profile_run_id + AND pr.column_name = p.column_name + AND pr.table_name = p.table_name) + WHERE pr.profile_run_id = '{PROFILE_RUN_ID}' + AND COALESCE(p.disposition, 'Confirmed') = 'Confirmed' + GROUP BY 1, 2, 3 ), +score_calc + AS ( SELECT profile_run_id, + SUM(affected_data_points) as sum_affected_data_points, + SUM(row_ct) as sum_data_points + FROM score_detail + GROUP BY profile_run_id ) +UPDATE profiling_runs + SET dq_affected_data_points = sum_affected_data_points, + dq_total_data_points = sum_data_points, + dq_score_profiling = 100.0 - sum_affected_data_points / sum_data_points + FROM score_calc + WHERE profiling_runs.id = score_calc.profile_run_id; + + +-- Roll up latest scores to Table Group +WITH last_profile_date + AS (SELECT table_groups_id, MAX(profiling_starttime) as last_profile_run_date + FROM profiling_runs + WHERE status = 'Complete' + GROUP BY table_groups_id), +score_calc + AS (SELECT run.table_groups_id, run.id as profile_run_id, + run.dq_affected_data_points as sum_affected_data_points, + run.dq_total_data_points as sum_data_points + FROM profiling_runs run + INNER JOIN last_profile_date lp + ON (run.table_groups_id = lp.table_groups_id + AND run.profiling_starttime = lp.last_profile_run_date) + WHERE run.table_groups_id = '{TABLE_GROUPS_ID}' ) +UPDATE table_groups + SET dq_score_profiling = 100.0 - s.sum_affected_data_points::FLOAT / s.sum_data_points::FLOAT, + last_complete_profile_run_id = s.profile_run_id + FROM score_calc s + WHERE table_groups.id = s.table_groups_id; + +-- Roll up latest scores to data_column_chars +WITH score_detail + AS (SELECT dcc.column_id, tg.last_complete_profile_run_id, + MAX(pr.record_ct) as row_ct, + SUM(COALESCE(p.dq_prevalence * pr.record_ct, 0)) as affected_data_points + FROM table_groups tg + INNER JOIN profiling_runs r + ON (tg.last_complete_profile_run_id = r.id) + INNER JOIN profile_results pr + ON (r.id = pr.profile_run_id) + INNER JOIN data_column_chars dcc + ON (pr.table_groups_id = dcc.table_groups_id + AND pr.table_name = dcc.table_name + AND pr.column_name = dcc.column_name) + LEFT JOIN profile_anomaly_results p + ON (pr.profile_run_id = p.profile_run_id + AND pr.column_name = p.column_name + AND pr.table_name = p.table_name) + WHERE tg.id = '{TABLE_GROUPS_ID}' + AND COALESCE(p.disposition, 'Confirmed') = 'Confirmed' + GROUP BY dcc.column_id, tg.last_complete_profile_run_id ) +UPDATE data_column_chars + SET dq_score_profiling = 100.0 - s.affected_data_points / s.row_ct, + last_complete_profile_run_id = s.last_complete_profile_run_id + FROM score_detail s + WHERE data_column_chars.column_id = s.column_id; + +-- Roll up latest scores to data_table_chars +WITH score_detail + AS (SELECT dcc.column_id, dcc.table_id, tg.last_complete_profile_run_id, + MAX(pr.record_ct) as row_ct, + SUM(COALESCE(p.dq_prevalence * pr.record_ct, 0)) as affected_data_points + FROM table_groups tg + INNER JOIN profiling_runs r + ON (tg.last_complete_profile_run_id = r.id) + INNER JOIN profile_results pr + ON (r.id = pr.profile_run_id) + INNER JOIN data_column_chars dcc + ON (pr.table_groups_id = dcc.table_groups_id + AND pr.table_name = dcc.table_name + AND pr.column_name = dcc.column_name) + LEFT JOIN profile_anomaly_results p + ON (pr.profile_run_id = p.profile_run_id + AND 
pr.column_name = p.column_name + AND pr.table_name = p.table_name) + WHERE tg.id = '{TABLE_GROUPS_ID}' + AND COALESCE(p.disposition, 'Confirmed') = 'Confirmed' + GROUP BY dcc.column_id, dcc.table_id, tg.last_complete_profile_run_id ), +score_calc + AS ( SELECT table_id, last_complete_profile_run_id, + SUM(affected_data_points) as sum_affected_data_points, + SUM(row_ct) as sum_data_points + FROM score_detail + GROUP BY table_id, last_complete_profile_run_id ) +UPDATE data_table_chars + SET dq_score_profiling = 100.0 - s.sum_affected_data_points / s.sum_data_points, + last_complete_profile_run_id = s.last_complete_profile_run_id + FROM score_calc s + WHERE data_table_chars.table_id = s.table_id; diff --git a/testgen/template/profiling/profile_anomaly_types_get.sql b/testgen/template/profiling/profile_anomaly_types_get.sql index f1cd576..c1f3950 100644 --- a/testgen/template/profiling/profile_anomaly_types_get.sql +++ b/testgen/template/profiling/profile_anomaly_types_get.sql @@ -1,3 +1,3 @@ -SELECT id, anomaly_type, data_object, anomaly_criteria, detail_expression +SELECT id, anomaly_type, data_object, anomaly_criteria, detail_expression, dq_score_prevalence_formula, dq_score_risk_factor FROM profile_anomaly_types t ORDER BY id; diff --git a/testgen/template/validate_tests/ex_get_test_column_list_tg.sql b/testgen/template/validate_tests/ex_get_test_column_list_tg.sql index df7bdde..b0953b1 100644 --- a/testgen/template/validate_tests/ex_get_test_column_list_tg.sql +++ b/testgen/template/validate_tests/ex_get_test_column_list_tg.sql @@ -1,6 +1,19 @@ SELECT schema_name || '.' || table_name || '.' || column_name AS columns, ARRAY_AGG(cat_test_id) as test_id_array - FROM (SELECT cat_test_id, + FROM ( + -- FROM: column_name - column scope (single column) + SELECT cat_test_id, + schema_name AS schema_name, + table_name AS table_name, + column_name + FROM test_definitions d + INNER JOIN test_types t + ON d.test_type = t.test_type + WHERE test_suite_id = '{TEST_SUITE_ID}' + AND t.test_scope = 'column' + UNION + -- FROM: column_name - referential scope (could be multiple columns) + SELECT cat_test_id, schema_name AS schema_name, table_name AS table_name, TRIM(UNNEST(STRING_TO_ARRAY(column_name, ','))) as column_name @@ -8,8 +21,9 @@ INNER JOIN test_types t ON d.test_type = t.test_type WHERE test_suite_id = '{TEST_SUITE_ID}' - AND t.test_scope IN ('column', 'referential') + AND t.test_scope = 'referential' UNION + -- FROM: groupby_names (should be referential) SELECT cat_test_id, schema_name AS schema_name, table_name AS table_name, @@ -20,6 +34,7 @@ WHERE test_suite_id = '{TEST_SUITE_ID}' AND t.test_scope IN ('column', 'referential') UNION + -- FROM: window_date_column (referential) SELECT cat_test_id, schema_name AS schema_name, table_name AS table_name, @@ -28,8 +43,9 @@ INNER JOIN test_types t ON d.test_type = t.test_type WHERE test_suite_id = '{TEST_SUITE_ID}' - AND t.test_scope IN ('column', 'referential') + AND t.test_scope = 'referential' UNION + -- FROM: match_column_names (referential) SELECT cat_test_id, match_schema_name AS schema_name, match_table_name AS table_name, @@ -40,6 +56,7 @@ WHERE test_suite_id = '{TEST_SUITE_ID}' AND t.test_scope = 'referential' UNION + -- FROM: match_groupby_names (referential) SELECT cat_test_id, match_schema_name AS schema_name, match_table_name AS table_name, @@ -49,5 +66,5 @@ ON d.test_type = t.test_type WHERE test_suite_id = '{TEST_SUITE_ID}' AND t.test_scope = 'referential' ) cols - WHERE column_name SIMILAR TO '[A-Za-z0-9_]+' +-- WHERE column_name 
SIMILAR TO '[A-Za-z0-9_]+' GROUP BY columns; diff --git a/testgen/template/validate_tests/ex_write_test_val_errors.sql b/testgen/template/validate_tests/ex_write_test_val_errors.sql index b1d47d3..639cc3e 100644 --- a/testgen/template/validate_tests/ex_write_test_val_errors.sql +++ b/testgen/template/validate_tests/ex_write_test_val_errors.sql @@ -9,6 +9,7 @@ INSERT INTO test_results test_run_id, input_parameters, result_code, + result_status, result_message, result_measure ) SELECT '{TEST_SUITE_ID}'::UUID, @@ -20,7 +21,8 @@ INSERT INTO test_results '{RUN_DATE}' as test_time, '{TEST_RUN_ID}' as test_run_id, NULL as input_parameters, - 0 as result_code, + NULL as result_code, + 'Error' as result_status, test_definition_status AS result_message, NULL as result_measure FROM test_definitions From c54266bdb5b9503659a0714e6734897427f52180 Mon Sep 17 00:00:00 2001 From: "Chip.Bloche" Date: Mon, 11 Nov 2024 09:26:24 -0500 Subject: [PATCH 64/91] Tweaked Functional Datatypes --- .../profiling/functional_datatype.sql | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/testgen/template/profiling/functional_datatype.sql b/testgen/template/profiling/functional_datatype.sql index b7822a5..af64286 100644 --- a/testgen/template/profiling/functional_datatype.sql +++ b/testgen/template/profiling/functional_datatype.sql @@ -460,15 +460,6 @@ UPDATE profile_results AND p.distinct_value_ct BETWEEN 15 AND 40000 ) c WHERE profile_results.id = c.id; --- 7. Assign 'ID-Unique' functional data type to the columns that are identity columns - -UPDATE profile_results -SET functional_data_type = 'ID-Unique' -WHERE profile_run_id = '{PROFILE_RUN_ID}' - AND functional_data_type IN ('ID', 'ID-Secondary') - AND record_ct = distinct_value_ct - AND record_ct > 50; - -- Update alpha ID's to ID-Secondary and ID-Grouping UPDATE profile_results @@ -482,7 +473,16 @@ SET functional_data_type = CASE WHERE profile_run_id = '{PROFILE_RUN_ID}' AND functional_data_type = 'ID'; --- 8. Assign 'ID-FK' functional data type to the columns that are foreign keys of the identity columns identified in the previous step +-- Assign 'ID-Unique' functional data type to the columns that are identity columns + +UPDATE profile_results +SET functional_data_type = 'ID-Unique' +WHERE profile_run_id = '{PROFILE_RUN_ID}' + AND functional_data_type IN ('ID', 'ID-Secondary') + AND record_ct = distinct_value_ct + AND record_ct > 50; + +-- Assign 'ID-FK' functional data type to the columns that are foreign keys of the identity columns identified in the previous step UPDATE profile_results SET functional_data_type = 'ID-FK' @@ -496,9 +496,7 @@ WHERE profile_results.profile_run_id = '{PROFILE_RUN_ID}' and profile_results.table_name <> ui.table_name and profile_results.functional_data_type <> 'ID-Unique'; --- Assign - --- 9. 
Functional Data Type: 'Measurement Pct' +-- Functional Data Type: 'Measurement Pct' UPDATE profile_results SET functional_data_type = 'Measurement Pct' From b62367918c57d272236e81f0bc4f6625f2a7b7da Mon Sep 17 00:00:00 2001 From: "Chip.Bloche" Date: Mon, 11 Nov 2024 13:55:27 -0500 Subject: [PATCH 65/91] Tweaked Incremental Upgrade --- ...incremental_upgrade.sql => 0113_incremental_upgrade.sql} | 6 ++++++ 1 file changed, 6 insertions(+) rename testgen/template/dbupgrade/{0120_incremental_upgrade.sql => 0113_incremental_upgrade.sql} (94%) diff --git a/testgen/template/dbupgrade/0120_incremental_upgrade.sql b/testgen/template/dbupgrade/0113_incremental_upgrade.sql similarity index 94% rename from testgen/template/dbupgrade/0120_incremental_upgrade.sql rename to testgen/template/dbupgrade/0113_incremental_upgrade.sql index 0081e19..dafc6f1 100644 --- a/testgen/template/dbupgrade/0120_incremental_upgrade.sql +++ b/testgen/template/dbupgrade/0113_incremental_upgrade.sql @@ -8,6 +8,12 @@ ALTER TABLE test_suites ADD COLUMN last_complete_test_run_id UUID, ADD COLUMN dq_score_exclude BOOLEAN default FALSE; +ALTER TABLE profile_anomaly_types + ADD COLUMN upper_case_ct BIGINT, + ADD COLUMN lower_case_ct BIGINT, + ADD COLUMN non_alpha_ct BIGINT, + ADD COLUMN mixed_case_ct BIGINT GENERATED ALWAYS AS ( value_ct - upper_case_ct - lower_case_ct - non_alpha_ct ) STORED; + ALTER TABLE profile_anomaly_results ADD COLUMN dq_prevalence FLOAT; From bde97caab229558f26c5f5d1808085e884268518 Mon Sep 17 00:00:00 2001 From: "Chip.Bloche" Date: Mon, 11 Nov 2024 14:29:01 -0500 Subject: [PATCH 66/91] Incremental upgrade fix --- testgen/template/dbupgrade/0113_incremental_upgrade.sql | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/testgen/template/dbupgrade/0113_incremental_upgrade.sql b/testgen/template/dbupgrade/0113_incremental_upgrade.sql index dafc6f1..8907660 100644 --- a/testgen/template/dbupgrade/0113_incremental_upgrade.sql +++ b/testgen/template/dbupgrade/0113_incremental_upgrade.sql @@ -9,10 +9,8 @@ ALTER TABLE test_suites ADD COLUMN dq_score_exclude BOOLEAN default FALSE; ALTER TABLE profile_anomaly_types - ADD COLUMN upper_case_ct BIGINT, - ADD COLUMN lower_case_ct BIGINT, - ADD COLUMN non_alpha_ct BIGINT, - ADD COLUMN mixed_case_ct BIGINT GENERATED ALWAYS AS ( value_ct - upper_case_ct - lower_case_ct - non_alpha_ct ) STORED; + ADD COLUMN dq_score_prevalence_formula TEXT, + ADD COLUMN dq_score_risk_factor TEXT; ALTER TABLE profile_anomaly_results ADD COLUMN dq_prevalence FLOAT; From b9b17432901095cccfd1c4da2cf385ab79620cad Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Tue, 12 Nov 2024 11:40:31 -0500 Subject: [PATCH 67/91] fix(ui): Adding database icons to the python built package --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cc41773..b3fec2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,7 +102,7 @@ include-package-data = true [tool.setuptools.package-data] "*" = ["*.toml", "*.sql", "*.yaml"] "testgen.template" = ["*.sql", "*.yaml", "**/*.sql", "**/*.yaml"] -"testgen.ui.assets" = ["*.svg", "*.png", "*.js", "*.css"] +"testgen.ui.assets" = ["*.svg", "*.png", "*.js", "*.css", "flavors/*.svg"] "testgen.ui.components.frontend" = ["*.html", "**/*.js", "**/*.css", "**/*.woff2", "**/*.svg"] [tool.setuptools.packages.find] From bd3235381f7b93a33a66e903a1da66c03c326f92 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 8 Nov 2024 18:01:25 -0500 Subject: [PATCH 68/91] fix(ui): add table and column 
filters to hygiene issues and test results --- testgen/ui/bootstrap.py | 4 +- .../frontend/js/pages/data_hierarchy.js | 12 +- ...ofiling_anomalies.py => hygiene_issues.py} | 111 +++++++++++---- testgen/ui/views/test_results.py | 131 +++++++++++++----- 4 files changed, 189 insertions(+), 69 deletions(-) rename testgen/ui/views/{profiling_anomalies.py => hygiene_issues.py} (85%) diff --git a/testgen/ui/bootstrap.py b/testgen/ui/bootstrap.py index 414f7e5..3abacce 100644 --- a/testgen/ui/bootstrap.py +++ b/testgen/ui/bootstrap.py @@ -12,9 +12,9 @@ from testgen.ui.session import session from testgen.ui.views.connections import ConnectionsPage from testgen.ui.views.data_hierarchy import DataHierarchyPage +from testgen.ui.views.hygiene_issues import HygieneIssuesPage from testgen.ui.views.login import LoginPage from testgen.ui.views.overview import OverviewPage -from testgen.ui.views.profiling_anomalies import ProfilingAnomaliesPage from testgen.ui.views.profiling_results import ProfilingResultsPage from testgen.ui.views.profiling_runs import DataProfilingPage from testgen.ui.views.project_settings import ProjectSettingsPage @@ -31,7 +31,7 @@ DataHierarchyPage, DataProfilingPage, ProfilingResultsPage, - ProfilingAnomaliesPage, + HygieneIssuesPage, TestRunsPage, TestResultsPage, ConnectionsPage, diff --git a/testgen/ui/components/frontend/js/pages/data_hierarchy.js b/testgen/ui/components/frontend/js/pages/data_hierarchy.js index a1d09ce..2916a50 100644 --- a/testgen/ui/components/frontend/js/pages/data_hierarchy.js +++ b/testgen/ui/components/frontend/js/pages/data_hierarchy.js @@ -463,7 +463,11 @@ const HygieneIssuesCard = (/** @type Table | Column */ item) => { const hygieneIssues = item.latest_anomalies.filter(({ issue_likelihood }) => issue_likelihood !== 'Potential PII'); const linkProps = { href: 'profiling-runs:hygiene', - params: { run_id: item.latest_profile_id }, + params: { + run_id: item.latest_profile_id, + table_name: item.table_name, + column_name: item.column_name, + }, }; return IssuesCard('Hygiene Issues', hygieneIssues, attributes, linkProps, 'No hygiene issues detected'); @@ -496,7 +500,11 @@ const TestIssuesCard = (/** @type Table | Column */ item) => { ), Link({ href: 'test-runs:results', - params: { run_id: issue.test_run_id }, + params: { + run_id: issue.test_run_id, + table_name: item.table_name, + column_name: item.column_name, + }, open_new: true, label: formatTimestamp(issue.test_run_date), style: 'font-size: 12px; margin-top: 2px;', diff --git a/testgen/ui/views/profiling_anomalies.py b/testgen/ui/views/hygiene_issues.py similarity index 85% rename from testgen/ui/views/profiling_anomalies.py rename to testgen/ui/views/hygiene_issues.py index 4e70ce5..7f6aec5 100644 --- a/testgen/ui/views/profiling_anomalies.py +++ b/testgen/ui/views/hygiene_issues.py @@ -20,14 +20,22 @@ from testgen.ui.views.dialogs.profiling_results_dialog import view_profiling_button -class ProfilingAnomaliesPage(Page): +class HygieneIssuesPage(Page): path = "profiling-runs:hygiene" can_activate: typing.ClassVar = [ lambda: session.authentication_status, lambda: "run_id" in session.current_page_args or "profiling-runs", ] - def render(self, run_id: str, issue_class: str | None = None, issue_type: str | None = None, **_kwargs) -> None: + def render( + self, + run_id: str, + issue_class: str | None = None, + issue_type: str | None = None, + table_name: str | None = None, + column_name: str | None = None, + **_kwargs, + ) -> None: run_parentage = profiling_queries.lookup_db_parentage_from_run(run_id) 
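         # For example (illustrative values), a deep link such as
         #   profiling-runs:hygiene?run_id=<run-uuid>&table_name=customers&column_name=email
         # opens this page with the new table and column filters pre-applied.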
if not run_parentage: self.router.navigate_with_warning( @@ -49,9 +57,9 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | ], ) - others_summary_column, pii_summary_column, _ = st.columns([.3, .3, .4]) - (liklihood_filter_column, issue_type_filter_column, sort_column, actions_column, export_button_column) = ( - st.columns([.16, .34, .08, .32, .1], vertical_alignment="bottom") + others_summary_column, pii_summary_column, actions_column = st.columns([.25, .25, .5], vertical_alignment="bottom") + (liklihood_filter_column, issue_type_filter_column, table_filter_column, column_filter_column, sort_column, export_button_column) = ( + st.columns([.15, .25, .2, .2, .1, .1], vertical_alignment="bottom") ) testgen.flex_row_end(actions_column) testgen.flex_row_end(export_button_column) @@ -78,6 +86,26 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | disabled=issue_class == "Potential PII", ) + run_columns_df = get_profiling_run_columns(run_id) + with table_filter_column: + table_name = testgen.select( + options=list(run_columns_df["table_name"].unique()), + default_value=table_name, + bind_to_query="table_name", + label="Table Name", + ) + + with column_filter_column: + column_options = list(run_columns_df.loc[run_columns_df["table_name"] == table_name]["column_name"]) + column_name = testgen.select( + options=column_options, + value_column="column_name", + default_value=column_name, + bind_to_query="column_name", + label="Column Name", + disabled=not table_name, + ) + with sort_column: sortable_columns = ( ("Table", "r.table_name"), @@ -95,7 +123,7 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | # Get hygiene issue list - df_pa = get_profiling_anomalies(run_id, issue_class, issue_type_id, sorting_columns) + df_pa = get_profiling_anomalies(run_id, issue_class, issue_type_id, table_name, column_name, sorting_columns) # Retrieve disposition action (cache refreshed) df_action = get_anomaly_disposition(run_id) @@ -110,7 +138,7 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | testgen.summary_bar( items=others_summary, label="Hygiene Issues", - height=40, + height=20, width=400, ) @@ -120,7 +148,7 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | testgen.summary_bar( items=anomalies_pii_summary, label="Potential PII", - height=40, + height=20, width=400, ) # write_frequency_graph(df_pa) @@ -252,24 +280,48 @@ def render(self, run_id: str, issue_class: str | None = None, issue_type: str | @st.cache_data(show_spinner=False) -def get_db_table_group_choices(str_project_code): - str_schema = st.session_state["dbschema"] - return dq.run_table_groups_lookup_query(str_schema, str_project_code) +def get_db_table_group_choices(project_code: str) -> pd.DataFrame: + schema: str = st.session_state["dbschema"] + return dq.run_table_groups_lookup_query(schema, project_code) + + +@st.cache_data(show_spinner="False") +def get_profiling_run_columns(profiling_run_id: str) -> pd.DataFrame: + schema: str = st.session_state["dbschema"] + sql = f""" + SELECT table_name, column_name + FROM {schema}.profile_anomaly_results + WHERE profile_run_id = '{profiling_run_id}' + ORDER BY table_name, column_name; + """ + return db.retrieve_data(sql) @st.cache_data(show_spinner="Retrieving Data") -def get_profiling_anomalies(str_profile_run_id, str_likelihood, issue_type_id, sorting_columns): - str_schema = st.session_state["dbschema"] - if str_likelihood is None: - str_criteria 
= " AND t.issue_likelihood <> 'Potential PII'" - else: - str_criteria = f" AND t.issue_likelihood = '{str_likelihood}'" - if sorting_columns: - str_order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns)) - else: - str_order_by = "" +def get_profiling_anomalies( + profile_run_id: str, + likelihood: str | None, + issue_type_id: str | None, + table_name: str | None, + column_name: str | None, + sorting_columns: list[str] | None, +): + schema: str = st.session_state["dbschema"] + criteria = "" + order_by = "" + + if likelihood: + criteria += f" AND t.issue_likelihood = '{likelihood}'" if issue_type_id: - str_criteria += f" AND t.id = '{issue_type_id}'" + criteria += f" AND t.id = '{issue_type_id}'" + if table_name: + criteria += f" AND r.table_name = '{table_name}'" + if column_name: + criteria += f" AND r.column_name = '{column_name}'" + + if sorting_columns: + order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns)) + # Define the query -- first visible column must be first, because will hold the multi-select box str_sql = f""" SELECT r.table_name, r.column_name, r.schema_name, @@ -291,17 +343,16 @@ def get_profiling_anomalies(str_profile_run_id, str_likelihood, issue_type_id, s t.anomaly_description, r.detail, t.suggested_action, r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, tg.table_groups_name - FROM {str_schema}.profile_anomaly_results r - INNER JOIN {str_schema}.profile_anomaly_types t + FROM {schema}.profile_anomaly_results r + INNER JOIN {schema}.profile_anomaly_types t ON r.anomaly_id = t.id - INNER JOIN {str_schema}.profiling_runs p + INNER JOIN {schema}.profiling_runs p ON r.profile_run_id = p.id - INNER JOIN {str_schema}.table_groups tg + INNER JOIN {schema}.table_groups tg ON r.table_groups_id = tg.id - - WHERE r.profile_run_id = '{str_profile_run_id}' - {str_criteria} - {str_order_by} + WHERE r.profile_run_id = '{profile_run_id}' + {criteria} + {order_by} """ # Retrieve data as df df = db.retrieve_data(str_sql) diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 9cc88eb..a1c3ea8 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -6,6 +6,7 @@ import plotly.express as px import plotly.graph_objects as go import streamlit as st +from streamlit.delta_generator import DeltaGenerator import testgen.ui.services.database_service as db import testgen.ui.services.form_service as fm @@ -43,7 +44,15 @@ class TestResultsPage(Page): lambda: "run_id" in session.current_page_args or "test-runs", ] - def render(self, run_id: str, status: str | None = None, test_type: str | None = None, **_kwargs) -> None: + def render( + self, + run_id: str, + status: str | None = None, + test_type: str | None = None, + table_name: str | None = None, + column_name: str | None = None, + **_kwargs, + ) -> None: run_parentage = get_drill_test_run(run_id) if not run_parentage: self.router.navigate_with_warning( @@ -65,17 +74,18 @@ def render(self, run_id: str, status: str | None = None, test_type: str | None = ], ) - # Display summary bar - tests_summary = get_test_result_summary(run_id) - testgen.summary_bar(items=tests_summary, height=40, width=800) - - # Setup Toolbar - status_filter_column, test_type_filter_column, sort_column, actions_column, export_button_column = st.columns( - [.2, .2, .08, .4, .12], vertical_alignment="bottom" + summary_column, actions_column = st.columns([.5, .5], vertical_alignment="bottom") + status_filter_column, test_type_filter_column, 
table_filter_column, column_filter_column, sort_column, export_button_column = st.columns( + [.2, .2, .2, .2, .1, .1], vertical_alignment="bottom" ) + testgen.flex_row_end(actions_column) testgen.flex_row_end(export_button_column) + with summary_column: + tests_summary = get_test_result_summary(run_id) + testgen.summary_bar(items=tests_summary, height=20, width=800) + with status_filter_column: status_options = [ "Failed + Warning", @@ -102,6 +112,26 @@ def render(self, run_id: str, status: str | None = None, test_type: str | None = label="Test Type", ) + run_columns_df = get_test_run_columns(run_id) + with table_filter_column: + table_name = testgen.select( + options=list(run_columns_df["table_name"].unique()), + default_value=table_name, + bind_to_query="table_name", + label="Table Name", + ) + + with column_filter_column: + column_options = list(run_columns_df.loc[run_columns_df["table_name"] == table_name]["column_name"]) + column_name = testgen.select( + options=column_options, + value_column="column_name", + default_value=column_name, + bind_to_query="column_name", + label="Column Name", + disabled=not table_name, + ) + with sort_column: sortable_columns = ( ("Table Name", "r.table_name"), @@ -131,7 +161,7 @@ def render(self, run_id: str, status: str | None = None, test_type: str | None = # Display main grid and retrieve selection selected = show_result_detail( - run_id, status, test_type, sorting_columns, do_multi_select, export_button_column + run_id, export_button_column, status, test_type, table_name, column_name, sorting_columns, do_multi_select ) # Need to render toolbar buttons after grid, so selection status is maintained @@ -190,25 +220,47 @@ def get_test_types(): return df -@st.cache_data(show_spinner="Retrieving Results") -def get_test_results(str_run_id, str_sel_test_status, test_type_id, sorting_columns): - schema = st.session_state["dbschema"] - return get_test_results_uncached(schema, str_run_id, str_sel_test_status, test_type_id, sorting_columns) +@st.cache_data(show_spinner="False") +def get_test_run_columns(test_run_id: str) -> pd.DataFrame: + schema: str = st.session_state["dbschema"] + sql = f""" + SELECT table_name, column_names AS column_name + FROM {schema}.test_results + WHERE test_run_id = '{test_run_id}' + ORDER BY table_name, column_names; + """ + return db.retrieve_data(sql) -def get_test_results_uncached(str_schema, str_run_id, str_sel_test_status, test_type_id=None, sorting_columns=None): +@st.cache_data(show_spinner="Retrieving Results") +def get_test_results( + run_id: str, + test_status: str | None = None, + test_type_id: str | None = None, + table_name: str | None = None, + column_name: str | None = None, + sorting_columns: list[str] | None = None, +) -> pd.DataFrame: + schema: str = st.session_state["dbschema"] # First visible row first, so multi-select checkbox will render - str_order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns)) if sorting_columns else "" - test_type_clause = f"AND r.test_type = '{test_type_id}'" if test_type_id else "" - status_clause = f" AND r.result_status IN ({str_sel_test_status})" if str_sel_test_status else "" - str_sql = f""" + order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns)) if sorting_columns else "" + filters = "" + if test_status: + filters += f" AND r.result_status IN ({test_status})" + if test_type_id: + filters += f" AND r.test_type = '{test_type_id}'" + if table_name: + filters += f" AND r.table_name = '{table_name}'" + if column_name: + filters += f" AND 
r.column_names = '{column_name}'" + + sql = f""" WITH run_results AS (SELECT * - FROM {str_schema}.test_results r + FROM {schema}.test_results r WHERE - r.test_run_id = '{str_run_id}' - {status_clause} - {test_type_clause} + r.test_run_id = '{run_id}' + {filters} ) SELECT r.table_name, p.project_name, ts.test_suite, tg.table_groups_name, cn.connection_name, cn.project_host, cn.sql_flavor, @@ -249,31 +301,31 @@ def get_test_results_uncached(str_schema, str_run_id, str_sel_test_status, test_ tt.threshold_description, tt.usage_notes, r.test_time FROM run_results r - INNER JOIN {str_schema}.test_types tt + INNER JOIN {schema}.test_types tt ON (r.test_type = tt.test_type) - LEFT JOIN {str_schema}.test_definitions rd + LEFT JOIN {schema}.test_definitions rd ON (r.test_definition_id = rd.id) - LEFT JOIN {str_schema}.test_definitions d + LEFT JOIN {schema}.test_definitions d ON (r.test_suite_id = d.test_suite_id AND r.table_name = d.table_name AND r.column_names = COALESCE(d.column_name, 'N/A') AND r.test_type = d.test_type AND r.auto_gen = TRUE AND d.last_auto_gen_date IS NOT NULL) - INNER JOIN {str_schema}.test_suites ts + INNER JOIN {schema}.test_suites ts ON r.test_suite_id = ts.id - INNER JOIN {str_schema}.projects p + INNER JOIN {schema}.projects p ON (ts.project_code = p.project_code) - INNER JOIN {str_schema}.table_groups tg + INNER JOIN {schema}.table_groups tg ON (ts.table_groups_id = tg.id) - INNER JOIN {str_schema}.connections cn + INNER JOIN {schema}.connections cn ON (tg.connection_id = cn.connection_id) - LEFT JOIN {str_schema}.cat_test_conditions c + LEFT JOIN {schema}.cat_test_conditions c ON (cn.sql_flavor = c.sql_flavor AND r.test_type = c.test_type) - {str_order_by} ; + {order_by} ; """ - df = db.retrieve_data(str_sql) + df = db.retrieve_data(sql) # Clean Up df["test_date"] = pd.to_datetime(df["test_date"]) @@ -449,11 +501,20 @@ def show_test_def_detail(str_test_def_id): ) -def show_result_detail(str_run_id, str_sel_test_status, test_type_id, sorting_columns, do_multi_select, export_container): +def show_result_detail( + run_id: str, + export_container: DeltaGenerator, + test_status: str | None = None, + test_type_id: str | None = None, + table_name: str | None = None, + column_name: str | None = None, + sorting_columns: list[str] | None = None, + do_multi_select: bool = False, +): # Retrieve test results (always cached, action as null) - df = get_test_results(str_run_id, str_sel_test_status, test_type_id, sorting_columns) + df = get_test_results(run_id, test_status, test_type_id, table_name, column_name, sorting_columns) # Retrieve disposition action (cache refreshed) - df_action = get_test_disposition(str_run_id) + df_action = get_test_disposition(run_id) # Update action from disposition df action_map = df_action.set_index("id")["action"].to_dict() df["action"] = df["test_result_id"].map(action_map).fillna(df["action"]) From 6116f3b2f8a6786ff1f8b7dd9a4c406bcc176f38 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 8 Nov 2024 18:02:09 -0500 Subject: [PATCH 69/91] fix(ui): add dk favicon to image --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b3fec2f..ce2438b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,7 +102,7 @@ include-package-data = true [tool.setuptools.package-data] "*" = ["*.toml", "*.sql", "*.yaml"] "testgen.template" = ["*.sql", "*.yaml", "**/*.sql", "**/*.yaml"] -"testgen.ui.assets" = ["*.svg", "*.png", "*.js", "*.css", "flavors/*.svg"] +"testgen.ui.assets" = ["*.svg", 
"*.png", "*.js", "*.css", "*.ico", "flavors/*.svg"] "testgen.ui.components.frontend" = ["*.html", "**/*.js", "**/*.css", "**/*.woff2", "**/*.svg"] [tool.setuptools.packages.find] From b8a94b3785ceff0abd01eacac1ba5e1dcf9d210f Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 11 Nov 2024 13:18:58 -0500 Subject: [PATCH 70/91] feat(ui): add community and training links to header --- testgen/ui/assets/style.css | 43 ++++++++++++++++++++-- testgen/ui/components/widgets/__init__.py | 1 + testgen/ui/components/widgets/page.py | 44 ++++++++++++++--------- testgen/ui/views/connections/page.py | 4 +-- testgen/ui/views/hygiene_issues.py | 2 +- testgen/ui/views/login.py | 13 ++++--- testgen/ui/views/overview.py | 3 +- testgen/ui/views/profiling_results.py | 2 +- testgen/ui/views/profiling_runs.py | 2 +- testgen/ui/views/project_settings.py | 2 +- testgen/ui/views/table_groups/page.py | 3 +- testgen/ui/views/test_definitions.py | 2 +- testgen/ui/views/test_results.py | 2 +- testgen/ui/views/test_runs.py | 2 +- testgen/ui/views/test_suites.py | 2 +- 15 files changed, 93 insertions(+), 34 deletions(-) diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css index 3122291..c5beb62 100644 --- a/testgen/ui/assets/style.css +++ b/testgen/ui/assets/style.css @@ -16,6 +16,7 @@ body { --secondary-text-color: #0000008a; --disabled-text-color: #00000042; --caption-text-color: rgba(49, 51, 63, 0.6); /* Match Streamlit's caption color */ + --border-color: rgba(0, 0, 0, .12); --sidebar-background-color: white; --sidebar-item-hover-color: #f5f5f5; @@ -68,15 +69,18 @@ section[data-testid="stSidebar"] { } section.main > :nth-child(1 of div).block-container { - padding: 24px; + padding: 12px 24px 24px; } div[data-testid="stVerticalBlock"] { gap: 0.5rem; } -div[data-testid="stSidebarCollapsedControl"] { +.appview-container:has(section[data-testid="stSidebar"]) div[data-testid="stSidebarCollapsedControl"] { top: 0.5rem; + border-radius: 4px; + background-color: var(--border-color); + padding: 3px 0 0 8px; } /* */ @@ -250,6 +254,40 @@ Use as testgen.text("text", "extra_styles") */ } /* */ +/* Page header */ +.tg-header { + margin: 0; + padding: 0; + font-weight: 500; + transition: padding 0.3s; +} + +[data-testid="stSidebarCollapsedControl"] ~ section.main .tg-header { + padding-left: 80px; +} + +.tg-header--line { + margin: 0; + border: none; + border-radius: 2px; + height: 2px; + background-color: var(--disabled-text-color); +} + +div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.tg-header--links) [data-testid="stLinkButton"] a { + border: none; + background: none; + padding: 6px; + min-height: 24px; + color: var(--primary-text-color); +} + +div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.tg-header--links) [data-testid="stLinkButton"] a p { + font-size: 20px; + line-height: 1; +} +/* */ + /* Summary bar component */ .tg-summary-bar--label { margin-bottom: 4px; @@ -309,6 +347,7 @@ Use as testgen.text("text", "extra_styles") */ --secondary-text-color: rgba(255, 255, 255, .7); --disabled-text-color: rgba(255, 255, 255, .5); --caption-text-color: rgba(250, 250, 250, .6); /* Match Streamlit's caption color */ + --border-color: rgba(255, 255, 255, .25); --sidebar-background-color: #14181f; --sidebar-item-hover-color: #10141b; diff --git a/testgen/ui/components/widgets/__init__.py b/testgen/ui/components/widgets/__init__.py index 
c2d490c..2dc7762 100644 --- a/testgen/ui/components/widgets/__init__.py +++ b/testgen/ui/components/widgets/__init__.py @@ -15,6 +15,7 @@ flex_row_start, no_flex_gap, page_header, + page_links, text, whitespace, ) diff --git a/testgen/ui/components/widgets/page.py b/testgen/ui/components/widgets/page.py index 2703982..55e63a9 100644 --- a/testgen/ui/components/widgets/page.py +++ b/testgen/ui/components/widgets/page.py @@ -4,33 +4,45 @@ from testgen.ui.components.widgets.breadcrumbs import Breadcrumb from testgen.ui.components.widgets.breadcrumbs import breadcrumbs as tg_breadcrumbs +BASE_HELP_URL = "https://docs.datakitchen.io/articles/#!dataops-testgen-help/" +DEFAULT_HELP_TOPIC = "dataops-testgen-help" +SLACK_URL = "https://data-observability-slack.datakitchen.io/join" +TRAINING_URL = "https://info.datakitchen.io/data-quality-training-and-certifications" def page_header( title: str, - help_link:str | None = None, + help_topic: str | None = None, breadcrumbs: list["Breadcrumb"] | None = None, ): - hcol1, hcol2 = st.columns([0.95, 0.05]) - hcol1.subheader(title, anchor=False) - if help_link: - with hcol2: - whitespace(0.8) - st.page_link(help_link, label=" ", icon=":material/help:") - - if breadcrumbs: - tg_breadcrumbs(breadcrumbs=breadcrumbs) - - st.write( - '
', - unsafe_allow_html=True, - ) + with st.container(): + no_flex_gap() + title_column, links_column = st.columns([0.95, 0.05], vertical_alignment="bottom") + + with title_column: + no_flex_gap() + st.html(f'

{title}

') + if breadcrumbs: + tg_breadcrumbs(breadcrumbs=breadcrumbs) + + with links_column: + page_links(help_topic) + + st.html('
') + if "last_page" in st.session_state: if title != st.session_state["last_page"]: st.cache_data.clear() st.session_state["last_page"] = title +def page_links(help_topic: str): + css_class("tg-header--links") + flex_row_end() + st.link_button(":material/question_mark:", f"{BASE_HELP_URL}{help_topic or DEFAULT_HELP_TOPIC}", help="Help Center") + st.link_button(":material/group:", SLACK_URL, help="Slack Community") + st.link_button(":material/school:", TRAINING_URL, help="Training Portal") + + def whitespace(size: float, container: DeltaGenerator | None = None): _apply_html(f'
', container) diff --git a/testgen/ui/views/connections/page.py b/testgen/ui/views/connections/page.py index aeb939c..f86aa72 100644 --- a/testgen/ui/views/connections/page.py +++ b/testgen/ui/views/connections/page.py @@ -39,7 +39,7 @@ def render(self, project_code: str, **_kwargs) -> None: testgen.page_header( "Connection", - "https://docs.datakitchen.io/article/dataops-testgen-help/connect-your-database", + "connect-your-database", ) testgen.whitespace(0.3) @@ -59,7 +59,7 @@ def render(self, project_code: str, **_kwargs) -> None: right_icon="chevron_right", underline=False, height=40, - style="margin-left: auto; border-radius: 4px; background: white;" + style="margin-left: auto; border-radius: 4px; background: var(--dk-card-background);" " border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)", ) else: diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py index 7f6aec5..44774c8 100644 --- a/testgen/ui/views/hygiene_issues.py +++ b/testgen/ui/views/hygiene_issues.py @@ -50,7 +50,7 @@ def render( testgen.page_header( "Hygiene Issues", - "https://docs.datakitchen.io/article/dataops-testgen-help/profile-anomalies", + "view-hygiene-issues", breadcrumbs=[ { "label": "Profiling Runs", "path": "profiling-runs", "params": { "project_code": project_code } }, { "label": f"{table_group_name} | {run_date}" }, diff --git a/testgen/ui/views/login.py b/testgen/ui/views/login.py index 13e08fa..beb50a0 100644 --- a/testgen/ui/views/login.py +++ b/testgen/ui/views/login.py @@ -4,6 +4,7 @@ import streamlit as st import streamlit_authenticator as stauth +from testgen.ui.components import widgets as testgen from testgen.ui.navigation.page import Page from testgen.ui.services import javascript_service, user_session_service from testgen.ui.session import session @@ -28,12 +29,16 @@ def render(self, **_kwargs) -> None: auth_data["preauthorized"], ) - _column_1, column_2, _column_3 = st.columns([0.25, 0.5, 0.25]) - with column_2: - st.markdown(""" + _, login_column, links_column = st.columns([0.25, 0.5, 0.25]) + + with links_column: + testgen.page_links() + + with login_column: + st.html("""


Welcome to DataKitchen DataOps TestGen

- """, unsafe_allow_html=True) + """) name, authentication_status, username = authenticator.login("Login") if authentication_status is False: diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index 2a7463c..e4fa57b 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -29,7 +29,7 @@ class OverviewPage(Page): def render(self, project_code: str | None = None, **_kwargs): testgen.page_header( "Project Overview", - "https://docs.datakitchen.io/article/dataops-testgen-help/introduction-to-dataops-testgen", + "introduction-to-dataops-testgen", ) project_code = project_code or session.project @@ -107,6 +107,7 @@ def render_empty_state(project_code: str) -> bool: def render_project_summary(table_groups: pd.DataFrame) -> None: project_column, _ = st.columns([.5, .5]) with project_column: + testgen.whitespace(0.3) with testgen.card(): summary_column, _ = st.columns([.8, .2]) # TODO: Uncomment and replace with below section when adding the score diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py index 5089e75..3e81017 100644 --- a/testgen/ui/views/profiling_results.py +++ b/testgen/ui/views/profiling_results.py @@ -38,7 +38,7 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str | testgen.page_header( "Data Profiling Results", - "https://docs.datakitchen.io/article/dataops-testgen-help/investigate-profiling", + "view-data-profiling-results", breadcrumbs=[ { "label": "Profiling Runs", "path": "profiling-runs", "params": { "project_code": project_code } }, { "label": f"{table_group_name} | {run_date}" }, diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py index e5daaf8..f396b17 100644 --- a/testgen/ui/views/profiling_runs.py +++ b/testgen/ui/views/profiling_runs.py @@ -34,7 +34,7 @@ class DataProfilingPage(Page): def render(self, project_code: str | None = None, table_group_id: str | None = None, **_kwargs) -> None: testgen.page_header( "Profiling Runs", - "https://docs.datakitchen.io/article/dataops-testgen-help/investigate-profiling", + "investigate-profiling", ) project_code = project_code or session.project diff --git a/testgen/ui/views/project_settings.py b/testgen/ui/views/project_settings.py index f5ab382..7c7f0c3 100644 --- a/testgen/ui/views/project_settings.py +++ b/testgen/ui/views/project_settings.py @@ -24,7 +24,7 @@ def render(self, project_code: str | None = None, **_kwargs) -> None: testgen.page_header( "Settings", - "https://docs.datakitchen.io/article/dataops-testgen-help/configuration", + "tg-project-settings", ) testgen.whitespace(1) diff --git a/testgen/ui/views/table_groups/page.py b/testgen/ui/views/table_groups/page.py index 7b9e8a9..a4b0a4c 100644 --- a/testgen/ui/views/table_groups/page.py +++ b/testgen/ui/views/table_groups/page.py @@ -39,7 +39,7 @@ def render(self, connection_id: str, **_kwargs) -> None: testgen.page_header( "Table Groups", - "https://docs.datakitchen.io/article/dataops-testgen-help/create-a-table-group", + "create-a-table-group", breadcrumbs=[ # type: ignore { "label": "Connections", "path": "connections", "params": { "project_code": project_code } }, { "label": connection["connection_name"] }, @@ -59,6 +59,7 @@ def render(self, connection_id: str, **_kwargs) -> None: ) return + testgen.whitespace(0.3) _, actions_column = st.columns([.1, .9], vertical_alignment="bottom") testgen.flex_row_end(actions_column) diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index 
84694eb..b57ac61 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -43,7 +43,7 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name: testgen.page_header( "Test Definitions", - "https://docs.datakitchen.io/article/dataops-testgen-help/testgen-test-types", + "testgen-test-types", breadcrumbs=[ { "label": "Test Suites", "path": "test-suites", "params": { "project_code": project_code } }, { "label": test_suite["test_suite"] }, diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index a1c3ea8..477657a 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -67,7 +67,7 @@ def render( testgen.page_header( "Test Results", - "https://docs.datakitchen.io/article/dataops-testgen-help/test-results", + "view-testgen-test-results", breadcrumbs=[ { "label": "Test Runs", "path": "test-runs", "params": { "project_code": project_code } }, { "label": f"{test_suite_name} | {run_date}" }, diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py index afd9d99..9edd3a8 100644 --- a/testgen/ui/views/test_runs.py +++ b/testgen/ui/views/test_runs.py @@ -34,7 +34,7 @@ class TestRunsPage(Page): def render(self, project_code: str | None = None, table_group_id: str | None = None, test_suite_id: str | None = None, **_kwargs) -> None: testgen.page_header( "Test Runs", - "https://docs.datakitchen.io/article/dataops-testgen-help/test-results", + "test-results", ) project_code = project_code or session.project diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index ca00ade..3518c24 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -35,7 +35,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N testgen.page_header( "Test Suites", - "https://docs.datakitchen.io/article/dataops-testgen-help/create-a-test-suite", + "create-a-test-suite", ) project_code = project_code or session.project From 7b51a95b59232667f185cefc20e87f9122b637d8 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 11 Nov 2024 22:57:27 -0500 Subject: [PATCH 71/91] misc(ui): improve query performance with new fields --- .../dbupgrade/0114_incremental_upgrade.sql | 84 ++++++++++++++ .../frontend/js/pages/data_hierarchy.js | 8 +- testgen/ui/queries/profiling_queries.py | 25 ++-- testgen/ui/queries/test_suite_queries.py | 21 +--- testgen/ui/views/data_hierarchy.py | 108 ++++-------------- testgen/ui/views/overview.py | 27 +---- testgen/ui/views/test_suites.py | 2 +- 7 files changed, 128 insertions(+), 147 deletions(-) create mode 100644 testgen/template/dbupgrade/0114_incremental_upgrade.sql diff --git a/testgen/template/dbupgrade/0114_incremental_upgrade.sql b/testgen/template/dbupgrade/0114_incremental_upgrade.sql new file mode 100644 index 0000000..86bbcf1 --- /dev/null +++ b/testgen/template/dbupgrade/0114_incremental_upgrade.sql @@ -0,0 +1,84 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + + +WITH last_test_run_dates AS ( + SELECT test_suite_id, + MAX(test_starttime) AS test_starttime + FROM test_runs + WHERE status = 'Complete' + GROUP BY test_suite_id +) +UPDATE test_suites +SET last_complete_test_run_id = tr.id +FROM last_test_run_dates ltd + LEFT JOIN test_runs tr ON ( + ltd.test_suite_id = tr.test_suite_id + AND ltd.test_starttime = tr.test_starttime + ) +WHERE test_suites.id = ltd.test_suite_id; + + +WITH last_profile_dates AS ( + SELECT table_groups_id, + MAX(profiling_starttime) AS profiling_starttime + FROM 
profiling_runs + WHERE status = 'Complete' + GROUP BY table_groups_id +) +UPDATE table_groups +SET last_complete_profile_run_id = pr.id +FROM last_profile_dates lpd + LEFT JOIN profiling_runs pr ON ( + lpd.table_groups_id = pr.table_groups_id + AND lpd.profiling_starttime = pr.profiling_starttime + ) +WHERE table_groups.id = lpd.table_groups_id; + + +WITH last_profile_dates AS ( + SELECT profiling_runs.table_groups_id, + table_name, + MAX(profiling_starttime) AS profiling_starttime + FROM profile_results + LEFT JOIN profiling_runs ON ( + profile_results.profile_run_id = profiling_runs.id + ) + WHERE status = 'Complete' + GROUP BY profiling_runs.table_groups_id, + table_name +) +UPDATE data_table_chars +SET last_complete_profile_run_id = pr.id +FROM last_profile_dates lpd + LEFT JOIN profiling_runs pr ON ( + lpd.table_groups_id = pr.table_groups_id + AND lpd.profiling_starttime = pr.profiling_starttime + ) +WHERE data_table_chars.table_groups_id = lpd.table_groups_id + AND data_table_chars.table_name = lpd.table_name; + + +WITH last_profile_dates AS ( + SELECT profiling_runs.table_groups_id, + table_name, + column_name, + MAX(profiling_starttime) AS profiling_starttime + FROM profile_results + LEFT JOIN profiling_runs ON ( + profile_results.profile_run_id = profiling_runs.id + ) + WHERE status = 'Complete' + GROUP BY profiling_runs.table_groups_id, + table_name, + column_name +) +UPDATE data_column_chars +SET last_complete_profile_run_id = pr.id +FROM last_profile_dates lpd + LEFT JOIN profiling_runs pr ON ( + lpd.table_groups_id = pr.table_groups_id + AND lpd.profiling_starttime = pr.profiling_starttime + ) +WHERE data_column_chars.table_groups_id = lpd.table_groups_id + AND data_column_chars.table_name = lpd.table_name + AND data_column_chars.column_name = lpd.column_name; diff --git a/testgen/ui/components/frontend/js/pages/data_hierarchy.js b/testgen/ui/components/frontend/js/pages/data_hierarchy.js index 2916a50..a8bd342 100644 --- a/testgen/ui/components/frontend/js/pages/data_hierarchy.js +++ b/testgen/ui/components/frontend/js/pages/data_hierarchy.js @@ -62,7 +62,7 @@ * * Latest Profile & Test Runs * @property {string} latest_profile_id * @property {number} latest_profile_date - * @property {number} latest_test_run_date + * @property {number} has_test_runs * * Issues * @property {Anomaly[]} latest_anomalies * @property {TestIssue[]} latest_test_issues @@ -92,7 +92,7 @@ * * Latest Profile & Test Runs * @property {string} latest_profile_id * @property {number} latest_profile_date - * @property {number} latest_test_run_date + * @property {number} has_test_runs * * Issues * @property {Anomaly[]} latest_anomalies * @property {TestResult[]} latest_test_results @@ -131,7 +131,7 @@ const columnIcons = { const DataHierarchy = (/** @type Properties */ props) => { loadStylesheet('data_hierarchy', stylesheet); Streamlit.setFrameHeight(1); // Non-zero value is needed to render - window.frameElement.style.setProperty('height', 'calc(100vh - 200px)'); + window.frameElement.style.setProperty('height', 'calc(100vh - 175px)'); window.testgen.isPage = true; const treeNodes = van.derive(() => { @@ -519,7 +519,7 @@ const TestIssuesCard = (/** @type Table | Column */ item) => { } let noneContent = 'No test issues detected'; - if (!item.latest_test_run_date) { + if (!item.has_test_runs) { if (item.drop_date) { noneContent = span({ class: 'text-secondary' }, `No test results for ${item.type}`); } else { diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py index 
dc93496..75477d0 100644 --- a/testgen/ui/queries/profiling_queries.py +++ b/testgen/ui/queries/profiling_queries.py @@ -12,23 +12,14 @@ def run_table_groups_lookup_query(str_project_code): @st.cache_data(show_spinner=False) -def get_latest_profile_run(str_table_group): - str_schema = st.session_state["dbschema"] - str_sql = f""" - WITH last_profile_run - AS (SELECT table_groups_id, MAX(profiling_starttime) as last_profile_run_date - FROM {str_schema}.profiling_runs - GROUP BY table_groups_id) - SELECT id as profile_run_id - FROM {str_schema}.profiling_runs r - INNER JOIN last_profile_run l - ON (r.table_groups_id = l.table_groups_id - AND r.profiling_starttime = l.last_profile_run_date) - WHERE r.table_groups_id = '{str_table_group}'; -""" - str_profile_run_id = db.retrieve_single_result(str_sql) - - return str_profile_run_id +def get_latest_profile_run(table_group_id: str) -> str: + schema: str = st.session_state["dbschema"] + sql = f""" + SELECT last_complete_profile_run_id + FROM {schema}.table_groups + WHERE id = '{table_group_id}'; + """ + return db.retrieve_single_result(sql) @st.cache_data(show_spinner=False) diff --git a/testgen/ui/queries/test_suite_queries.py b/testgen/ui/queries/test_suite_queries.py index 765a14c..7300695 100644 --- a/testgen/ui/queries/test_suite_queries.py +++ b/testgen/ui/queries/test_suite_queries.py @@ -13,12 +13,6 @@ def get_by_project(schema, project_code, table_group_id=None): FROM {schema}.test_definitions GROUP BY test_suite_id ), - last_run_date AS ( - SELECT test_suite_id, - MAX(test_starttime) as test_starttime - FROM {schema}.test_runs - GROUP BY test_suite_id - ), last_run AS ( SELECT test_runs.test_suite_id, test_runs.id, @@ -58,10 +52,9 @@ def get_by_project(schema, project_code, table_group_id=None): ELSE 0 END ) as dismissed_ct - FROM last_run_date lrd + FROM {schema}.test_suites LEFT JOIN {schema}.test_runs ON ( - lrd.test_suite_id = test_runs.test_suite_id - AND lrd.test_starttime = test_runs.test_starttime + test_suites.last_complete_test_run_id = test_runs.id ) LEFT JOIN {schema}.test_results ON ( test_runs.id = test_results.test_run_id @@ -73,12 +66,6 @@ def get_by_project(schema, project_code, table_group_id=None): COUNT(*) as count FROM {schema}.test_definitions GROUP BY test_suite_id - ), - last_profile_date AS ( - SELECT table_groups_id, - MAX(profiling_starttime) as profiling_starttime - FROM {schema}.profiling_runs - GROUP BY table_groups_id ) SELECT suites.id::VARCHAR(50), @@ -98,7 +85,7 @@ def get_by_project(schema, project_code, table_group_id=None): suites.component_name, test_defs.count as test_ct, last_gen_date.auto_gen_date as latest_auto_gen_date, - last_profile_date.profiling_starttime as latest_profiling_date, + last_complete_profile_run_id, last_run.id as latest_run_id, last_run.test_starttime as latest_run_start, last_run.test_ct as last_run_test_ct, @@ -118,8 +105,6 @@ def get_by_project(schema, project_code, table_group_id=None): ON (connections.connection_id = suites.connection_id) LEFT JOIN {schema}.table_groups as groups ON (groups.id = suites.table_groups_id) - LEFT JOIN last_profile_date - ON (groups.id = last_profile_date.table_groups_id) WHERE suites.project_code = '{project_code}' """ diff --git a/testgen/ui/views/data_hierarchy.py b/testgen/ui/views/data_hierarchy.py index 445f6c4..4ffa33f 100644 --- a/testgen/ui/views/data_hierarchy.py +++ b/testgen/ui/views/data_hierarchy.py @@ -182,26 +182,6 @@ def get_selected_item(selected: str, table_group_id: str) -> dict | None: if item_type == "table": sql = f""" - 
WITH latest_profile_dates AS ( - SELECT table_name, - profiling_runs.table_groups_id, - MAX(profiling_starttime) AS profiling_starttime - FROM {schema}.profile_results - LEFT JOIN {schema}.profiling_runs ON ( - profile_results.profile_run_id = profiling_runs.id - ) - GROUP BY profiling_runs.table_groups_id, table_name - ), - latest_test_run_dates AS ( - SELECT table_name, - test_results.table_groups_id, - MAX(test_starttime) AS test_starttime - FROM {schema}.test_results - LEFT JOIN {schema}.test_runs ON ( - test_results.test_run_id = test_runs.id - ) - GROUP BY test_results.table_groups_id, table_name - ) SELECT table_chars.table_name, table_chars.table_groups_id::VARCHAR(50) AS table_group_id, -- Characteristics @@ -221,49 +201,23 @@ def get_selected_item(selected: str, table_group_id: str) -> dict | None: transform_level, aggregation_level, -- Latest Profile & Test Runs - profiling_runs.id::VARCHAR(50) AS latest_profile_id, - lpd.profiling_starttime AS latest_profile_date, - lrd.test_starttime AS latest_test_run_date + last_complete_profile_run_id::VARCHAR(50) AS latest_profile_id, + profiling_starttime AS latest_profile_date, + EXISTS( + SELECT 1 + FROM {schema}.test_results + WHERE table_groups_id = '{table_group_id}' + AND table_name = table_chars.table_name + ) AS has_test_runs FROM {schema}.data_table_chars table_chars - LEFT JOIN latest_profile_dates lpd ON ( - table_chars.table_groups_id = lpd.table_groups_id - AND table_chars.table_name = lpd.table_name - ) - LEFT JOIN latest_test_run_dates lrd ON ( - table_chars.table_groups_id = lrd.table_groups_id - AND table_chars.table_name = lrd.table_name - ) LEFT JOIN {schema}.profiling_runs ON ( - lpd.table_groups_id = profiling_runs.table_groups_id - AND lpd.profiling_starttime = profiling_runs.profiling_starttime + table_chars.last_complete_profile_run_id = profiling_runs.id ) WHERE table_id = '{item_id}' AND table_chars.table_groups_id = '{table_group_id}'; """ else: sql = f""" - WITH latest_profile_dates AS ( - SELECT column_name, - table_name, - profile_results.table_groups_id, - MAX(profiling_starttime) AS profiling_starttime - FROM {schema}.profile_results - LEFT JOIN {schema}.profiling_runs ON ( - profile_results.profile_run_id = profiling_runs.id - ) - GROUP BY profile_results.table_groups_id, table_name, column_name - ), - latest_test_run_dates AS ( - SELECT column_names, - table_name, - test_results.table_groups_id, - MAX(test_starttime) AS test_starttime - FROM {schema}.test_results - LEFT JOIN {schema}.test_runs ON ( - test_results.test_run_id = test_runs.id - ) - GROUP BY test_results.table_groups_id, table_name, column_names - ) SELECT column_chars.column_name, column_chars.table_name, column_chars.table_groups_id::VARCHAR(50) AS table_group_id, @@ -294,9 +248,15 @@ def get_selected_item(selected: str, table_group_id: str) -> dict | None: table_chars.transform_level AS table_transform_level, table_chars.aggregation_level AS table_aggregation_level, -- Latest Profile & Test Runs - profiling_runs.id::VARCHAR(50) AS latest_profile_id, - lpd.profiling_starttime AS latest_profile_date, - lrd.test_starttime AS latest_test_run_date, + column_chars.last_complete_profile_run_id::VARCHAR(50) AS latest_profile_id, + run_date AS latest_profile_date, + EXISTS( + SELECT 1 + FROM {schema}.test_results + WHERE table_groups_id = '{table_group_id}' + AND table_name = column_chars.table_name + AND column_names = column_chars.column_name + ) AS has_test_runs, -- Value Counts profile_results.record_ct, value_ct, @@ -347,22 +307,8 @@ def 
get_selected_item(selected: str, table_group_id: str) -> dict | None: LEFT JOIN {schema}.data_table_chars table_chars ON ( column_chars.table_id = table_chars.table_id ) - LEFT JOIN latest_profile_dates lpd ON ( - column_chars.table_groups_id = lpd.table_groups_id - AND column_chars.table_name = lpd.table_name - AND column_chars.column_name = lpd.column_name - ) - LEFT JOIN latest_test_run_dates lrd ON ( - column_chars.table_groups_id = lrd.table_groups_id - AND column_chars.table_name = lrd.table_name - AND column_chars.column_name = lrd.column_names - ) - LEFT JOIN {schema}.profiling_runs ON ( - lpd.table_groups_id = profiling_runs.table_groups_id - AND lpd.profiling_starttime = profiling_runs.profiling_starttime - ) LEFT JOIN {schema}.profile_results ON ( - profiling_runs.id = profile_results.profile_run_id + column_chars.last_complete_profile_run_id = profile_results.profile_run_id AND column_chars.column_name = profile_results.column_name ) WHERE column_id = '{item_id}' @@ -442,23 +388,16 @@ def get_latest_test_issues(table_group_id: str, table_name: str, column_name: st column_condition = f"AND column_names = '{column_name}'" sql = f""" - WITH latest_run_dates AS ( - SELECT test_suite_id, - MAX(test_starttime) AS test_starttime - FROM {schema}.test_runs - GROUP BY test_suite_id - ) SELECT column_names AS column_name, test_name_short AS test_name, result_status, result_message, test_suite, test_results.test_run_id::VARCHAR(50), - lrd.test_starttime AS test_run_date - FROM latest_run_dates lrd + test_starttime AS test_run_date + FROM {schema}.test_suites LEFT JOIN {schema}.test_runs ON ( - lrd.test_suite_id = test_runs.test_suite_id - AND lrd.test_starttime = test_runs.test_starttime + test_suites.last_complete_test_run_id = test_runs.id ) LEFT JOIN {schema}.test_results ON ( test_runs.id = test_results.test_run_id @@ -466,9 +405,6 @@ def get_latest_test_issues(table_group_id: str, table_name: str, column_name: st LEFT JOIN {schema}.test_types ON ( test_results.test_type = test_types.test_type ) - LEFT JOIN {schema}.test_suites ON ( - lrd.test_suite_id = test_suites.id - ) WHERE test_suites.table_groups_id = '{table_group_id}' AND table_name = '{table_name}' {column_condition} diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index e4fa57b..a6ab0e0 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -294,13 +294,7 @@ def render_test_suite_item(test_suite: pd.Series, column_spec: list[int]) -> Non def get_table_groups_summary(project_code: str) -> pd.DataFrame: schema = st.session_state["dbschema"] sql = f""" - WITH latest_profile_dates AS ( - SELECT table_groups_id, - MAX(profiling_starttime) as profiling_starttime - FROM {schema}.profiling_runs - GROUP BY table_groups_id - ), - latest_profile AS ( + WITH latest_profile AS ( SELECT latest_run.table_groups_id, latest_run.id, latest_run.profiling_starttime, @@ -335,10 +329,9 @@ def get_table_groups_summary(project_code: str) -> pd.DataFrame: ELSE 0 END ) as dismissed_ct - FROM latest_profile_dates lpd + FROM {schema}.table_groups groups LEFT JOIN {schema}.profiling_runs latest_run ON ( - lpd.table_groups_id = latest_run.table_groups_id - AND lpd.profiling_starttime = latest_run.profiling_starttime + groups.last_complete_profile_run_id = latest_run.id ) LEFT JOIN {schema}.profile_anomaly_results latest_anomalies ON ( latest_run.id = latest_anomalies.profile_run_id @@ -348,17 +341,11 @@ def get_table_groups_summary(project_code: str) -> pd.DataFrame: ) GROUP BY latest_run.id ), - 
latest_run_dates AS ( - SELECT test_suite_id, - MAX(test_starttime) as test_starttime - FROM {schema}.test_runs - GROUP BY test_suite_id - ), latest_tests AS ( SELECT suites.table_groups_id, MAX(latest_run.test_starttime) AS test_starttime, COUNT(DISTINCT latest_run.test_suite_id) as test_suite_ct, - COUNT(*) as test_ct, + COUNT(latest_results.id) as test_ct, SUM( CASE WHEN COALESCE(latest_results.disposition, 'Confirmed') = 'Confirmed' @@ -393,15 +380,13 @@ def get_table_groups_summary(project_code: str) -> pd.DataFrame: ELSE 0 END ) as dismissed_ct - FROM latest_run_dates lrd + FROM {schema}.test_suites suites LEFT JOIN {schema}.test_runs latest_run ON ( - lrd.test_suite_id = latest_run.test_suite_id - AND lrd.test_starttime = latest_run.test_starttime + suites.last_complete_test_run_id = latest_run.id ) LEFT JOIN {schema}.test_results latest_results ON ( latest_run.id = latest_results.test_run_id ) - LEFT JOIN {schema}.test_suites as suites ON (suites.id = lrd.test_suite_id) GROUP BY suites.table_groups_id ) SELECT groups.id::VARCHAR(50), diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py index 3518c24..5af0a0a 100644 --- a/testgen/ui/views/test_suites.py +++ b/testgen/ui/views/test_suites.py @@ -154,7 +154,7 @@ def render(self, project_code: str | None = None, table_group_id: str | None = N disabled=run_disabled, key=f"test_suite:keys:runtests:{test_suite['id']}", ) - generate_disabled = pd.isnull(test_suite["latest_profiling_date"]) + generate_disabled = pd.isnull(test_suite["last_complete_profile_run_id"]) testgen.button( type_="stroked", label="Generate Tests", From 8a5593b287b2e6dabc71bf8ed1df6ef79f5aea54 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 12 Nov 2024 13:35:00 -0500 Subject: [PATCH 72/91] ci(tests): fix functional tests --- testgen/ui/views/test_results.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 477657a..478704c 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -242,6 +242,18 @@ def get_test_results( sorting_columns: list[str] | None = None, ) -> pd.DataFrame: schema: str = st.session_state["dbschema"] + return get_test_results_uncached(schema, run_id, test_status, test_type_id, table_name, column_name, sorting_columns) + + +def get_test_results_uncached( + schema: str, + run_id: str, + test_status: str | None = None, + test_type_id: str | None = None, + table_name: str | None = None, + column_name: str | None = None, + sorting_columns: list[str] | None = None, +) -> pd.DataFrame: # First visible row first, so multi-select checkbox will render order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns)) if sorting_columns else "" filters = "" From 5be063ccaff944d85f9e774550957738a80c7e49 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 12 Nov 2024 15:07:31 -0500 Subject: [PATCH 73/91] fix(ui): error on login screen --- testgen/ui/components/widgets/page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/ui/components/widgets/page.py b/testgen/ui/components/widgets/page.py index 55e63a9..e387b28 100644 --- a/testgen/ui/components/widgets/page.py +++ b/testgen/ui/components/widgets/page.py @@ -35,7 +35,7 @@ def page_header( st.session_state["last_page"] = title -def page_links(help_topic: str): +def page_links(help_topic: str | None = None): css_class("tg-header--links") flex_row_end() st.link_button(":material/question_mark:", 
f"{BASE_HELP_URL}{help_topic or DEFAULT_HELP_TOPIC}", help="Help Center") From 8390a162fe553f0868da463a5870e7a8a9a3483d Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 12 Nov 2024 19:55:48 -0500 Subject: [PATCH 74/91] feat(pdf): add link backs to pdf issue reports --- testgen/ui/pdf/hygiene_issue_report.py | 29 ++++++++++++++++-------- testgen/ui/pdf/style.py | 21 +++++++++++++++++ testgen/ui/pdf/test_result_report.py | 31 +++++++++++++++++++------- testgen/ui/views/hygiene_issues.py | 4 ++-- testgen/ui/views/test_results.py | 2 +- testgen/utils/__init__.py | 8 +++++++ 6 files changed, 75 insertions(+), 20 deletions(-) diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py index b228231..7a0462a 100644 --- a/testgen/ui/pdf/hygiene_issue_report.py +++ b/testgen/ui/pdf/hygiene_issue_report.py @@ -1,4 +1,3 @@ -import pandas from reportlab.lib import colors from reportlab.lib.colors import HexColor from reportlab.lib.enums import TA_CENTER @@ -13,13 +12,16 @@ PARA_STYLE_FOOTNOTE, PARA_STYLE_H1, PARA_STYLE_INFO, + PARA_STYLE_LINK, PARA_STYLE_MONO, PARA_STYLE_TEXT, PARA_STYLE_TITLE, TABLE_STYLE_DEFAULT, + get_formatted_datetime, ) from testgen.ui.pdf.templates import DatakitchenTemplate from testgen.ui.services.hygiene_issues_service import get_source_data +from testgen.utils import get_base_url SECTION_MIN_AVAILABLE_HEIGHT = 120 @@ -38,9 +40,6 @@ def build_summary_table(document, hi_data): ("GRID", (0, 0), (-1, -1), 2, colors.white), ("BACKGROUND", (0, 0), (-1, -1), COLOR_GRAY_BG), - # Empty cells - ("BACKGROUND", (2, 5), (-1, -1), colors.white), - # Header cells *[ (cmd[0], *coords, *cmd[1:]) @@ -64,7 +63,10 @@ def build_summary_table(document, hi_data): ("SPAN", (3, 3), (4, 3)), ("SPAN", (3, 4), (4, 4)), ("SPAN", (3, 5), (4, 5)), + ("SPAN", (2, 5), (4, 5)), + # Link cell + ("BACKGROUND", (2, 5), (4, 5), colors.white), # Status cell *[ @@ -80,7 +82,7 @@ def build_summary_table(document, hi_data): ) - profiling_timestamp = pandas.to_datetime(hi_data["profiling_starttime"]).strftime("%Y-%m-%d %H:%M:%S") + profiling_timestamp = get_formatted_datetime(hi_data["profiling_starttime"]) summary_table_data = [ ( "Hygiene Issue", @@ -106,7 +108,16 @@ def build_summary_table(document, hi_data): ("Database/Schema", hi_data["schema_name"], "Profiling Date", profiling_timestamp), ("Table", hi_data["table_name"], "Table Group", hi_data["table_groups_name"]), ("Column", hi_data["column_name"], "Disposition", hi_data["disposition"] or "No Decision"), - ("Column Type", hi_data["column_type"]), + ( + "Column Type", + hi_data["column_type"], + Paragraph( + f""" + View on TestGen > + """, + style=PARA_STYLE_LINK, + ), + ), ] summary_table_col_widths = [n * document.width for n in (.15, .35, .15, .15, .20)] @@ -132,7 +143,7 @@ def build_sample_data_content(document, sample_data_tuple): yield from df_table_builder.split_in_columns(table_flowables) -def build_sql_query_conntent(sample_data_tuple): +def build_sql_query_content(sample_data_tuple): lookup_query = sample_data_tuple[2] if lookup_query: return Paragraph(lookup_query, PARA_STYLE_MONO) @@ -141,7 +152,7 @@ def build_sql_query_conntent(sample_data_tuple): def get_report_content(document, hi_data): - yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE) + yield Paragraph("TestGen Hygiene Issue Report", PARA_STYLE_TITLE) yield build_summary_table(document, hi_data) yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT) @@ -156,7 +167,7 @@ def get_report_content(document, hi_data): yield KeepTogether([ Paragraph("SQL 
Query", PARA_STYLE_H1), - build_sql_query_conntent(sample_data_tuple) + build_sql_query_content(sample_data_tuple) ]) diff --git a/testgen/ui/pdf/style.py b/testgen/ui/pdf/style.py index 197674e..03ed49a 100644 --- a/testgen/ui/pdf/style.py +++ b/testgen/ui/pdf/style.py @@ -1,13 +1,18 @@ +import pandas +import streamlit as st from reportlab.lib import enums from reportlab.lib.colors import HexColor from reportlab.lib.styles import ParagraphStyle from reportlab.platypus import TableStyle +from testgen.common import date_service + COLOR_GRAY_BG = HexColor(0xF2F2F2) COLOR_GREEN_BG = HexColor(0xDCE4DA) COLOR_YELLOW_BG = HexColor(0xA0C84E40, hasAlpha=True) COLOR_GREEN_TEXT = HexColor(0x139549) COLOR_FADED_TEXT = HexColor(0x404040) +COLOR_LINK_TEXT = HexColor(0x1976D2) PARA_STYLE_DEFAULT = ParagraphStyle( "default", @@ -86,3 +91,19 @@ fontName="Helvetica", leading=10, ) + +PARA_STYLE_LINK = ParagraphStyle( + "link", + PARA_STYLE_DEFAULT, + fontSize=9, + alignment=enums.TA_RIGHT, + textColor=COLOR_LINK_TEXT, +) + + +def get_formatted_datetime(value) -> str: + return date_service.get_timezoned_timestamp( + st.session_state, + pandas.to_datetime(value), + "%b %-d, %-I:%M %p %Z", + ) diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py index f0fa019..c60cfc3 100644 --- a/testgen/ui/pdf/test_result_report.py +++ b/testgen/ui/pdf/test_result_report.py @@ -18,10 +18,12 @@ PARA_STYLE_FOOTNOTE, PARA_STYLE_H1, PARA_STYLE_INFO, + PARA_STYLE_LINK, PARA_STYLE_MONO, PARA_STYLE_TEXT, PARA_STYLE_TITLE, TABLE_STYLE_DEFAULT, + get_formatted_datetime, ) from testgen.ui.pdf.templates import DatakitchenTemplate from testgen.ui.services.database_service import get_schema @@ -30,6 +32,7 @@ do_source_data_lookup_custom, get_test_result_history, ) +from testgen.utils import get_base_url SECTION_MIN_AVAILABLE_HEIGHT = 120 @@ -52,8 +55,8 @@ def build_summary_table(document, tr_data): *[ (cmd[0], *coords, *cmd[1:]) for coords in ( - ((3, 3), (3, -1)), - ((0, 0), (0, -1)) + ((3, 3), (3, -2)), + ((0, 0), (0, -2)) ) for cmd in ( ("FONT", "Helvetica-Bold"), @@ -75,6 +78,10 @@ def build_summary_table(document, tr_data): ("SPAN", (4, 5), (5, 5)), ("SPAN", (1, 6), (2, 6)), ("SPAN", (4, 6), (5, 6)), + ("SPAN", (0, 7), (5, 7)), + + # Link cell + ("BACKGROUND", (0, 7), (5, 7), colors.white), # Measure cell ("FONT", (1, 1), (1, 1), "Helvetica-Bold"), @@ -94,7 +101,7 @@ def build_summary_table(document, tr_data): parent=TABLE_STYLE_DEFAULT, ) - test_timestamp = pandas.to_datetime(tr_data["test_time"]).strftime("%Y-%m-%d %H:%M:%S") + test_timestamp = get_formatted_datetime(tr_data["test_time"]) summary_table_data = [ ( "Test", @@ -111,10 +118,18 @@ def build_summary_table(document, tr_data): ("Measured Value", tr_data["result_measure"], tr_data["measure_uom_description"]), ("Threshold Value", tr_data["threshold_value"], tr_data["threshold_description"]), - ("Date", test_timestamp, None, "Table Group", tr_data["table_groups_name"]), + ("Test Run Date", test_timestamp, None, "Table Group", tr_data["table_groups_name"]), ("Database/Schema", tr_data["schema_name"], None, "Test Suite", tr_data["test_suite"]), ("Table", tr_data["table_name"], None, "Data Quality Dimension", tr_data["dq_dimension"]), ("Column", tr_data["column_names"], None, "Disposition", tr_data["disposition"] or "No Decision"), + ( + Paragraph( + f""" + View on TestGen > + """, + style=PARA_STYLE_LINK, + ), + ), ] summary_table_col_widths = [n * document.width for n in (.2, .1, .2, .2, .15, .15)] @@ -143,7 +158,7 @@ def 
build_history_table(document, tr_data): history_df = pandas.DataFrame() history_df = history_df.assign( - test_date=history_data["test_date"].copy(), + test_date=history_data["test_date"].map(get_formatted_datetime).copy(), threshold_value=history_data["threshold_value"].astype(float).copy(), result_measure=history_data["result_measure"].astype(float).copy(), result_status=history_data["result_status"].map( @@ -176,7 +191,7 @@ def build_sample_data_content(document, sample_data_tuple): yield from df_table_builder.split_in_columns(table_flowables) -def build_sql_query_conntent(sample_data_tuple): +def build_sql_query_content(sample_data_tuple): lookup_query = sample_data_tuple[2] if lookup_query: return Paragraph(lookup_query, PARA_STYLE_MONO) @@ -185,7 +200,7 @@ def build_sql_query_conntent(sample_data_tuple): def get_report_content(document, tr_data): - yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE) + yield Paragraph("TestGen Test Issue Report", PARA_STYLE_TITLE) yield build_summary_table(document, tr_data) yield KeepTogether([ @@ -208,7 +223,7 @@ def get_report_content(document, tr_data): yield KeepTogether([ Paragraph("SQL Query", PARA_STYLE_H1), - build_sql_query_conntent(sample_data_tuple) + build_sql_query_content(sample_data_tuple) ]) diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py index 44774c8..49af2ad 100644 --- a/testgen/ui/views/hygiene_issues.py +++ b/testgen/ui/views/hygiene_issues.py @@ -341,7 +341,7 @@ def get_profiling_anomalies( WHEN t.issue_likelihood = 'Definite' THEN 4 END AS likelihood_order, t.anomaly_description, r.detail, t.suggested_action, - r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, + r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, r.profile_run_id::VARCHAR, tg.table_groups_name FROM {schema}.profile_anomaly_results r INNER JOIN {schema}.profile_anomaly_types t @@ -493,7 +493,7 @@ def do_disposition_update(selected, str_new_status): def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE: hi_id = tr_data["anomaly_id"] profiling_time = pd.Timestamp(tr_data["profiling_starttime"]).strftime("%Y%m%d_%H%M%S") - file_name = f"testgen_issue_report_{hi_id}_{profiling_time}.pdf" + file_name = f"testgen_hygiene_issue_report_{hi_id}_{profiling_time}.pdf" with BytesIO() as buffer: create_report(buffer, tr_data) diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 478704c..f8d60ed 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -817,7 +817,7 @@ def view_edit_test(button_container, test_definition_id): def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE: td_id = tr_data["test_definition_id_runtime"][:6] tr_time = pd.Timestamp(tr_data["test_time"]).strftime("%Y%m%d_%H%M%S") - file_name = f"testgen_issue_report_{td_id}_{tr_time}.pdf" + file_name = f"testgen_test_issue_report_{td_id}_{tr_time}.pdf" with BytesIO() as buffer: create_report(buffer, tr_data) diff --git a/testgen/utils/__init__.py b/testgen/utils/__init__.py index db58739..40f42b6 100644 --- a/testgen/utils/__init__.py +++ b/testgen/utils/__init__.py @@ -1,7 +1,9 @@ import math +import urllib.parse from uuid import UUID import pandas as pd +import streamlit as st def to_int(value: float | int) -> int: @@ -23,3 +25,9 @@ def is_uuid4(value: str) -> bool: return False return str(uuid) == value + + +# https://github.com/streamlit/streamlit/issues/798#issuecomment-1647759949 +def get_base_url() -> str: + 
session = st.runtime.get_instance()._session_mgr.list_active_sessions()[0] + return urllib.parse.urlunparse([session.client.request.protocol, session.client.request.host, "", "", "", ""]) From 9c2848e81c6fcbb738e96617c5d8a2a5f5f61e74 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 12 Nov 2024 19:56:33 -0500 Subject: [PATCH 75/91] fix(runs): increase sleep before closing run dialogs --- testgen/ui/views/dialogs/run_profiling_dialog.py | 2 +- testgen/ui/views/dialogs/run_tests_dialog.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/testgen/ui/views/dialogs/run_profiling_dialog.py b/testgen/ui/views/dialogs/run_profiling_dialog.py index b39aa15..b1077f8 100644 --- a/testgen/ui/views/dialogs/run_profiling_dialog.py +++ b/testgen/ui/views/dialogs/run_profiling_dialog.py @@ -73,7 +73,7 @@ def run_profiling_dialog(project_code: str, table_group: pd.Series | None = None style="margin-left: auto; border-radius: 4px; border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)", ) else: - time.sleep(1) + time.sleep(2) st.cache_data.clear() st.rerun() diff --git a/testgen/ui/views/dialogs/run_tests_dialog.py b/testgen/ui/views/dialogs/run_tests_dialog.py index 3e46e7b..a5b9eb6 100644 --- a/testgen/ui/views/dialogs/run_tests_dialog.py +++ b/testgen/ui/views/dialogs/run_tests_dialog.py @@ -77,7 +77,7 @@ def run_tests_dialog(project_code: str, test_suite: pd.Series | None = None, def style="margin-left: auto; border-radius: 4px; border: var(--button-stroked-border); padding: 8px 8px 8px 16px; color: var(--primary-color)", ) else: - time.sleep(1) + time.sleep(2) st.cache_data.clear() st.rerun() From 7d78958f5b2350ab476c7290e1f187778114255e Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 12 Nov 2024 23:44:45 -0500 Subject: [PATCH 76/91] fix(ui): deep-linking on test results page --- testgen/ui/services/form_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index 58c1bcf..19c7faa 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -887,7 +887,7 @@ def render_grid_select( pre_selected_rows=pre_selected_rows, ) - if bind_to_query_prop and bind_to_query_prop.isalnum(): + if bind_to_query_prop: gb.configure_grid_options(getRowId=JsCode(f"""function(row) {{ return row.data.{bind_to_query_prop}; }}""")) all_columns = list(df.columns) From 6593ddfe9d7d0059462bbc68e065ed9f86bd69e6 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 13 Nov 2024 10:44:22 -0500 Subject: [PATCH 77/91] fix(ui): update bind query js syntax --- testgen/ui/services/form_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py index 19c7faa..06ed0f9 100644 --- a/testgen/ui/services/form_service.py +++ b/testgen/ui/services/form_service.py @@ -888,7 +888,7 @@ def render_grid_select( ) if bind_to_query_prop: - gb.configure_grid_options(getRowId=JsCode(f"""function(row) {{ return row.data.{bind_to_query_prop}; }}""")) + gb.configure_grid_options(getRowId=JsCode(f"""function(row) {{ return row.data['{bind_to_query_prop}'] }}""")) all_columns = list(df.columns) From 726bd9a139ba0aa96f1c1b94764a6b903bcde084 Mon Sep 17 00:00:00 2001 From: Astor Date: Thu, 14 Nov 2024 10:15:33 -0300 Subject: [PATCH 78/91] fix(profiling): TG-774 - disable profiling dialog for multi-table results. 
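Hygiene results that aggregate several tables are labeled with the placeholder table name "(multi-table)", so there is no single table whose profiling results could be opened. Below is a minimal sketch of the guard this change applies, using the same names as the diff that follows (an early return is shown here for brevity; the actual patch nests the button under the equivalent condition):

    # Render the "View Profiling" button only for results tied to a single table.
    def view_profiling_button(str_table_name, str_column_name, str_profile_run_id=None, str_table_groups_id=None):
        if str_table_name == "(multi-table)":
            return  # nothing to profile for multi-table results
        if st.button(BUTTON_TEXT, help=BUTTON_HELP, use_container_width=True):
            profiling_results_dialog(str_table_name, str_column_name, str_profile_run_id, str_table_groups_id)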
--- testgen/ui/views/dialogs/profiling_results_dialog.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/testgen/ui/views/dialogs/profiling_results_dialog.py b/testgen/ui/views/dialogs/profiling_results_dialog.py index 5cce9c6..9a07b32 100644 --- a/testgen/ui/views/dialogs/profiling_results_dialog.py +++ b/testgen/ui/views/dialogs/profiling_results_dialog.py @@ -13,10 +13,11 @@ def view_profiling_button(str_table_name, str_column_name, str_profile_run_id=None, str_table_groups_id=None): - if st.button( - BUTTON_TEXT, help=BUTTON_HELP, use_container_width=True - ): - profiling_results_dialog(str_table_name, str_column_name, str_profile_run_id, str_table_groups_id) + if str_table_name != "(multi-table)": + if st.button( + BUTTON_TEXT, help=BUTTON_HELP, use_container_width=True + ): + profiling_results_dialog(str_table_name, str_column_name, str_profile_run_id, str_table_groups_id) @st.dialog(title="Profiling Results") From fca069ba8371458f1c4c3f7ef9342ff131b790d3 Mon Sep 17 00:00:00 2001 From: Ricardo Boni Date: Thu, 14 Nov 2024 15:08:43 -0500 Subject: [PATCH 79/91] fix(functions): Strip out comments when loading templated functions --- testgen/common/read_file.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/testgen/common/read_file.py b/testgen/common/read_file.py index bfc2e9b..41b5bbb 100644 --- a/testgen/common/read_file.py +++ b/testgen/common/read_file.py @@ -77,6 +77,8 @@ def read_template_yaml_function(function_name: str, db_flavour: str) -> str: sub_directory=f"flavors/{db_flavour}/profiling", ) template = yaml_functions[function_name] + template = re.sub(r"/\*.*?\*/", "", template, flags=re.DOTALL) + template = re.sub(r"\s\s*", " ", template) return template From 973bb4a6263860b6531c112513f2481543a1b791 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 13 Nov 2024 17:08:17 -0500 Subject: [PATCH 80/91] fix(ui): bug fixes on test results and connection wizard --- .../ui/components/frontend/js/components/flavor_selector.js | 2 +- testgen/ui/components/widgets/select.py | 6 +++++- testgen/ui/views/hygiene_issues.py | 2 +- testgen/ui/views/table_groups/forms.py | 2 +- testgen/ui/views/test_definitions.py | 2 +- testgen/ui/views/test_results.py | 5 +++-- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/testgen/ui/components/frontend/js/components/flavor_selector.js b/testgen/ui/components/frontend/js/components/flavor_selector.js index e5ff790..8cd1c17 100644 --- a/testgen/ui/components/frontend/js/components/flavor_selector.js +++ b/testgen/ui/components/frontend/js/components/flavor_selector.js @@ -75,7 +75,7 @@ const DatabaseFlavor = ( ) => { return div( { - class: `tg-flavor ${props.selected.val ? 'selected' : ''}`, + class: () => `tg-flavor ${props.selected.val ? 
'selected' : ''}`, onclick: onClick, }, span({class: 'tg-flavor-focus-state-indicator'}, ''), diff --git a/testgen/ui/components/widgets/select.py b/testgen/ui/components/widgets/select.py index e259410..31fa748 100644 --- a/testgen/ui/components/widgets/select.py +++ b/testgen/ui/components/widgets/select.py @@ -4,6 +4,7 @@ from testgen.ui.navigation.router import Router +EMPTY_VALUE = "---" def select( label: str, @@ -13,6 +14,7 @@ def select( default_value = None, required: bool = False, bind_to_query: str | None = None, + bind_empty_value: bool = False, **kwargs, ): kwargs = {**kwargs} @@ -28,6 +30,8 @@ def select( kwargs["options"] = options if default_value in options: kwargs["index"] = options.index(default_value) + (0 if required else 1) + elif default_value == EMPTY_VALUE and not required: + kwargs["index"] = 0 if bind_to_query: kwargs["key"] = kwargs.get("key", f"testgen_select_{bind_to_query}") @@ -36,7 +40,7 @@ def select( def update_query_params(): query_value = st.session_state[kwargs["key"]] - if not required and query_value == "---": + if not required and query_value == EMPTY_VALUE and not bind_empty_value: query_value = None elif isinstance(options, pd.DataFrame): query_value = options.loc[options[display_column] == query_value, value_column].iloc[0] diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py index 49af2ad..175341d 100644 --- a/testgen/ui/views/hygiene_issues.py +++ b/testgen/ui/views/hygiene_issues.py @@ -96,7 +96,7 @@ def render( ) with column_filter_column: - column_options = list(run_columns_df.loc[run_columns_df["table_name"] == table_name]["column_name"]) + column_options = list(run_columns_df.loc[run_columns_df["table_name"] == table_name]["column_name"].unique()) column_name = testgen.select( options=column_options, value_column="column_name", diff --git a/testgen/ui/views/table_groups/forms.py b/testgen/ui/views/table_groups/forms.py index 7f60e32..00ae5a2 100644 --- a/testgen/ui/views/table_groups/forms.py +++ b/testgen/ui/views/table_groups/forms.py @@ -13,7 +13,7 @@ class TableGroupForm(BaseForm, ManualRender): default="", min_length=1, max_length=40, - st_kwargs_label="Name", + st_kwargs_label="Table Group Name", st_kwargs_max_chars=40, st_kwargs_help="A unique name to describe the table group", ) diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py index b57ac61..5fe317e 100644 --- a/testgen/ui/views/test_definitions.py +++ b/testgen/ui/views/test_definitions.py @@ -532,7 +532,7 @@ def show_test_form( elif dynamic_attribute in ["threshold_value"]: test_definition[dynamic_attribute] = current_column.number_input( label=actual_dynamic_attributes_labels, - value=value, + value=float(value), help=actual_dynamic_attributes_help, ) else: diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index f8d60ed..6ea5f3a 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -98,6 +98,7 @@ def render( default_value=status or "Failed + Warning", required=False, bind_to_query="status", + bind_empty_value=True, label="Result Status", ) @@ -122,7 +123,7 @@ def render( ) with column_filter_column: - column_options = list(run_columns_df.loc[run_columns_df["table_name"] == table_name]["column_name"]) + column_options = list(run_columns_df.loc[run_columns_df["table_name"] == table_name]["column_name"].unique()) column_name = testgen.select( options=column_options, value_column="column_name", @@ -810,7 +811,7 @@ def 
source_data_dialog(selected_row): def view_edit_test(button_container, test_definition_id): with button_container: - if st.button("🖊️ Edit Test", help="Edit the Test Definition", use_container_width=True): + if st.button(":material/edit: Edit Test", help="Edit the Test Definition", use_container_width=True): show_test_form_by_id(test_definition_id) From a4729efba4ea05387a852b2a7edeef7a7cf1e9ab Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 14 Nov 2024 22:23:15 -0500 Subject: [PATCH 81/91] fix(pdf): make file names unique when zipping multiple --- testgen/ui/views/hygiene_issues.py | 4 ++-- testgen/ui/views/test_results.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py index 175341d..e227646 100644 --- a/testgen/ui/views/hygiene_issues.py +++ b/testgen/ui/views/hygiene_issues.py @@ -239,7 +239,7 @@ def render( ) else: zip_func = zip_multi_file_data( - "testgen_issue_reports.zip", + "testgen_hygiene_issue_reports.zip", get_report_file_data, [(arg,) for arg in selected], ) @@ -491,7 +491,7 @@ def do_disposition_update(selected, str_new_status): return str_result def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE: - hi_id = tr_data["anomaly_id"] + hi_id = tr_data["id"][:8] profiling_time = pd.Timestamp(tr_data["profiling_starttime"]).strftime("%Y%m%d_%H%M%S") file_name = f"testgen_hygiene_issue_report_{hi_id}_{profiling_time}.pdf" diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index 6ea5f3a..b6af05e 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -661,7 +661,7 @@ def show_result_detail( ) else: zip_func = zip_multi_file_data( - "testgen_issue_reports.zip", + "testgen_test_issue_reports.zip", get_report_file_data, [(arg,) for arg in selected_rows], ) @@ -816,9 +816,9 @@ def view_edit_test(button_container, test_definition_id): def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE: - td_id = tr_data["test_definition_id_runtime"][:6] + tr_id = tr_data["test_result_id"][:8] tr_time = pd.Timestamp(tr_data["test_time"]).strftime("%Y%m%d_%H%M%S") - file_name = f"testgen_test_issue_report_{td_id}_{tr_time}.pdf" + file_name = f"testgen_test_issue_report_{tr_id}_{tr_time}.pdf" with BytesIO() as buffer: create_report(buffer, tr_data) From c8f1cd2537724bb61c56f0a706b778d378d006e4 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 14 Nov 2024 23:09:12 -0500 Subject: [PATCH 82/91] refactor: cleanup references to qc schema --- docs/configuration.md | 6 -- .../queries/execute_cat_tests_query.py | 5 +- testgen/commands/queries/profiling_query.py | 12 --- testgen/commands/run_execute_cat_tests.py | 1 - testgen/commands/run_launch_db_config.py | 1 - testgen/commands/run_profiling_bridge.py | 1 - testgen/common/get_pipeline_parms.py | 1 - testgen/settings.py | 8 -- .../030_initialize_new_schema_structure.sql | 1 - .../040_populate_new_schema_project.sql | 3 +- .../dbupgrade/0115_incremental_upgrade.sql | 3 + .../ex_cat_build_agg_table_tests.sql | 6 +- .../ex_cat_get_distinct_tables.sql | 3 +- .../create_functions_trino.sql | 92 ------------------- .../create_qc_schema_trino.sql | 1 - .../template/get_entities/get_connection.sql | 1 - testgen/template/parms/parms_profiling.sql | 1 - .../template/parms/parms_test_execution.sql | 1 - testgen/ui/queries/connection_queries.py | 8 +- testgen/ui/services/connection_service.py | 34 +------ testgen/ui/services/hygiene_issues_service.py | 3 +- 
testgen/ui/services/table_group_service.py | 6 +- testgen/ui/services/test_results_service.py | 5 +- testgen/ui/views/connections/page.py | 15 +-- testgen/ui/views/table_groups/page.py | 13 +-- 25 files changed, 21 insertions(+), 210 deletions(-) create mode 100644 testgen/template/dbupgrade/0115_incremental_upgrade.sql delete mode 100644 testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql delete mode 100644 testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql diff --git a/docs/configuration.md b/docs/configuration.md index 2b844b1..d5a1035 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -159,12 +159,6 @@ Determine how many tests are grouped together in a single query. Increase for be default: `5000` -#### `PROJECT_QC_SCHEMA` - -Name of the schema to be created in the project database. - -default: `qc` - #### `PROJECT_DATABASE_NAME` Name of the database the auto generated project will run test against. diff --git a/testgen/commands/queries/execute_cat_tests_query.py b/testgen/commands/queries/execute_cat_tests_query.py index 89e8ff8..ecbd6aa 100644 --- a/testgen/commands/queries/execute_cat_tests_query.py +++ b/testgen/commands/queries/execute_cat_tests_query.py @@ -19,7 +19,6 @@ class CCATExecutionSQL: # Test Set Parameters target_schema = "" target_table = "" - replace_qc_schema = "" dctTestParms: typing.ClassVar = {} def __init__(self, strProjectCode, strTestSuiteId, strTestSuite, strSQLFlavor, max_query_chars, minutes_offset=0): @@ -41,9 +40,7 @@ def _ReplaceParms(self, strInputString): strInputString = strInputString.replace("{TEST_SUITE}", self.test_suite) strInputString = strInputString.replace("{TEST_SUITE_ID}", self.test_suite_id) strInputString = strInputString.replace("{TABLE_GROUPS_ID}", self.table_groups_id) - # NOTE: REPLACE_QC_SCHEMA is parm replaced to run build query: sets the actual value to replace. - # DATA_QC_SCHEMA is parm in cat_test_conditions that build query replaces via SQL. 
- strInputString = strInputString.replace("{REPLACE_QC_SCHEMA}", self.replace_qc_schema) + strInputString = strInputString.replace("{SQL_FLAVOR}", self.flavor) strInputString = strInputString.replace("{CONCAT_OPERATOR}", self.concat_operator) diff --git a/testgen/commands/queries/profiling_query.py b/testgen/commands/queries/profiling_query.py index ed35c0c..4a71df3 100644 --- a/testgen/commands/queries/profiling_query.py +++ b/testgen/commands/queries/profiling_query.py @@ -14,7 +14,6 @@ class CProfilingSQL: table_groups_id = "" flavor = "" run_date = "" - data_qc_schema = "" data_schema = "" data_table = "" @@ -75,7 +74,6 @@ def ReplaceParms(self, strInputString): strInputString = strInputString.replace("{TABLE_GROUPS_ID}", self.table_groups_id) strInputString = strInputString.replace("{RUN_DATE}", self.run_date) strInputString = strInputString.replace("{DATA_SCHEMA}", self.data_schema) - strInputString = strInputString.replace("{DATA_QC_SCHEMA}", self.data_qc_schema) strInputString = strInputString.replace("{DATA_TABLE}", self.data_table) strInputString = strInputString.replace("{COL_NAME}", self.col_name) strInputString = strInputString.replace("{COL_NAME_SANITIZED}", self.col_name.replace("'", "''")) @@ -245,16 +243,6 @@ def _get_mask_query(self, mask, is_include): sub_query += ")" return sub_query - def GetFunctionCreatorQuery(self): - # Runs on Project DB - strQ = self.ReplaceParms( - read_template_sql_file( - f"project_function_creator_{self.flavor}.sql", - sub_directory=f"flavors/{self.flavor}/setup_profiling_tools", - ) - ) - return strQ - def GetProfilingQuery(self): # Runs on Project DB if not self.dctSnippetTemplate: diff --git a/testgen/commands/run_execute_cat_tests.py b/testgen/commands/run_execute_cat_tests.py index 23e20a5..d126189 100644 --- a/testgen/commands/run_execute_cat_tests.py +++ b/testgen/commands/run_execute_cat_tests.py @@ -123,7 +123,6 @@ def run_cat_test_queries( for dctTable in lstTables: clsCATExecute.target_schema = dctTable["schema_name"] clsCATExecute.target_table = dctTable["table_name"] - clsCATExecute.replace_qc_schema = dctTable["replace_qc_schema"] AggregateTableTests(clsCATExecute) LOG.info("CurrentStep: Retrieving CAT Tests to Run") diff --git a/testgen/commands/run_launch_db_config.py b/testgen/commands/run_launch_db_config.py index e6ab186..fed8176 100644 --- a/testgen/commands/run_launch_db_config.py +++ b/testgen/commands/run_launch_db_config.py @@ -47,7 +47,6 @@ def _get_params_mapping() -> dict: "PROFILING_SAMPLE_MIN_COUNT": "", "PROFILING_DELAY_DAYS": "", "CONNECTION_NAME": settings.PROJECT_CONNECTION_NAME, - "PROJECT_QC_SCHEMA": settings.PROJECT_QC_SCHEMA, "TABLE_GROUPS_NAME": settings.DEFAULT_TABLE_GROUPS_NAME, "TEST_SUITE": settings.DEFAULT_TEST_SUITE_KEY, "TEST_SUITE_DESCRIPTION": settings.DEFAULT_TEST_SUITE_DESCRIPTION, diff --git a/testgen/commands/run_profiling_bridge.py b/testgen/commands/run_profiling_bridge.py index 4dd42b3..68654b6 100644 --- a/testgen/commands/run_profiling_bridge.py +++ b/testgen/commands/run_profiling_bridge.py @@ -288,7 +288,6 @@ def run_profiling_queries(strTableGroupsID, spinner=None): clsProfiling.parm_do_patterns = "Y" clsProfiling.parm_max_pattern_length = 25 clsProfiling.profile_run_id = strProfileRunID - clsProfiling.data_qc_schema = dctParms["project_qc_schema"] clsProfiling.data_schema = dctParms["table_group_schema"] clsProfiling.parm_table_set = dctParms["profiling_table_set"] clsProfiling.parm_table_include_mask = dctParms["profiling_include_mask"] diff --git 
a/testgen/common/get_pipeline_parms.py b/testgen/common/get_pipeline_parms.py index c3f81d6..af673ca 100644 --- a/testgen/common/get_pipeline_parms.py +++ b/testgen/common/get_pipeline_parms.py @@ -20,7 +20,6 @@ def RetrieveProfilingParms(strTableGroupsID): or lstParms[0]["profile_use_sampling"] == "" or lstParms[0]["profile_sample_percent"] == "" or lstParms[0]["profile_sample_min_count"] == "" - or lstParms[0]["project_qc_schema"] == "" or lstParms[0]["table_group_schema"] == "" ): raise ValueError("Project Connection parameters not correctly set") diff --git a/testgen/settings.py b/testgen/settings.py index 595e402..2a708af 100644 --- a/testgen/settings.py +++ b/testgen/settings.py @@ -214,14 +214,6 @@ defaults to: `5000` """ -PROJECT_QC_SCHEMA: str = os.getenv("PROJECT_QC_SCHEMA", "qc") -""" -Name of the schema to be created in the project database. - -from env variable: `PROJECT_QC_SCHEMA` -defaults to: `qc` -""" - PROJECT_DATABASE_NAME: str = os.getenv("PROJECT_DATABASE_NAME", "demo_db") """ Name of the database the auto generated project will run test diff --git a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql index 4e6a7be..7c4b08f 100644 --- a/testgen/template/dbsetup/030_initialize_new_schema_structure.sql +++ b/testgen/template/dbsetup/030_initialize_new_schema_structure.sql @@ -55,7 +55,6 @@ CREATE TABLE connections ( project_user VARCHAR(50), project_db VARCHAR(100), connection_name VARCHAR(40), - project_qc_schema VARCHAR(200), project_pw_encrypted BYTEA, max_threads INTEGER DEFAULT 4, max_query_chars INTEGER, diff --git a/testgen/template/dbsetup/040_populate_new_schema_project.sql b/testgen/template/dbsetup/040_populate_new_schema_project.sql index d71e944..8ac7fdc 100644 --- a/testgen/template/dbsetup/040_populate_new_schema_project.sql +++ b/testgen/template/dbsetup/040_populate_new_schema_project.sql @@ -10,7 +10,7 @@ SELECT '{PROJECT_CODE}' as project_code, INSERT INTO connections (project_code, sql_flavor, - project_host, project_port, project_user, project_db, project_qc_schema, + project_host, project_port, project_user, project_db, connection_name, project_pw_encrypted, max_threads, max_query_chars) SELECT '{PROJECT_CODE}' as project_code, '{SQL_FLAVOR}' as sql_flavor, @@ -18,7 +18,6 @@ SELECT '{PROJECT_CODE}' as project_code, '{PROJECT_PORT}' as project_port, '{PROJECT_USER}' as project_user, '{PROJECT_DB}' as project_db, - '{PROJECT_QC_SCHEMA}' as project_qc_schema, '{CONNECTION_NAME}' as connection_name, '{PROJECT_PW_ENCRYPTED}' as project_pw_encrypted, '{MAX_THREADS}'::INTEGER as max_threads, diff --git a/testgen/template/dbupgrade/0115_incremental_upgrade.sql b/testgen/template/dbupgrade/0115_incremental_upgrade.sql new file mode 100644 index 0000000..82a3058 --- /dev/null +++ b/testgen/template/dbupgrade/0115_incremental_upgrade.sql @@ -0,0 +1,3 @@ +SET SEARCH_PATH TO {SCHEMA_NAME}; + +ALTER TABLE connections DROP COLUMN project_qc_schema; diff --git a/testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql b/testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql index bee3588..c5e5fb6 100644 --- a/testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +++ b/testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql @@ -29,10 +29,9 @@ WITH test_detail -- Standard Measure start 'CAST(' || -- Nested parm replacements - part of query, not Python parms - REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE( + 
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE( c.measure, '{COLUMN_NAME}', COALESCE(fn_PrepColumnName(t.column_name), '')), - '{DATA_QC_SCHEMA}', '{REPLACE_QC_SCHEMA}'), '{BASELINE_CT}', COALESCE(t.baseline_ct, '')), '{BASELINE_UNIQUE_CT}', COALESCE(t.baseline_unique_ct, '')), '{BASELINE_VALUE}', COALESCE(t.baseline_value, '') ), @@ -48,10 +47,9 @@ WITH test_detail -- Standard CASE for condition starts 'CASE WHEN ' || -- Nested parm replacements - standard - REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE( + REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE( c.measure || c.test_operator || c.test_condition, '{COLUMN_NAME}', COALESCE(fn_PrepColumnName(t.column_name), '')), - '{DATA_QC_SCHEMA}', '{REPLACE_QC_SCHEMA}'), '{BASELINE_CT}', COALESCE(t.baseline_ct, '')), '{BASELINE_UNIQUE_CT}', COALESCE(t.baseline_unique_ct, '')), '{BASELINE_VALUE}', COALESCE(t.baseline_value, '') ), diff --git a/testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql b/testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql index ff2878b..e8e85d0 100644 --- a/testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +++ b/testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql @@ -1,6 +1,5 @@ SELECT DISTINCT schema_name, - table_name, - project_qc_schema as replace_qc_schema + table_name FROM test_definitions td INNER JOIN test_types tt ON td.test_type = tt.test_type diff --git a/testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql b/testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql deleted file mode 100644 index f4b1adc..0000000 --- a/testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +++ /dev/null @@ -1,92 +0,0 @@ - --- The following functions are inline functions --- INLINE FUNCTION TO CHECK FOR A NUMBER - -WITH FUNCTION num_check(a varchar) - RETURNS integer - RETURN - CASE WHEN regexp_like(a, '^[0-9]+(\.[0-9]+)?$') = TRUE THEN 1 - WHEN regexp_like(a, '\$[0-9]+(\.[0-9]+)?$') = TRUE THEN 1 - WHEN regexp_like(a, '^[0-9]+(\.[0-9]+)?\$') = TRUE THEN 1 - ELSE 0 -END -SELECT num_check('1234567'), num_check('$45.945843'), num_check('0.123$'); - - --- INLINE FUNCTION TO CHECK FOR A DATE - -WITH FUNCTION date_check(a varchar) - RETURNS integer - RETURN - CASE WHEN REGEXP_LIKE(a, '^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])\s(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\s[0-9]{6})?$') - THEN CASE WHEN CAST(SUBSTRING(a, 1, 4) AS INT) BETWEEN 1800 AND 2200 - AND( ( SUBSTRING(a, 6, 2) IN ('01', '03', '05', '07', '08', '10', '12') - AND CAST(SUBSTRING(a, 9, 2) AS INT) BETWEEN 1 AND 31) - OR (SUBSTRING(a, 6, 2) IN ('04', '06', '09') AND CAST(SUBSTRING(a, 9, 2) AS INT) BETWEEN 1 AND 30) - OR (SUBSTRING(a, 6, 2) = '02' AND CAST(SUBSTRING(a, 9, 2) AS INT) BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 -END -WHEN REGEXP_LIKE(a, '^(\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])([0-5][0-9])([0-5][0-9])([0-9]{6})$') - OR REGEXP_LIKE(a, '^(\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])$') - THEN CASE WHEN CAST(SUBSTRING(a, 1, 4) AS INT) BETWEEN 1800 AND 2200 - AND ( (SUBSTRING(a, 5, 2) IN ('01', '03', '05', '07', '08', '10', '12') - AND CAST(SUBSTRING(a, 7, 2) AS INT) BETWEEN 1 AND 31) - OR (SUBSTRING(a, 5, 2) IN ('04', '06', '09') AND CAST(SUBSTRING(a, 7, 2) AS INT) BETWEEN 1 AND 30) - OR (SUBSTRING(a, 5, 2) = '02' AND CAST(SUBSTRING(a, 7, 2) AS INT) BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 -END -WHEN 
LENGTH(a) > 11 THEN 0 - WHEN REGEXP_LIKE(REGEXP_REPLACE(UPPER(a), '(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)', '12'), '[12][09][0-9][0-9]-[0-1]?[0-9]-[0-3]?[0-9]') - THEN CASE WHEN CAST(SPLIT_PART(a, '-', 1) AS INT) BETWEEN 1800 AND 2200 - AND ( (UPPER(SPLIT_PART(a, '-', 2)) IN ('01', '03', '05', '07', '08', - '1', '3', '5', '7', '8', '10', '12', - 'JAN', 'MAR', 'MAY', 'JUL', 'AUG', - 'OCT', 'DEC') - AND CAST(SPLIT_PART(a, '-', 3) AS INT) BETWEEN 1 AND 31) - OR (UPPER(SPLIT_PART(a, '-', 2)) IN ('04', '06', '09', '4', '6', '9', '11', 'APR', 'JUN', 'SEP', 'NOV') - AND CAST(SPLIT_PART(a, '-', 3) AS INT) BETWEEN 1 AND 30) - OR (UPPER(SPLIT_PART(a, '-', 2)) IN ('02', '2', 'FEB') AND CAST(SPLIT_PART(a, '-', 3) AS INT) BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 -END -WHEN REGEXP_LIKE(REPLACE(a, '-', '/') , '^[0-1]?[0-9]/[0-3]?[0-9]/[12][09][0-9][0-9]$') - OR REGEXP_LIKE(REPLACE(a, '-', '/') , '^[0-1]?[0-9]/[0-3]?[0-9]/[0-9][0-9]$') - THEN CASE WHEN CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 1) AS INT) BETWEEN 1 AND 12 - AND ( (CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 1) AS INT) IN (1, 3, 5, 7, 8, 10, 12) - AND CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 2) AS INT) BETWEEN 1 AND 31) - OR (CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 1) AS INT) IN (4, 6, 9, 11) - AND CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 2) AS INT) BETWEEN 1 AND 30) - OR (CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 1) AS INT) = 2 - AND CAST(SPLIT_PART(REPLACE(a, '-', '/'), '/', 2) AS INT) BETWEEN 1 AND 29) - ) - AND CAST(('20' || SUBSTRING(SPLIT_PART(REPLACE(a, '-', '/'), '/', 3), -2 )) AS INT) BETWEEN 1800 AND 2200 - THEN 1 - ELSE 0 -END -WHEN REGEXP_LIKE(UPPER(a) , '[0-3]?[0-9]-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[12][09][0-9][0-9]') - THEN CASE WHEN CAST(SPLIT_PART(a, '-', 3) AS INT) BETWEEN 1800 AND 2200 - AND ( (UPPER(SPLIT_PART(a, '-', 2)) IN ('JAN', 'MAR', 'MAY', 'JUL', 'AUG', 'OCT', 'DEC') - AND CAST(SPLIT_PART(a, '-', 1) AS INT) BETWEEN 1 AND 31) - OR (UPPER(SPLIT_PART(a, '-', 2)) IN ('APR', 'JUN', 'SEP', 'NOV') - AND CAST(SPLIT_PART(a, '-', 1) AS INT) BETWEEN 1 AND 30) - OR (UPPER(SPLIT_PART(a, '-', 2)) = 'FEB' - AND CAST(SPLIT_PART(a, '-', 1) AS INT) BETWEEN 1 AND 29) - ) - THEN 1 - ELSE 0 -END -ELSE 0 -END -SELECT date_check('2002-02-30 12:01:35'), - date_check('2002-02-21 12:01:35 121324'), - date_check('20100314224518304596'), - date_check('20100230'), - date_check('201002301234'), - date_check('2010-03-30'), date_check('2010-MAR-30'), - date_check('05-21-22'), date_check('10/23/2023'), - date_check('10-SEP-2024'); \ No newline at end of file diff --git a/testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql b/testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql deleted file mode 100644 index 4cd79fe..0000000 --- a/testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +++ /dev/null @@ -1 +0,0 @@ -CREATE SCHEMA IF NOT exists {DATA_QC_SCHEMA}; diff --git a/testgen/template/get_entities/get_connection.sql b/testgen/template/get_entities/get_connection.sql index b24c7ba..30621ea 100644 --- a/testgen/template/get_entities/get_connection.sql +++ b/testgen/template/get_entities/get_connection.sql @@ -11,7 +11,6 @@ SELECT project_pw_encrypted, max_threads, max_query_chars, - project_qc_schema, url, connect_by_url, connect_by_key, diff --git a/testgen/template/parms/parms_profiling.sql b/testgen/template/parms/parms_profiling.sql index eabb737..80c93c4 100644 --- a/testgen/template/parms/parms_profiling.sql +++ 
b/testgen/template/parms/parms_profiling.sql @@ -23,7 +23,6 @@ SELECT cc.project_code, tg.profile_use_sampling, tg.profile_sample_percent, tg.profile_sample_min_count, - cc.project_qc_schema, tg.profile_do_pair_rules, tg.profile_pair_rule_pct, cc.max_threads diff --git a/testgen/template/parms/parms_test_execution.sql b/testgen/template/parms/parms_test_execution.sql index d39b644..15aba61 100644 --- a/testgen/template/parms/parms_test_execution.sql +++ b/testgen/template/parms/parms_test_execution.sql @@ -8,7 +8,6 @@ SELECT ts.project_code, cc.project_port, cc.project_user, cc.project_db, - cc.project_qc_schema, cc.connect_by_key, cc.private_key, cc.private_key_passphrase, diff --git a/testgen/ui/queries/connection_queries.py b/testgen/ui/queries/connection_queries.py index e03dfcc..087c9f0 100644 --- a/testgen/ui/queries/connection_queries.py +++ b/testgen/ui/queries/connection_queries.py @@ -10,7 +10,7 @@ def get_by_id(connection_id): str_schema = st.session_state["dbschema"] str_sql = f""" SELECT id::VARCHAR(50), project_code, connection_id, connection_name, - sql_flavor, project_host, project_port, project_user, project_qc_schema, + sql_flavor, project_host, project_port, project_user, project_db, project_pw_encrypted, NULL as password, max_threads, max_query_chars, url, connect_by_url, connect_by_key, private_key, private_key_passphrase FROM {str_schema}.connections @@ -23,7 +23,7 @@ def get_connections(project_code): str_schema = st.session_state["dbschema"] str_sql = f""" SELECT id::VARCHAR(50), project_code, connection_id, connection_name, - sql_flavor, project_host, project_port, project_user, project_qc_schema, + sql_flavor, project_host, project_port, project_user, project_db, project_pw_encrypted, NULL as password, max_threads, max_query_chars, connect_by_url, url, connect_by_key, private_key, private_key_passphrase @@ -48,7 +48,6 @@ def edit_connection(schema, connection, encrypted_password, encrypted_private_ke project_port = '{connection["project_port"]}', project_user = '{connection["project_user"]}', project_db = '{connection["project_db"]}', - project_qc_schema = '{connection["project_qc_schema"]}', connection_name = '{connection["connection_name"]}', max_threads = '{connection["max_threads"]}', max_query_chars = '{connection["max_query_chars"]}', @@ -79,7 +78,7 @@ def add_connection( ) -> int: sql_header = f"""INSERT INTO {schema}.connections (project_code, sql_flavor, url, connect_by_url, connect_by_key, - project_host, project_port, project_user, project_db, project_qc_schema, + project_host, project_port, project_user, project_db, connection_name,""" sql_footer = f""" SELECT @@ -92,7 +91,6 @@ def add_connection( '{connection["project_port"]}' as project_port, '{connection["project_user"]}' as project_user, '{connection["project_db"]}' as project_db, - '{connection["project_qc_schema"]}' as project_qc_schema, '{connection["connection_name"]}' as connection_name, """ if encrypted_password: diff --git a/testgen/ui/services/connection_service.py b/testgen/ui/services/connection_service.py index 66796d4..293a623 100644 --- a/testgen/ui/services/connection_service.py +++ b/testgen/ui/services/connection_service.py @@ -5,7 +5,6 @@ from testgen.commands.run_profiling_bridge import InitializeProfilingSQL from testgen.common.database.database_service import ( AssignConnectParms, - RetrieveDBResultsToList, empty_cache, get_db_type, get_flavor_service, @@ -138,7 +137,6 @@ def init_profiling_sql(project_code, connection, table_group_schema=None): project_port = 
connection["project_port"] project_db = connection["project_db"] project_user = connection["project_user"] - project_qc_schema = connection["project_qc_schema"] password = connection["password"] # prepare the profiling query @@ -150,7 +148,7 @@ def init_profiling_sql(project_code, connection, table_group_schema=None): project_host, project_port, project_db, - table_group_schema if table_group_schema else project_qc_schema, + table_group_schema, project_user, sql_flavor, url, @@ -165,36 +163,6 @@ def init_profiling_sql(project_code, connection, table_group_schema=None): return clsProfiling -def test_qc_connection(project_code, connection, init_profiling=True): - qc_results = {} - - if init_profiling: - init_profiling_sql(project_code, connection) - - project_qc_schema = connection["project_qc_schema"] - query_isnum_true = f"select {project_qc_schema}.fndk_isnum('32')" - query_isnum_true_result_raw = RetrieveDBResultsToList("PROJECT", query_isnum_true) - isnum_true_result = query_isnum_true_result_raw[0][0][0] == 1 - qc_results["isnum_true_result"] = isnum_true_result - - query_isnum_false = f"select {project_qc_schema}.fndk_isnum('HELLO')" - query_isnum_false_result_raw = RetrieveDBResultsToList("PROJECT", query_isnum_false) - isnum_false_result = query_isnum_false_result_raw[0][0][0] == 0 - qc_results["isnum_false_result"] = isnum_false_result - - query_isdate_true = f"select {project_qc_schema}.fndk_isdate('2013-05-18')" - query_isdate_true_result_raw = RetrieveDBResultsToList("PROJECT", query_isdate_true) - isdate_true_result = query_isdate_true_result_raw[0][0][0] == 1 - qc_results["isdate_true_result"] = isdate_true_result - - query_isdate_false = f"select {project_qc_schema}.fndk_isdate('HELLO')" - query_isdate_false_result_raw = RetrieveDBResultsToList("PROJECT", query_isdate_false) - isdate_false_result = query_isdate_false_result_raw[0][0][0] == 0 - qc_results["isdate_false_result"] = isdate_false_result - - return qc_results - - def form_overwritten_connection_url(connection): flavor = connection["sql_flavor"] diff --git a/testgen/ui/services/hygiene_issues_service.py b/testgen/ui/services/hygiene_issues_service.py index 0668876..c6024a9 100644 --- a/testgen/ui/services/hygiene_issues_service.py +++ b/testgen/ui/services/hygiene_issues_service.py @@ -7,7 +7,7 @@ def get_source_data(hi_data): str_schema = st.session_state["dbschema"] # Define the query str_sql = f""" - SELECT t.lookup_query, tg.table_group_schema, c.project_qc_schema, + SELECT t.lookup_query, tg.table_group_schema, c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase FROM {str_schema}.target_data_lookups t @@ -48,7 +48,6 @@ def replace_parms(str_query): str_query = str_query.replace("{TARGET_SCHEMA}", lst_query[0]["table_group_schema"]) str_query = str_query.replace("{TABLE_NAME}", hi_data["table_name"]) str_query = str_query.replace("{COLUMN_NAME}", hi_data["column_name"]) - str_query = str_query.replace("{DATA_QC_SCHEMA}", lst_query[0]["project_qc_schema"]) str_query = str_query.replace("{DETAIL_EXPRESSION}", hi_data["detail"]) str_query = str_query.replace("{PROFILE_RUN_DATE}", hi_data["profiling_starttime"]) if str_query is None or str_query == "": diff --git a/testgen/ui/services/table_group_service.py b/testgen/ui/services/table_group_service.py index f51d360..92a8509 100644 --- a/testgen/ui/services/table_group_service.py +++ b/testgen/ui/services/table_group_service.py @@ -81,7 
+81,6 @@ def test_table_group(table_group, connection_id, project_code): # get table group data table_group_schema = table_group["table_group_schema"] table_group_id = table_group["id"] - project_qc_schema = connection["project_qc_schema"] profiling_table_set = table_group["profiling_table_set"] profiling_include_mask = table_group["profiling_include_mask"] profiling_exclude_mask = table_group["profiling_exclude_mask"] @@ -104,7 +103,6 @@ def test_table_group(table_group, connection_id, project_code): clsProfiling.parm_do_patterns = "Y" clsProfiling.parm_max_pattern_length = 25 clsProfiling.profile_run_id = "" - clsProfiling.data_qc_schema = project_qc_schema clsProfiling.data_schema = table_group_schema clsProfiling.parm_table_set = get_profiling_table_set_with_quotes(profiling_table_set) clsProfiling.parm_table_include_mask = profiling_include_mask @@ -118,9 +116,7 @@ def test_table_group(table_group, connection_id, project_code): query = clsProfiling.GetDDFQuery() table_group_results = RetrieveDBResultsToDictList("PROJECT", query) - qc_results = connection_service.test_qc_connection(project_code, connection, init_profiling=False) - - return table_group_results, qc_results + return table_group_results def get_profiling_table_set_with_quotes(profiling_table_set): diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py index 9dba905..0fe29d0 100644 --- a/testgen/ui/services/test_results_service.py +++ b/testgen/ui/services/test_results_service.py @@ -39,7 +39,7 @@ def get_test_result_history(db_schema, tr_data): def do_source_data_lookup_custom(db_schema, tr_data): # Define the query str_sql = f""" - SELECT d.custom_query as lookup_query, tg.table_group_schema, c.project_qc_schema, + SELECT d.custom_query as lookup_query, tg.table_group_schema, c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase FROM {db_schema}.test_definitions d @@ -86,7 +86,7 @@ def do_source_data_lookup_custom(db_schema, tr_data): def do_source_data_lookup(db_schema, tr_data, sql_only=False): # Define the query str_sql = f""" - SELECT t.lookup_query, tg.table_group_schema, c.project_qc_schema, + SELECT t.lookup_query, tg.table_group_schema, c.sql_flavor, c.project_host, c.project_port, c.project_db, c.project_user, c.project_pw_encrypted, c.url, c.connect_by_url, c.connect_by_key, c.private_key, c.private_key_passphrase @@ -107,7 +107,6 @@ def replace_parms(df_test, str_query): str_query = str_query.replace("{TARGET_SCHEMA}", empty_if_null(lst_query[0]["table_group_schema"])) str_query = str_query.replace("{TABLE_NAME}", empty_if_null(tr_data["table_name"])) str_query = str_query.replace("{COLUMN_NAME}", empty_if_null(tr_data["column_names"])) - str_query = str_query.replace("{DATA_QC_SCHEMA}", empty_if_null(lst_query[0]["project_qc_schema"])) str_query = str_query.replace("{TEST_DATE}", str(empty_if_null(tr_data["test_date"]))) str_query = str_query.replace("{CUSTOM_QUERY}", empty_if_null(df_test.at[0, "custom_query"])) diff --git a/testgen/ui/views/connections/page.py b/testgen/ui/views/connections/page.py index f86aa72..9518ba4 100644 --- a/testgen/ui/views/connections/page.py +++ b/testgen/ui/views/connections/page.py @@ -100,7 +100,6 @@ def show_connection_form(self, selected_connection: dict, _mode: str, project_co ) data.update({ "project_code": project_code, - "project_qc_schema": "", }) if "private_key" not in data: data.update({ @@ 
-208,19 +207,7 @@ def test_connection(self, connection: dict) -> "ConnectionStatus": if not connection_successful: return ConnectionStatus(message="Error completing a query to the database server.", successful=False) - - qc_error_message = "The connection was successful, but there is an issue with the QC Utility Schema" - try: - qc_results = connection_service.test_qc_connection(connection["project_code"], connection) - if not all(qc_results): - return ConnectionStatus( - message=qc_error_message, - details=f"QC Utility Schema confirmation failed. details: {qc_results}", - successful=False, - ) - return ConnectionStatus(message="The connection was successful.", successful=True) - except Exception as error: - return ConnectionStatus(message=qc_error_message, details=error.args[0], successful=False) + return ConnectionStatus(message="The connection was successful.", successful=True) except Exception as error: return ConnectionStatus(message="Error attempting the Connection.", details=error.args[0], successful=False) diff --git a/testgen/ui/views/table_groups/page.py b/testgen/ui/views/table_groups/page.py index a4b0a4c..0e53dbc 100644 --- a/testgen/ui/views/table_groups/page.py +++ b/testgen/ui/views/table_groups/page.py @@ -409,8 +409,8 @@ def table_group_preview(entity, connection_id, project_code, status): status.empty() status.info("Connecting to the Table Group ...") try: - table_group_results, qc_results = table_group_service.test_table_group(entity, connection_id, project_code) - if len(table_group_results) > 0 and all(qc_results): + table_group_results = table_group_service.test_table_group(entity, connection_id, project_code) + if len(table_group_results) > 0: tables = set() columns = [] schemas = set() @@ -419,7 +419,7 @@ def table_group_preview(entity, connection_id, project_code, status): tables.add(result["table_name"]) columns.append(result["column_name"]) - show_test_results(schemas, tables, columns, qc_results) + show_test_results(schemas, tables, columns) status.empty() status.success("Operation has finished successfully.") @@ -429,8 +429,6 @@ def table_group_preview(entity, connection_id, project_code, status): error_message = "" if len(table_group_results) == 0: error_message = "Result is empty." - if not all(qc_results): - error_message = f"Error testing the connection to the Table Group. 
Details: {qc_results}" st.text_area("Table Group Error Details", value=error_message) except Exception as e: status.empty() @@ -439,10 +437,7 @@ def table_group_preview(entity, connection_id, project_code, status): st.text_area("Table Group Error Details", value=error_message) -def show_test_results(schemas, tables, columns, qc_results): - qc_test_results = all(qc_results) - st.markdown(f"**Utility QC Schema Validity Test**: {':white_check_mark:' if qc_test_results else ':x:'}") - +def show_test_results(schemas, tables, columns): st.markdown(f"**Schema**: {schemas.pop()}") st.markdown(f"**Column Count**: {len(columns)}") From da6e23a21bf0b03118b3c6781528f10c51d66cee Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 18 Nov 2024 12:16:34 -0500 Subject: [PATCH 83/91] fix(ui): bug in format duration utility --- testgen/ui/components/frontend/js/display_utils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/ui/components/frontend/js/display_utils.js b/testgen/ui/components/frontend/js/display_utils.js index 1be340b..bbd9a46 100644 --- a/testgen/ui/components/frontend/js/display_utils.js +++ b/testgen/ui/components/frontend/js/display_utils.js @@ -19,7 +19,7 @@ function formatDuration(/** @type string */ duration) { return '--'; } - const { hour, minute, second } = duration.split(':'); + const [ hour, minute, second ] = duration.split(':'); let formatted = [ { value: Number(hour), unit: 'h' }, { value: Number(minute), unit: 'm' }, From c69012512cdb581665d954fdbca96dbc6e6ff754 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Mon, 18 Nov 2024 20:46:27 -0500 Subject: [PATCH 84/91] fix(data-hierarchy): add selected query param to test issue link --- testgen/ui/components/frontend/js/pages/data_hierarchy.js | 2 ++ testgen/ui/views/data_hierarchy.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/testgen/ui/components/frontend/js/pages/data_hierarchy.js b/testgen/ui/components/frontend/js/pages/data_hierarchy.js index a8bd342..852434b 100644 --- a/testgen/ui/components/frontend/js/pages/data_hierarchy.js +++ b/testgen/ui/components/frontend/js/pages/data_hierarchy.js @@ -19,6 +19,7 @@ * * @typedef TestIssue * @type {object} + * @property {string} id * @property {string} column_name * @property {string} test_name * @property {'Failed' | 'Warning' | 'Error' } result_status @@ -504,6 +505,7 @@ const TestIssuesCard = (/** @type Table | Column */ item) => { run_id: issue.test_run_id, table_name: item.table_name, column_name: item.column_name, + selected: issue.id, }, open_new: true, label: formatTimestamp(issue.test_run_date), diff --git a/testgen/ui/views/data_hierarchy.py b/testgen/ui/views/data_hierarchy.py index 4ffa33f..5f9dcbb 100644 --- a/testgen/ui/views/data_hierarchy.py +++ b/testgen/ui/views/data_hierarchy.py @@ -388,7 +388,8 @@ def get_latest_test_issues(table_group_id: str, table_name: str, column_name: st column_condition = f"AND column_names = '{column_name}'" sql = f""" - SELECT column_names AS column_name, + SELECT test_results.id::VARCHAR(50), + column_names AS column_name, test_name_short AS test_name, result_status, result_message, From 524218e92f42ae5678ca1fda10f9127a85349435 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 19 Nov 2024 21:47:38 -0500 Subject: [PATCH 85/91] fix(sql): bug in source data dialog --- testgen/ui/services/hygiene_issues_service.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/testgen/ui/services/hygiene_issues_service.py b/testgen/ui/services/hygiene_issues_service.py index 
c6024a9..1085f3e 100644 --- a/testgen/ui/services/hygiene_issues_service.py +++ b/testgen/ui/services/hygiene_issues_service.py @@ -1,5 +1,6 @@ import streamlit as st +from testgen.common.read_file import replace_templated_functions from testgen.ui.services import database_service as db @@ -50,6 +51,10 @@ def replace_parms(str_query): str_query = str_query.replace("{COLUMN_NAME}", hi_data["column_name"]) str_query = str_query.replace("{DETAIL_EXPRESSION}", hi_data["detail"]) str_query = str_query.replace("{PROFILE_RUN_DATE}", hi_data["profiling_starttime"]) + + if "{{DKFN_" in str_query: + str_query = replace_templated_functions(str_query, lst_query[0]["sql_flavor"]) + if str_query is None or str_query == "": raise ValueError("Lookup query is not defined for this Anomoly Type.") return str_query From 926b8a12d806175fb5cf5c3be3a360d2bfa20aa2 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 19 Nov 2024 23:35:44 -0500 Subject: [PATCH 86/91] fix(ui): deep links intermittently don't work --- testgen/ui/navigation/router.py | 1 + 1 file changed, 1 insertion(+) diff --git a/testgen/ui/navigation/router.py b/testgen/ui/navigation/router.py index 3b812a3..d49df3d 100644 --- a/testgen/ui/navigation/router.py +++ b/testgen/ui/navigation/router.py @@ -58,6 +58,7 @@ def run(self, hide_sidebar=False) -> None: current_page.run() else: session.cookies_ready += 1 + time.sleep(0.3) def navigate(self, /, to: str, with_args: dict = {}) -> None: # noqa: B006 From 666b4477a8b51826735cfa71d85b15565013b5c7 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Wed, 20 Nov 2024 11:50:11 -0500 Subject: [PATCH 87/91] misc: change level of repetitive docker check log --- testgen/common/version_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/common/version_service.py b/testgen/common/version_service.py index c2317b1..8e03cb1 100644 --- a/testgen/common/version_service.py +++ b/testgen/common/version_service.py @@ -54,7 +54,7 @@ def _get_last_docker_release() -> str: ) if response.status_code != 200: - LOG.warning(f"version_service: Failed to fetch docker tags. Status code: {response.status_code}") + LOG.debug(f"version_service: Failed to fetch docker tags. 
Status code: {response.status_code}") return "unknown" tags_to_return = [] From c55e04b34633d7ceda08294dfcf00de8109972b6 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Thu, 21 Nov 2024 16:01:24 -0500 Subject: [PATCH 88/91] fix(sql): escape special characters in pattern match test --- .../template/dbsetup/050_populate_new_schema_metadata.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql index 2524edc..57bea03 100644 --- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql +++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql @@ -116,7 +116,7 @@ VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count con ('1023', 'Monthly_Rec_Ct', 'Monthly Records', 'At least one date per month present within date range', 'Tests for presence of at least one date per calendar month within min/max date range, per baseline data', 'At least one date per month expected in min/max date range.', 'Missing months', 'Calendar months without date values present', 'general_type= ''D'' AND date_days_present > 1 AND date_months_present > 2 AND date_months_present - (datediff( ''MON'' , min_date, max_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/{DATE_MONTHS_PRESENT}::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Count of Months without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar months without dates present', 'Monthly Records tests that at least one record is present for every calendar month within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each month. A failure here would suggest missing records for the number of months identified without data. You can adjust the threshold to accept a number of month that you know legitimately have no records.', 'Y'), ('1024', 'Outlier_Pct_Above', 'Outliers Above', 'Consistent outlier counts over 2 SD above mean', 'Tests that percent of outliers over 2 SD above Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD above the mean is greater than expected threshold.', 'Pct records over limit', NULL, 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over upper 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations above the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. 
This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y'), ('1025', 'Outlier_Pct_Below', 'Outliers Below', 'Consistent outlier counts under 2 SD below mean', 'Tests that percent of outliers over 2 SD below Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD below the mean is greater than expected threshold.', 'Pct records under limit', NULL, 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over lower 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations below the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y'), - ('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, 'fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'trim(REPLACE(REPLACE(REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. 
The expected threshold is the number of records that fail to match the defined pattern.', 'Y'), + ('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, 'fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'TRIM(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''([*+\-%_])'', ''\\\1'', ''g''), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. The expected threshold is the number of records that fail to match the defined pattern.', 'Y'), ('1028', 'Recency', 'Recency', 'Latest date within expected range of test date', 'Tests that the latest date in column is within a set number of days of the test date', 'Most recent date value not within expected days of test date.', 'Days before test', 'Number of days that most recent date precedes the date of test', 'general_type= ''D'' AND max_date <= run_date AND NOT column_name IN ( ''filedate'' , ''file_date'' ) AND NOT functional_data_type IN (''Future Date'', ''Schedule Date'') AND DATEDIFF( ''DAY'' , max_date, run_date) <= 62', '(ABS({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/(1.0+DATEDIFF(''DAY'', ''{MIN_DATE}'', ''{MAX_DATE}''))::FLOAT)/{RECORD_CT}::FLOAT', '0.75', NULL, NULL, 'threshold_value', 'CASE WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 3 THEN DATEDIFF(''DAY'', max_date, run_date) + 3 WHEN DATEDIFF(''DAY'', max_date, run_date) <= 7 then DATEDIFF(''DAY'', max_date, run_date) + 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 7.0) * 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) > 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 30.0) * 30 END', 'Threshold Maximum Days before Test', NULL, 'Warning', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected maximum count of days preceding test date', 'This test evaluates recency based on the latest referenced dates in the column. The test is appropriate for transactional dates and timestamps. The test can be especially valuable because timely data deliveries themselves may not assure that the most recent data is present. You can adjust the expected threshold to the maximum number of days that you expect the data to age before the dataset is refreshed. 
', 'Y'), ('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y'), ('1033', 'Street_Addr_Pattern', 'Street Address', 'Enough street address entries match defined pattern', 'Tests for percent of records matching standard street address pattern.', 'Percent of values matching standard street address format is under expected threshold.', 'Percent matches', 'Percent of records that match street address pattern', '(std_pattern_match=''STREET_ADDR'') AND (avg_length <> round(avg_length)) AND (avg_embedded_spaces BETWEEN 2 AND 6) AND (avg_length < 35)', '({VALUE_CT}::FLOAT * ({RESULT_MEASURE}::FLOAT - {THRESHOLD_VALUE}::FLOAT)/100.0)/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '75', 'Threshold Pct that Match Address Pattern', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected percent of records that match standard street address pattern', 'The street address pattern used in this test should match the vast majority of USA addresses. You can adjust the threshold percent of matches based on the results you are getting -- you may well want to tighten it to make the test more sensitive to invalid entries.', 'Y'), @@ -283,7 +283,7 @@ VALUES ('1001', 'Alpha_Trunc', 'redshift', 'MAX(LENGTH({COLUMN_NAME}))', '<', ' ('3018', 'Monthly_Rec_Ct', 'mssql', '(MAX(DATEDIFF(month, {COLUMN_NAME}, CAST(''{RUN_DATE}''AS DATE))) - MIN(DATEDIFF(month, {COLUMN_NAME}, CAST(''{RUN_DATE}'' AS DATE))) + 1) - COUNT(DISTINCT DATEDIFF(month, {COLUMN_NAME}, CAST(''{RUN_DATE}''AS DATE)))', '>', '{THRESHOLD_VALUE}'), ('3019', 'Outlier_Pct_Above', 'mssql', 'CAST(SUM(CASE WHEN CAST({COLUMN_NAME} AS FLOAT) > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS FLOAT) / CAST(COUNT({COLUMN_NAME}) AS FLOAT)', '>', '{THRESHOLD_VALUE}'), ('3020', 'Outlier_Pct_Below', 'mssql', 'CAST(SUM(CASE WHEN CAST( {COLUMN_NAME} AS FLOAT) < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS FLOAT) / CAST(COUNT({COLUMN_NAME}) AS FLOAT)', '>', '{THRESHOLD_VALUE}'), - ('3021', 'Pattern_Match', 'mssql', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - CAST(SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') LIKE ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END) AS BIGINT)', '>', '{THRESHOLD_VALUE}'), + ('3021', 'Pattern_Match', 'mssql', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - CAST(SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') LIKE ''{BASELINE_VALUE}'' ESCAPE ''\'' THEN 1 ELSE 0 END) AS BIGINT)', '>', '{THRESHOLD_VALUE}'), ('3022', 'Recency', 'mssql', 'DATEDIFF(day, MAX({COLUMN_NAME}), CAST(''{RUN_DATE}''AS DATE))', '>', '{THRESHOLD_VALUE}'), ('3023', 'Required', 'mssql', 'COUNT(*) - COUNT( {COLUMN_NAME} )', '>', '{THRESHOLD_VALUE}'), ('3024', 'Row_Ct', 'mssql', 'COUNT(*)', '<', '{THRESHOLD_VALUE}'), @@ -640,7 +640,7 @@ SELECT check_period, record_ct, ORDER BY check_period DESC;'), ('1158', '1024', 'Test Results', 'Outlier_Pct_Above', 'mssql', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CAST("{COLUMN_NAME}" AS FLOAT) > ({BASELINE_AVG} + (2*{BASELINE_SD})) 
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'), ('1159', '1025', 'Test Results', 'Outlier_Pct_Below', 'mssql', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CAST("{COLUMN_NAME}" AS FLOAT) < ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'), - ('1160', '1026', 'Test Results', 'Pattern_Match', 'mssql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT LIKE ''{BASELINE_VALUE}'' GROUP BY "{COLUMN_NAME}";'), + ('1160', '1026', 'Test Results', 'Pattern_Match', 'mssql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT LIKE ''{BASELINE_VALUE}'' ESCAPE ''\'' GROUP BY "{COLUMN_NAME}";'), ('1161', '1028', 'Test Results', 'Recency', 'mssql', NULL, 'SELECT DISTINCT col AS latest_date_available, CAST(''{TEST_DATE}'' AS DATE) AS test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE DATEDIFF(day, col, CAST(''{TEST_DATE}'' AS DATE)) > {THRESHOLD_VALUE};'), ('1162', '1030', 'Test Results', 'Required', 'mssql', NULL, 'SELECT TOP 500 * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL;'), ('1163', '1031', 'Test Results', 'Row_Ct', 'mssql', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, ABS(ROUND(CAST(100 * (current_count - {THRESHOLD_VALUE}) AS NUMERIC) / CAST({THRESHOLD_VALUE} AS NUMERIC) ,2)) AS row_count_pct_decrease FROM cte WHERE current_count < {THRESHOLD_VALUE};'), From fbaa6beb9c34e879cf89ec5ae44328f1dc53ff51 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 22 Nov 2024 16:51:07 -0500 Subject: [PATCH 89/91] fix(overview): error when profiling date exists but no test date --- testgen/ui/views/overview.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testgen/ui/views/overview.py b/testgen/ui/views/overview.py index a6ab0e0..e37565e 100644 --- a/testgen/ui/views/overview.py +++ b/testgen/ui/views/overview.py @@ -50,7 +50,7 @@ def render(self, project_code: str | None = None, **_kwargs): with table_group_sort_col: table_groups_df["latest_activity_date"] = table_groups_df[ ["latest_profile_start", "latest_tests_start"] - ].max(axis=1) + ].apply(pd.to_datetime).max(axis=1) # apply is needed to handle missing values ascending_fields: list[str] = ["table_groups_name"] sort_options = pd.DataFrame({ "value": ["table_groups_name", "latest_activity_date"], From 374a5d5728eeb1abb25d12d47fa4198ead07be3b Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Fri, 22 Nov 2024 22:32:56 -0500 Subject: [PATCH 90/91] fix(sql): special characters in pattern match test --- .../template/dbsetup/050_populate_new_schema_metadata.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql index 57bea03..3032a8e 100644 --- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql +++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql @@ -116,7 +116,7 @@ VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count con ('1023', 'Monthly_Rec_Ct', 'Monthly Records', 'At least one date per month present within date range', 'Tests for presence of at least one date per calendar month within min/max date 
range, per baseline data', 'At least one date per month expected in min/max date range.', 'Missing months', 'Calendar months without date values present', 'general_type= ''D'' AND date_days_present > 1 AND date_months_present > 2 AND date_months_present - (datediff( ''MON'' , min_date, max_date) + 1) = 0 AND future_date_ct::FLOAT / NULLIF(value_ct, 0) <= 0.75', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/{DATE_MONTHS_PRESENT}::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Count of Months without Dates', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Volume', 'Expected maximum count of calendar months without dates present', 'Monthly Records tests that at least one record is present for every calendar month within the minimum and maximum date range for the column. The test is relevant for transactional data, where you would expect at least one transaction to be recorded each month. A failure here would suggest missing records for the number of months identified without data. You can adjust the threshold to accept a number of month that you know legitimately have no records.', 'Y'), ('1024', 'Outlier_Pct_Above', 'Outliers Above', 'Consistent outlier counts over 2 SD above mean', 'Tests that percent of outliers over 2 SD above Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD above the mean is greater than expected threshold.', 'Pct records over limit', NULL, 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over upper 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations above the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. 
', 'Y'), ('1025', 'Outlier_Pct_Below', 'Outliers Below', 'Consistent outlier counts under 2 SD below mean', 'Tests that percent of outliers over 2 SD below Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD below the mean is greater than expected threshold.', 'Pct records under limit', NULL, 'general_type = ''N'' AND functional_data_type ilike ''Measure%'' AND column_name NOT ilike ''%latitude%'' AND column_name NOT ilike ''%longitude%'' AND value_ct <> distinct_value_ct AND distinct_value_ct > 10 AND stdev_value > 0 AND avg_value IS NOT NULL AND NOT (distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct > 2)', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over lower 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations below the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y'), - ('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, 'fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'TRIM(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''([*+\-%_])'', ''\\\1'', ''g''), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. 
The expected threshold is the number of records that fail to match the defined pattern.', 'Y'), + ('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, 'fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'TRIM(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''([*+\-%_])'', ''[\1]'', ''g''), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. The expected threshold is the number of records that fail to match the defined pattern.', 'Y'), ('1028', 'Recency', 'Recency', 'Latest date within expected range of test date', 'Tests that the latest date in column is within a set number of days of the test date', 'Most recent date value not within expected days of test date.', 'Days before test', 'Number of days that most recent date precedes the date of test', 'general_type= ''D'' AND max_date <= run_date AND NOT column_name IN ( ''filedate'' , ''file_date'' ) AND NOT functional_data_type IN (''Future Date'', ''Schedule Date'') AND DATEDIFF( ''DAY'' , max_date, run_date) <= 62', '(ABS({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/(1.0+DATEDIFF(''DAY'', ''{MIN_DATE}'', ''{MAX_DATE}''))::FLOAT)/{RECORD_CT}::FLOAT', '0.75', NULL, NULL, 'threshold_value', 'CASE WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 3 THEN DATEDIFF(''DAY'', max_date, run_date) + 3 WHEN DATEDIFF(''DAY'', max_date, run_date) <= 7 then DATEDIFF(''DAY'', max_date, run_date) + 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 7.0) * 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) > 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 30.0) * 30 END', 'Threshold Maximum Days before Test', NULL, 'Warning', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected maximum count of days preceding test date', 'This test evaluates recency based on the latest referenced dates in the column. The test is appropriate for transactional dates and timestamps. The test can be especially valuable because timely data deliveries themselves may not assure that the most recent data is present. You can adjust the expected threshold to the maximum number of days that you expect the data to age before the dataset is refreshed. 
', 'Y'),
('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y'),
('1033', 'Street_Addr_Pattern', 'Street Address', 'Enough street address entries match defined pattern', 'Tests for percent of records matching standard street address pattern.', 'Percent of values matching standard street address format is under expected threshold.', 'Percent matches', 'Percent of records that match street address pattern', '(std_pattern_match=''STREET_ADDR'') AND (avg_length <> round(avg_length)) AND (avg_embedded_spaces BETWEEN 2 AND 6) AND (avg_length < 35)', '({VALUE_CT}::FLOAT * ({RESULT_MEASURE}::FLOAT - {THRESHOLD_VALUE}::FLOAT)/100.0)/{RECORD_CT}::FLOAT', '1.0', NULL, NULL, 'threshold_value', '75', 'Threshold Pct that Match Address Pattern', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected percent of records that match standard street address pattern', 'The street address pattern used in this test should match the vast majority of USA addresses. You can adjust the threshold percent of matches based on the results you are getting -- you may well want to tighten it to make the test more sensitive to invalid entries.', 'Y'),
@@ -283,7 +283,7 @@ VALUES ('1001', 'Alpha_Trunc', 'redshift', 'MAX(LENGTH({COLUMN_NAME}))', '<', '
('3018', 'Monthly_Rec_Ct', 'mssql', '(MAX(DATEDIFF(month, {COLUMN_NAME}, CAST(''{RUN_DATE}''AS DATE))) - MIN(DATEDIFF(month, {COLUMN_NAME}, CAST(''{RUN_DATE}'' AS DATE))) + 1) - COUNT(DISTINCT DATEDIFF(month, {COLUMN_NAME}, CAST(''{RUN_DATE}''AS DATE)))', '>', '{THRESHOLD_VALUE}'),
('3019', 'Outlier_Pct_Above', 'mssql', 'CAST(SUM(CASE WHEN CAST({COLUMN_NAME} AS FLOAT) > {BASELINE_AVG}+(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS FLOAT) / CAST(COUNT({COLUMN_NAME}) AS FLOAT)', '>', '{THRESHOLD_VALUE}'),
('3020', 'Outlier_Pct_Below', 'mssql', 'CAST(SUM(CASE WHEN CAST( {COLUMN_NAME} AS FLOAT) < {BASELINE_AVG}-(2.0*{BASELINE_SD}) THEN 1 ELSE 0 END) AS FLOAT) / CAST(COUNT({COLUMN_NAME}) AS FLOAT)', '>', '{THRESHOLD_VALUE}'),
- ('3021', 'Pattern_Match', 'mssql', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - CAST(SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') LIKE ''{BASELINE_VALUE}'' ESCAPE ''\'' THEN 1 ELSE 0 END) AS BIGINT)', '>', '{THRESHOLD_VALUE}'),
+ ('3021', 'Pattern_Match', 'mssql', 'COUNT(NULLIF({COLUMN_NAME}, '''')) - CAST(SUM(CASE WHEN NULLIF({COLUMN_NAME}, '''') LIKE ''{BASELINE_VALUE}'' THEN 1 ELSE 0 END) AS BIGINT)', '>', '{THRESHOLD_VALUE}'),
('3022', 'Recency', 'mssql', 'DATEDIFF(day, MAX({COLUMN_NAME}), CAST(''{RUN_DATE}''AS DATE))', '>', '{THRESHOLD_VALUE}'),
('3023', 'Required', 'mssql', 'COUNT(*) - COUNT( {COLUMN_NAME} )', '>', '{THRESHOLD_VALUE}'),
('3024', 'Row_Ct', 'mssql', 'COUNT(*)', '<', '{THRESHOLD_VALUE}'),
@@ -640,7 +640,7 @@ SELECT check_period, record_ct,
ORDER BY check_period DESC;'),
('1158', '1024', 'Test Results', 'Outlier_Pct_Above', 'mssql', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CAST("{COLUMN_NAME}" AS FLOAT) > ({BASELINE_AVG} + (2*{BASELINE_SD})) 
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'), ('1159', '1025', 'Test Results', 'Outlier_Pct_Below', 'mssql', NULL, 'SELECT ({BASELINE_AVG} + (2*{BASELINE_SD})) AS outlier_threshold, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE CAST("{COLUMN_NAME}" AS FLOAT) < ({BASELINE_AVG} + (2*{BASELINE_SD})) GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" DESC;'), - ('1160', '1026', 'Test Results', 'Pattern_Match', 'mssql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT LIKE ''{BASELINE_VALUE}'' ESCAPE ''\'' GROUP BY "{COLUMN_NAME}";'), + ('1160', '1026', 'Test Results', 'Pattern_Match', 'mssql', NULL, 'SELECT DISTINCT "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE NULLIF("{COLUMN_NAME}", '''') NOT LIKE ''{BASELINE_VALUE}'' GROUP BY "{COLUMN_NAME}";'), ('1161', '1028', 'Test Results', 'Recency', 'mssql', NULL, 'SELECT DISTINCT col AS latest_date_available, CAST(''{TEST_DATE}'' AS DATE) AS test_run_date FROM (SELECT MAX("{COLUMN_NAME}") AS col FROM {TARGET_SCHEMA}.{TABLE_NAME}) a WHERE DATEDIFF(day, col, CAST(''{TEST_DATE}'' AS DATE)) > {THRESHOLD_VALUE};'), ('1162', '1030', 'Test Results', 'Required', 'mssql', NULL, 'SELECT TOP 500 * FROM {TARGET_SCHEMA}.{TABLE_NAME} WHERE "{COLUMN_NAME}" IS NULL;'), ('1163', '1031', 'Test Results', 'Row_Ct', 'mssql', NULL, 'WITH CTE AS (SELECT COUNT(*) AS current_count FROM {TARGET_SCHEMA}.{TABLE_NAME}) SELECT current_count, ABS(ROUND(CAST(100 * (current_count - {THRESHOLD_VALUE}) AS NUMERIC) / CAST({THRESHOLD_VALUE} AS NUMERIC) ,2)) AS row_count_pct_decrease FROM cte WHERE current_count < {THRESHOLD_VALUE};'), From 62386b31fd6f2cc873bd06a0da2a8d7158f4b885 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 26 Nov 2024 21:51:24 -0500 Subject: [PATCH 91/91] release: 2.15.3 -> 2.24.7 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ce2438b..4c70813 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta" [project] name = "dataops-testgen" -version = "2.15.3" +version = "2.24.7" description = "DataKitchen's Data Quality DataOps TestGen" authors = [ { "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },