Skip to content

Commit

Permalink
feat(pdf): add link backs to pdf issue reports
Browse files Browse the repository at this point in the history
  • Loading branch information
aarthy-dk committed Nov 13, 2024
1 parent 5be063c commit 8390a16
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 20 deletions.
29 changes: 20 additions & 9 deletions testgen/ui/pdf/hygiene_issue_report.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import pandas
from reportlab.lib import colors
from reportlab.lib.colors import HexColor
from reportlab.lib.enums import TA_CENTER
Expand All @@ -13,13 +12,16 @@
PARA_STYLE_FOOTNOTE,
PARA_STYLE_H1,
PARA_STYLE_INFO,
PARA_STYLE_LINK,
PARA_STYLE_MONO,
PARA_STYLE_TEXT,
PARA_STYLE_TITLE,
TABLE_STYLE_DEFAULT,
get_formatted_datetime,
)
from testgen.ui.pdf.templates import DatakitchenTemplate
from testgen.ui.services.hygiene_issues_service import get_source_data
from testgen.utils import get_base_url

SECTION_MIN_AVAILABLE_HEIGHT = 120

Expand All @@ -38,9 +40,6 @@ def build_summary_table(document, hi_data):
("GRID", (0, 0), (-1, -1), 2, colors.white),
("BACKGROUND", (0, 0), (-1, -1), COLOR_GRAY_BG),

# Empty cells
("BACKGROUND", (2, 5), (-1, -1), colors.white),

# Header cells
*[
(cmd[0], *coords, *cmd[1:])
Expand All @@ -64,7 +63,10 @@ def build_summary_table(document, hi_data):
("SPAN", (3, 3), (4, 3)),
("SPAN", (3, 4), (4, 4)),
("SPAN", (3, 5), (4, 5)),
("SPAN", (2, 5), (4, 5)),

# Link cell
("BACKGROUND", (2, 5), (4, 5), colors.white),

# Status cell
*[
Expand All @@ -80,7 +82,7 @@ def build_summary_table(document, hi_data):
)


profiling_timestamp = pandas.to_datetime(hi_data["profiling_starttime"]).strftime("%Y-%m-%d %H:%M:%S")
profiling_timestamp = get_formatted_datetime(hi_data["profiling_starttime"])
summary_table_data = [
(
"Hygiene Issue",
Expand All @@ -106,7 +108,16 @@ def build_summary_table(document, hi_data):
("Database/Schema", hi_data["schema_name"], "Profiling Date", profiling_timestamp),
("Table", hi_data["table_name"], "Table Group", hi_data["table_groups_name"]),
("Column", hi_data["column_name"], "Disposition", hi_data["disposition"] or "No Decision"),
("Column Type", hi_data["column_type"]),
(
"Column Type",
hi_data["column_type"],
Paragraph(
f"""<a href="{get_base_url()}/profiling-runs:hygiene?run_id={hi_data["profile_run_id"]}&selected={hi_data["id"]}">
View on TestGen >
</a>""",
style=PARA_STYLE_LINK,
),
),
]

summary_table_col_widths = [n * document.width for n in (.15, .35, .15, .15, .20)]
Expand All @@ -132,7 +143,7 @@ def build_sample_data_content(document, sample_data_tuple):
yield from df_table_builder.split_in_columns(table_flowables)


def build_sql_query_conntent(sample_data_tuple):
def build_sql_query_content(sample_data_tuple):
lookup_query = sample_data_tuple[2]
if lookup_query:
return Paragraph(lookup_query, PARA_STYLE_MONO)
Expand All @@ -141,7 +152,7 @@ def build_sql_query_conntent(sample_data_tuple):


def get_report_content(document, hi_data):
yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE)
yield Paragraph("TestGen Hygiene Issue Report", PARA_STYLE_TITLE)
yield build_summary_table(document, hi_data)

yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT)
Expand All @@ -156,7 +167,7 @@ def get_report_content(document, hi_data):

yield KeepTogether([
Paragraph("SQL Query", PARA_STYLE_H1),
build_sql_query_conntent(sample_data_tuple)
build_sql_query_content(sample_data_tuple)
])


Expand Down
21 changes: 21 additions & 0 deletions testgen/ui/pdf/style.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import pandas
import streamlit as st
from reportlab.lib import enums
from reportlab.lib.colors import HexColor
from reportlab.lib.styles import ParagraphStyle
from reportlab.platypus import TableStyle

from testgen.common import date_service

COLOR_GRAY_BG = HexColor(0xF2F2F2)
COLOR_GREEN_BG = HexColor(0xDCE4DA)
COLOR_YELLOW_BG = HexColor(0xA0C84E40, hasAlpha=True)
COLOR_GREEN_TEXT = HexColor(0x139549)
COLOR_FADED_TEXT = HexColor(0x404040)
COLOR_LINK_TEXT = HexColor(0x1976D2)

PARA_STYLE_DEFAULT = ParagraphStyle(
"default",
Expand Down Expand Up @@ -86,3 +91,19 @@
fontName="Helvetica",
leading=10,
)

# Paragraph style named "link", derived from PARA_STYLE_DEFAULT: 9pt,
# right-aligned text drawn in COLOR_LINK_TEXT. The report builders pass it as
# the style of the "View on TestGen >" anchor paragraphs in the summary tables.
PARA_STYLE_LINK = ParagraphStyle(
"link",
PARA_STYLE_DEFAULT,
fontSize=9,
alignment=enums.TA_RIGHT,
textColor=COLOR_LINK_TEXT,
)


def get_formatted_datetime(value) -> str:
    """Return *value* as a display string for the PDF reports.

    The value is parsed with ``pandas.to_datetime`` and handed, together with
    the current Streamlit session state and a fixed display format, to
    ``date_service.get_timezoned_timestamp``.

    Args:
        value: Anything ``pandas.to_datetime`` accepts.

    Returns:
        Whatever ``date_service.get_timezoned_timestamp`` produces for the
        parsed value (annotated as ``str``).
    """
    # NOTE(review): presumably the helper feeds this to strftime, giving text
    # like "Nov 13, 3:05 PM UTC" - confirm. The "%-d" / "%-I" (no zero padding)
    # directives are platform-specific extensions and are not supported by
    # every C library (e.g. Windows) - verify the deployment targets.
    display_format = "%b %-d, %-I:%M %p %Z"

    # NOTE(review): the session state is assumed to carry the user's timezone
    # preference that the helper applies - confirm in date_service.
    session_state = st.session_state
    parsed_value = pandas.to_datetime(value)
    return date_service.get_timezoned_timestamp(session_state, parsed_value, display_format)
31 changes: 23 additions & 8 deletions testgen/ui/pdf/test_result_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
PARA_STYLE_FOOTNOTE,
PARA_STYLE_H1,
PARA_STYLE_INFO,
PARA_STYLE_LINK,
PARA_STYLE_MONO,
PARA_STYLE_TEXT,
PARA_STYLE_TITLE,
TABLE_STYLE_DEFAULT,
get_formatted_datetime,
)
from testgen.ui.pdf.templates import DatakitchenTemplate
from testgen.ui.services.database_service import get_schema
Expand All @@ -30,6 +32,7 @@
do_source_data_lookup_custom,
get_test_result_history,
)
from testgen.utils import get_base_url

SECTION_MIN_AVAILABLE_HEIGHT = 120

Expand All @@ -52,8 +55,8 @@ def build_summary_table(document, tr_data):
*[
(cmd[0], *coords, *cmd[1:])
for coords in (
((3, 3), (3, -1)),
((0, 0), (0, -1))
((3, 3), (3, -2)),
((0, 0), (0, -2))
)
for cmd in (
("FONT", "Helvetica-Bold"),
Expand All @@ -75,6 +78,10 @@ def build_summary_table(document, tr_data):
("SPAN", (4, 5), (5, 5)),
("SPAN", (1, 6), (2, 6)),
("SPAN", (4, 6), (5, 6)),
("SPAN", (0, 7), (5, 7)),

# Link cell
("BACKGROUND", (0, 7), (5, 7), colors.white),

# Measure cell
("FONT", (1, 1), (1, 1), "Helvetica-Bold"),
Expand All @@ -94,7 +101,7 @@ def build_summary_table(document, tr_data):
parent=TABLE_STYLE_DEFAULT,
)

test_timestamp = pandas.to_datetime(tr_data["test_time"]).strftime("%Y-%m-%d %H:%M:%S")
test_timestamp = get_formatted_datetime(tr_data["test_time"])
summary_table_data = [
(
"Test",
Expand All @@ -111,10 +118,18 @@ def build_summary_table(document, tr_data):
("Measured Value", tr_data["result_measure"], tr_data["measure_uom_description"]),
("Threshold Value", tr_data["threshold_value"], tr_data["threshold_description"]),

("Date", test_timestamp, None, "Table Group", tr_data["table_groups_name"]),
("Test Run Date", test_timestamp, None, "Table Group", tr_data["table_groups_name"]),
("Database/Schema", tr_data["schema_name"], None, "Test Suite", tr_data["test_suite"]),
("Table", tr_data["table_name"], None, "Data Quality Dimension", tr_data["dq_dimension"]),
("Column", tr_data["column_names"], None, "Disposition", tr_data["disposition"] or "No Decision"),
(
Paragraph(
f"""<a href="{get_base_url()}/test-runs:results?run_id={tr_data["test_run_id"]}&selected={tr_data["test_result_id"]}">
View on TestGen >
</a>""",
style=PARA_STYLE_LINK,
),
),
]

summary_table_col_widths = [n * document.width for n in (.2, .1, .2, .2, .15, .15)]
Expand Down Expand Up @@ -143,7 +158,7 @@ def build_history_table(document, tr_data):

history_df = pandas.DataFrame()
history_df = history_df.assign(
test_date=history_data["test_date"].copy(),
test_date=history_data["test_date"].map(get_formatted_datetime).copy(),
threshold_value=history_data["threshold_value"].astype(float).copy(),
result_measure=history_data["result_measure"].astype(float).copy(),
result_status=history_data["result_status"].map(
Expand Down Expand Up @@ -176,7 +191,7 @@ def build_sample_data_content(document, sample_data_tuple):
yield from df_table_builder.split_in_columns(table_flowables)


def build_sql_query_conntent(sample_data_tuple):
def build_sql_query_content(sample_data_tuple):
lookup_query = sample_data_tuple[2]
if lookup_query:
return Paragraph(lookup_query, PARA_STYLE_MONO)
Expand All @@ -185,7 +200,7 @@ def build_sql_query_conntent(sample_data_tuple):


def get_report_content(document, tr_data):
yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE)
yield Paragraph("TestGen Test Issue Report", PARA_STYLE_TITLE)
yield build_summary_table(document, tr_data)

yield KeepTogether([
Expand All @@ -208,7 +223,7 @@ def get_report_content(document, tr_data):

yield KeepTogether([
Paragraph("SQL Query", PARA_STYLE_H1),
build_sql_query_conntent(sample_data_tuple)
build_sql_query_content(sample_data_tuple)
])


Expand Down
4 changes: 2 additions & 2 deletions testgen/ui/views/hygiene_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ def get_profiling_anomalies(
WHEN t.issue_likelihood = 'Definite' THEN 4
END AS likelihood_order,
t.anomaly_description, r.detail, t.suggested_action,
r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime,
r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, r.profile_run_id::VARCHAR,
tg.table_groups_name
FROM {schema}.profile_anomaly_results r
INNER JOIN {schema}.profile_anomaly_types t
Expand Down Expand Up @@ -493,7 +493,7 @@ def do_disposition_update(selected, str_new_status):
def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE:
hi_id = tr_data["anomaly_id"]
profiling_time = pd.Timestamp(tr_data["profiling_starttime"]).strftime("%Y%m%d_%H%M%S")
file_name = f"testgen_issue_report_{hi_id}_{profiling_time}.pdf"
file_name = f"testgen_hygiene_issue_report_{hi_id}_{profiling_time}.pdf"

with BytesIO() as buffer:
create_report(buffer, tr_data)
Expand Down
2 changes: 1 addition & 1 deletion testgen/ui/views/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,7 +817,7 @@ def view_edit_test(button_container, test_definition_id):
def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE:
td_id = tr_data["test_definition_id_runtime"][:6]
tr_time = pd.Timestamp(tr_data["test_time"]).strftime("%Y%m%d_%H%M%S")
file_name = f"testgen_issue_report_{td_id}_{tr_time}.pdf"
file_name = f"testgen_test_issue_report_{td_id}_{tr_time}.pdf"

with BytesIO() as buffer:
create_report(buffer, tr_data)
Expand Down
8 changes: 8 additions & 0 deletions testgen/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import math
import urllib.parse
from uuid import UUID

import pandas as pd
import streamlit as st


def to_int(value: float | int) -> int:
Expand All @@ -23,3 +25,9 @@ def is_uuid4(value: str) -> bool:
return False

return str(uuid) == value


# https://github.com/streamlit/streamlit/issues/798#issuecomment-1647759949
def get_base_url() -> str:
    """Return the ``protocol://host`` base URL of the running Streamlit app.

    The protocol and host are read from the client request of the first
    session reported by the Streamlit runtime's session manager, and joined
    with ``urllib.parse.urlunparse`` with every other URL component empty.

    Returns:
        The base URL without path, params, query or fragment.

    Raises:
        IndexError: If the session manager reports no active sessions.
    """
    # NOTE(review): `_session_mgr` is a private Streamlit attribute and may
    # change between Streamlit releases - confirm on upgrade.
    active_sessions = st.runtime.get_instance()._session_mgr.list_active_sessions()

    # NOTE(review): the first listed session is not necessarily the caller's
    # own session; this presumably relies on every session reaching the app
    # through the same protocol and host - verify for multi-host deployments.
    request = active_sessions[0].client.request
    url_components = [request.protocol, request.host, "", "", "", ""]
    return urllib.parse.urlunparse(url_components)

0 comments on commit 8390a16

Please sign in to comment.