From 8390a162fe553f0868da463a5870e7a8a9a3483d Mon Sep 17 00:00:00 2001
From: Aarthy Adityan <aarthy@datakitchen.io>
Date: Tue, 12 Nov 2024 19:55:48 -0500
Subject: [PATCH] feat(pdf): add link backs to pdf issue reports

---
 testgen/ui/pdf/hygiene_issue_report.py | 29 ++++++++++++++++--------
 testgen/ui/pdf/style.py                | 21 +++++++++++++++++
 testgen/ui/pdf/test_result_report.py   | 31 +++++++++++++++++++-------
 testgen/ui/views/hygiene_issues.py     |  4 ++--
 testgen/ui/views/test_results.py       |  2 +-
 testgen/utils/__init__.py              |  8 +++++++
 6 files changed, 75 insertions(+), 20 deletions(-)
diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py
index b228231..7a0462a 100644
--- a/testgen/ui/pdf/hygiene_issue_report.py
+++ b/testgen/ui/pdf/hygiene_issue_report.py
@@ -1,4 +1,3 @@
-import pandas
 from reportlab.lib import colors
 from reportlab.lib.colors import HexColor
 from reportlab.lib.enums import TA_CENTER
@@ -13,13 +12,16 @@
     PARA_STYLE_FOOTNOTE,
     PARA_STYLE_H1,
     PARA_STYLE_INFO,
+    PARA_STYLE_LINK,
     PARA_STYLE_MONO,
     PARA_STYLE_TEXT,
     PARA_STYLE_TITLE,
     TABLE_STYLE_DEFAULT,
+    get_formatted_datetime,
 )
 from testgen.ui.pdf.templates import DatakitchenTemplate
 from testgen.ui.services.hygiene_issues_service import get_source_data
+from testgen.utils import get_base_url
 
 SECTION_MIN_AVAILABLE_HEIGHT = 120
 
@@ -38,9 +40,6 @@ def build_summary_table(document, hi_data):
             ("GRID", (0, 0), (-1, -1), 2, colors.white),
             ("BACKGROUND", (0, 0), (-1, -1), COLOR_GRAY_BG),
 
-            # Empty cells
-            ("BACKGROUND", (2, 5), (-1, -1), colors.white),
-
             # Header cells
             *[
                 (cmd[0], *coords, *cmd[1:])
@@ -64,7 +63,10 @@ def build_summary_table(document, hi_data):
             ("SPAN", (3, 3), (4, 3)),
             ("SPAN", (3, 4), (4, 4)),
             ("SPAN", (3, 5), (4, 5)),
+            ("SPAN", (2, 5), (4, 5)),
 
+            # Link cell
+            ("BACKGROUND", (2, 5), (4, 5), colors.white),
 
             # Status cell
             *[
@@ -80,7 +82,7 @@ def build_summary_table(document, hi_data):
     )
 
 
-    profiling_timestamp = pandas.to_datetime(hi_data["profiling_starttime"]).strftime("%Y-%m-%d %H:%M:%S")
+    profiling_timestamp = get_formatted_datetime(hi_data["profiling_starttime"])
     summary_table_data = [
         (
             "Hygiene Issue",
@@ -106,7 +108,16 @@ def build_summary_table(document, hi_data):
         ("Database/Schema", hi_data["schema_name"], "Profiling Date", profiling_timestamp),
         ("Table", hi_data["table_name"], "Table Group", hi_data["table_groups_name"]),
         ("Column", hi_data["column_name"], "Disposition", hi_data["disposition"] or "No Decision"),
-        ("Column Type", hi_data["column_type"]),
+        (
+            "Column Type",
+            hi_data["column_type"],
+            Paragraph(
+                f"""<a href="{get_base_url()}/profiling-runs:hygiene?run_id={hi_data["profile_run_id"]}&selected={hi_data["id"]}">
+                    View on TestGen >
+                </a>""",
+                style=PARA_STYLE_LINK,
+            ),
+        ),
     ]
 
     summary_table_col_widths = [n * document.width for n in (.15, .35, .15, .15, .20)]
@@ -132,7 +143,7 @@ def build_sample_data_content(document, sample_data_tuple):
         yield from df_table_builder.split_in_columns(table_flowables)
 
 
-def build_sql_query_conntent(sample_data_tuple):
+def build_sql_query_content(sample_data_tuple):
     lookup_query = sample_data_tuple[2]
     if lookup_query:
         return Paragraph(lookup_query, PARA_STYLE_MONO)
@@ -141,7 +152,7 @@ def build_sql_query_conntent(sample_data_tuple):
 
 
 def get_report_content(document, hi_data):
-    yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE)
+    yield Paragraph("TestGen Hygiene Issue Report", PARA_STYLE_TITLE)
     yield build_summary_table(document, hi_data)
 
     yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT)
@@ -156,7 +167,7 @@ def get_report_content(document, hi_data):
 
     yield KeepTogether([
         Paragraph("SQL Query", PARA_STYLE_H1),
-        build_sql_query_conntent(sample_data_tuple)
+        build_sql_query_content(sample_data_tuple)
     ])
 
 
diff --git a/testgen/ui/pdf/style.py b/testgen/ui/pdf/style.py
index 197674e..03ed49a 100644
--- a/testgen/ui/pdf/style.py
+++ b/testgen/ui/pdf/style.py
@@ -1,13 +1,18 @@
+import pandas
+import streamlit as st
 from reportlab.lib import enums
 from reportlab.lib.colors import HexColor
 from reportlab.lib.styles import ParagraphStyle
 from reportlab.platypus import TableStyle
 
+from testgen.common import date_service
+
 COLOR_GRAY_BG = HexColor(0xF2F2F2)
 COLOR_GREEN_BG = HexColor(0xDCE4DA)
 COLOR_YELLOW_BG = HexColor(0xA0C84E40, hasAlpha=True)
 COLOR_GREEN_TEXT = HexColor(0x139549)
 COLOR_FADED_TEXT = HexColor(0x404040)
+COLOR_LINK_TEXT = HexColor(0x1976D2)
 
 PARA_STYLE_DEFAULT = ParagraphStyle(
     "default",
@@ -86,3 +91,19 @@
     fontName="Helvetica",
     leading=10,
 )
+
+PARA_STYLE_LINK = ParagraphStyle(  # style for hyperlink paragraphs (e.g. the "View on TestGen >" cells): 9pt, right-aligned, link-colored
+    "link",
+    PARA_STYLE_DEFAULT,
+    fontSize=9,
+    alignment=enums.TA_RIGHT,
+    textColor=COLOR_LINK_TEXT,
+)
+
+
+def get_formatted_datetime(value) -> str:  # Format `value` for display in the PDF reports via date_service.get_timezoned_timestamp.
+    return date_service.get_timezoned_timestamp(
+        st.session_state,  # NOTE(review): presumably carries the viewer's timezone; requires a Streamlit session — confirm in date_service
+        pandas.to_datetime(value),  # normalize whatever the caller passes into a pandas timestamp first
+        "%b %-d, %-I:%M %p %Z",  # NOTE(review): "%-d" / "%-I" (no zero padding) are platform-specific strftime flags, not supported on Windows — confirm target platforms
+    )
diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py
index f0fa019..c60cfc3 100644
--- a/testgen/ui/pdf/test_result_report.py
+++ b/testgen/ui/pdf/test_result_report.py
@@ -18,10 +18,12 @@
     PARA_STYLE_FOOTNOTE,
     PARA_STYLE_H1,
     PARA_STYLE_INFO,
+    PARA_STYLE_LINK,
     PARA_STYLE_MONO,
     PARA_STYLE_TEXT,
     PARA_STYLE_TITLE,
     TABLE_STYLE_DEFAULT,
+    get_formatted_datetime,
 )
 from testgen.ui.pdf.templates import DatakitchenTemplate
 from testgen.ui.services.database_service import get_schema
@@ -30,6 +32,7 @@
     do_source_data_lookup_custom,
     get_test_result_history,
 )
+from testgen.utils import get_base_url
 
 SECTION_MIN_AVAILABLE_HEIGHT = 120
 
@@ -52,8 +55,8 @@ def build_summary_table(document, tr_data):
             *[
                 (cmd[0], *coords, *cmd[1:])
                 for coords in (
-                    ((3, 3), (3, -1)),
-                    ((0, 0), (0, -1))
+                    ((3, 3), (3, -2)),
+                    ((0, 0), (0, -2))
                 )
                 for cmd in (
                     ("FONT", "Helvetica-Bold"),
@@ -75,6 +78,10 @@ def build_summary_table(document, tr_data):
             ("SPAN", (4, 5), (5, 5)),
             ("SPAN", (1, 6), (2, 6)),
             ("SPAN", (4, 6), (5, 6)),
+            ("SPAN", (0, 7), (5, 7)),
+
+            # Link cell
+            ("BACKGROUND", (0, 7), (5, 7), colors.white),
 
             # Measure cell
             ("FONT", (1, 1), (1, 1), "Helvetica-Bold"),
@@ -94,7 +101,7 @@ def build_summary_table(document, tr_data):
         parent=TABLE_STYLE_DEFAULT,
     )
 
-    test_timestamp = pandas.to_datetime(tr_data["test_time"]).strftime("%Y-%m-%d %H:%M:%S")
+    test_timestamp = get_formatted_datetime(tr_data["test_time"])
     summary_table_data = [
         (
             "Test",
@@ -111,10 +118,18 @@ def build_summary_table(document, tr_data):
         ("Measured Value", tr_data["result_measure"], tr_data["measure_uom_description"]),
         ("Threshold Value", tr_data["threshold_value"], tr_data["threshold_description"]),
 
-        ("Date", test_timestamp, None, "Table Group", tr_data["table_groups_name"]),
+        ("Test Run Date", test_timestamp, None, "Table Group", tr_data["table_groups_name"]),
         ("Database/Schema", tr_data["schema_name"], None, "Test Suite", tr_data["test_suite"]),
         ("Table", tr_data["table_name"], None, "Data Quality Dimension", tr_data["dq_dimension"]),
         ("Column", tr_data["column_names"], None, "Disposition", tr_data["disposition"] or "No Decision"),
+        (
+            Paragraph(
+                f"""<a href="{get_base_url()}/test-runs:results?run_id={tr_data["test_run_id"]}&selected={tr_data["test_result_id"]}">
+                    View on TestGen >
+                </a>""",
+                style=PARA_STYLE_LINK,
+            ),
+        ),
     ]
 
     summary_table_col_widths = [n * document.width for n in (.2, .1, .2, .2, .15, .15)]
@@ -143,7 +158,7 @@ def build_history_table(document, tr_data):
 
     history_df = pandas.DataFrame()
     history_df = history_df.assign(
-        test_date=history_data["test_date"].copy(),
+        test_date=history_data["test_date"].map(get_formatted_datetime).copy(),
         threshold_value=history_data["threshold_value"].astype(float).copy(),
         result_measure=history_data["result_measure"].astype(float).copy(),
         result_status=history_data["result_status"].map(
@@ -176,7 +191,7 @@ def build_sample_data_content(document, sample_data_tuple):
         yield from df_table_builder.split_in_columns(table_flowables)
 
 
-def build_sql_query_conntent(sample_data_tuple):
+def build_sql_query_content(sample_data_tuple):
     lookup_query = sample_data_tuple[2]
     if lookup_query:
         return Paragraph(lookup_query, PARA_STYLE_MONO)
@@ -185,7 +200,7 @@ def build_sql_query_conntent(sample_data_tuple):
 
 
 def get_report_content(document, tr_data):
-    yield Paragraph("TestGen Issue Report", PARA_STYLE_TITLE)
+    yield Paragraph("TestGen Test Issue Report", PARA_STYLE_TITLE)
     yield build_summary_table(document, tr_data)
 
     yield KeepTogether([
@@ -208,7 +223,7 @@ def get_report_content(document, tr_data):
 
     yield KeepTogether([
         Paragraph("SQL Query", PARA_STYLE_H1),
-        build_sql_query_conntent(sample_data_tuple)
+        build_sql_query_content(sample_data_tuple)
     ])
 
 
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 44774c8..49af2ad 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -341,7 +341,7 @@ def get_profiling_anomalies(
                      WHEN t.issue_likelihood = 'Definite'  THEN 4
                    END AS likelihood_order,
                    t.anomaly_description, r.detail, t.suggested_action,
-                   r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime,
+                   r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, r.profile_run_id::VARCHAR,
                    tg.table_groups_name
               FROM {schema}.profile_anomaly_results r
             INNER JOIN {schema}.profile_anomaly_types t
@@ -493,7 +493,7 @@ def do_disposition_update(selected, str_new_status):
 def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE:
     hi_id = tr_data["anomaly_id"]
     profiling_time = pd.Timestamp(tr_data["profiling_starttime"]).strftime("%Y%m%d_%H%M%S")
-    file_name = f"testgen_issue_report_{hi_id}_{profiling_time}.pdf"
+    file_name = f"testgen_hygiene_issue_report_{hi_id}_{profiling_time}.pdf"
 
     with BytesIO() as buffer:
         create_report(buffer, tr_data)
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 478704c..f8d60ed 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -817,7 +817,7 @@ def view_edit_test(button_container, test_definition_id):
 def get_report_file_data(update_progress, tr_data) -> FILE_DATA_TYPE:
     td_id = tr_data["test_definition_id_runtime"][:6]
     tr_time = pd.Timestamp(tr_data["test_time"]).strftime("%Y%m%d_%H%M%S")
-    file_name = f"testgen_issue_report_{td_id}_{tr_time}.pdf"
+    file_name = f"testgen_test_issue_report_{td_id}_{tr_time}.pdf"
 
     with BytesIO() as buffer:
         create_report(buffer, tr_data)
diff --git a/testgen/utils/__init__.py b/testgen/utils/__init__.py
index db58739..40f42b6 100644
--- a/testgen/utils/__init__.py
+++ b/testgen/utils/__init__.py
@@ -1,7 +1,9 @@
 import math
+import urllib.parse
 from uuid import UUID
 
 import pandas as pd
+import streamlit as st
 
 
 def to_int(value: float | int) -> int:
@@ -23,3 +25,9 @@ def is_uuid4(value: str) -> bool:
         return False
     
     return str(uuid) == value
+
+
+# https://github.com/streamlit/streamlit/issues/798#issuecomment-1647759949
+def get_base_url() -> str:  # Return "<protocol>://<host>" built from a Streamlit session's client request.
+    session = st.runtime.get_instance()._session_mgr.list_active_sessions()[0]  # NOTE(review): takes the FIRST active session, not necessarily the caller's; raises IndexError if there are none; `_session_mgr` appears to be a private attribute — confirm
+    return urllib.parse.urlunparse([session.client.request.protocol, session.client.request.host, "", "", "", ""])  # only scheme and netloc are filled in; path, params, query and fragment stay empty