Skip to content

Commit

Permalink
Merge pull request #25 from DataKitchen/release/2.24.7
Browse files Browse the repository at this point in the history
Release/2.24.7
  • Loading branch information
datakitchen-devops authored Nov 27, 2024
2 parents ce0cc42 + 62386b3 commit 12852bd
Show file tree
Hide file tree
Showing 173 changed files with 10,187 additions and 2,647 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM python:3.10-slim-bookworm AS build-image

RUN mkdir -p /dk && \
apt-get update && \
apt-get install -y gcc libpcre3 libpcre3-dev g++
apt-get install -y gcc libpcre3 libpcre3-dev g++ git

COPY ./pyproject.toml /tmp/dk/
RUN python3 -m pip install /tmp/dk --prefix=/dk
Expand Down
6 changes: 0 additions & 6 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,6 @@ Determine how many tests are grouped together in a single query. Increase for be

default: `5000`

#### `PROJECT_QC_SCHEMA`

Name of the schema to be created in the project database.

default: `qc`

#### `PROJECT_DATABASE_NAME`

Name of the database that the auto-generated project will run tests against.
Expand Down
13 changes: 9 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "dataops-testgen"
version = "2.15.3"
version = "2.24.7"
description = "DataKitchen's Data Quality DataOps TestGen"
authors = [
{ "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },
Expand All @@ -32,6 +32,7 @@ requires-python = ">=3.10"
dependencies = [
"PyYAML==6.0.1",
"click==8.1.3",
"regex==2024.9.11",
"sqlalchemy==1.4.46",
"snowflake-sqlalchemy==1.4.7",
"pyodbc==5.0.0",
Expand Down Expand Up @@ -60,6 +61,8 @@ dependencies = [
"concurrent_log_handler==0.9.25",
"cryptography==42.0.8",
"validators==0.33.0",
"reportlab==4.2.2",
"streamlit-pydantic @ git+https://github.com/LukasMasuch/streamlit-pydantic.git@9f84145b6b6e74cdff3a7815ab75b0464c4d4f24",
]

[project.optional-dependencies]
Expand Down Expand Up @@ -99,7 +102,7 @@ include-package-data = true
[tool.setuptools.package-data]
"*" = ["*.toml", "*.sql", "*.yaml"]
"testgen.template" = ["*.sql", "*.yaml", "**/*.sql", "**/*.yaml"]
"testgen.ui.assets" = ["*.svg", "*.png", "*.js", "*.css"]
"testgen.ui.assets" = ["*.svg", "*.png", "*.js", "*.css", "*.ico", "flavors/*.svg"]
"testgen.ui.components.frontend" = ["*.html", "**/*.js", "**/*.css", "**/*.woff2", "**/*.svg"]

[tool.setuptools.packages.find]
Expand Down Expand Up @@ -224,8 +227,9 @@ select = ["A", "F", "S", "I", "T10", "B", "UP", "ISC", "T20", "RSE", "Q", "ARG",
# globally ignore the following error codes
# * TRY003: Avoid specifying long messages outside the exception class
# * S608: Hardcoded SQL
# # F841: Unused local variable (it is instable)
ignore = ["TRY003", "S608", "S404", "F841"]
# * F841: Unused local variable (it is unstable)
# * B023: Buggy: https://github.com/astral-sh/ruff/issues/7847
ignore = ["TRY003", "S608", "S404", "F841", "B023"]

# Ignore the following errors in files:
# F403 - in __init__.py: We use __all__ in our module files so this behavior is acceptable in __init__.py
Expand All @@ -237,6 +241,7 @@ ignore = ["TRY003", "S608", "S404", "F841"]
"tests*" = ["S101", "T201"]
"invocations/**" = ["ARG001", "T201"]
"testgen/common/encrypt.py" = ["S413"]
"testgen/ui/pdf/dk_logo.py" = ["T201"]

# See: https://coverage.readthedocs.io/en/latest/config.html
[tool.coverage.run]
Expand Down
80 changes: 0 additions & 80 deletions testgen/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import getpass
import logging
import os
import subprocess
Expand Down Expand Up @@ -33,7 +32,6 @@
from testgen.commands.run_observability_exporter import run_observability_exporter
from testgen.commands.run_profiling_bridge import run_profiling_queries
from testgen.commands.run_quick_start import run_quick_start, run_quick_start_increment
from testgen.commands.run_setup_profiling_tools import run_setup_profiling_tools
from testgen.commands.run_upgrade_db_config import get_schema_revision, is_db_revision_up_to_date, run_upgrade_db_config
from testgen.common import (
configure_logging,
Expand Down Expand Up @@ -450,84 +448,6 @@ def do_upgrade_system_version():
click.echo("System and services upgrade is not required.")


@cli.command(
    "setup-target-db-functions", help="Use to set up the utility functions in the target database for running profiles."
)
@click.option(
    "-c",
    "--connection-id",
    help="The identifier for the connection. Use a connection_id shown in list-connections.",
    required=True,
    type=click.STRING,
)
@click.option(
    "-dr",
    "--dry-run",
    default=False,
    is_flag=True,
    required=False,
    help="Dry run to show which schema will be modified",
)
@click.option(
    "-cs",
    "--create-qc-schema",
    default=False,
    is_flag=True,
    required=False,
    help="Create the QC utility schema required in the target database",
)
@click.option("--yes", "-y", default=False, is_flag=True, required=False, help="Force yes")
@click.option(
    "--skip-asking-credentials",
    "-s",
    default=False,
    is_flag=True,
    required=False,
    help="Skip request for special write credentials for target database, uses standard credentials instead",
)
@click.option(
    "--skip-granting-privileges",
    "-sgp",
    default=False,
    is_flag=True,
    required=False,
    help="Skip granting execute privileges to the user for the QC utility schema in the target database",
)
@pass_configuration
def setup_profiling_tools(
    configuration: Configuration,
    connection_id: str,
    dry_run: bool,
    create_qc_schema: bool,
    yes: bool,
    skip_asking_credentials: bool,
    skip_granting_privileges: bool,
):
    """CLI entry point for the ``setup-target-db-functions`` command.

    Unless ``skip_asking_credentials`` is set, prompts on stdin for an admin DB
    user and password. Unless ``yes`` or ``dry_run`` is set, asks for an explicit
    "yes" confirmation and exits without doing anything on any other answer.
    Then delegates to ``run_setup_profiling_tools`` and echoes the schema name it
    returns. ``configuration`` is injected by ``pass_configuration`` and is not
    used in the body.
    """
    db_user, db_password = None, None
    if not skip_asking_credentials:
        db_user = input("Admin DB User?")
        db_password = getpass.getpass("Admin DB Password?")

    # Confirmation is only requested for a real (non-dry) run that was not pre-approved.
    needs_confirmation = not (yes or dry_run)
    if needs_confirmation:
        answer = input(
            f"Are you sure you want to setup the utility functions to be able to run the profile for connection {connection_id}? [yes/No]"
        )
        if answer.lower() != "yes":
            click.echo("Exiting without any operation performed.")
            return

    project_qc_schema = run_setup_profiling_tools(
        connection_id, dry_run, create_qc_schema, db_user, db_password, skip_granting_privileges
    )

    if dry_run:
        message = (
            f"Project DB dry run completed, no changes applied. Modified schema would have been: {project_qc_schema}"
        )
    else:
        message = f"Project DB has been set up. Modified schema: {project_qc_schema}"
    click.echo(message)


@cli.command("get-test-results", help="Fetches results for a test run.")
@click.option(
"-tr",
Expand Down
20 changes: 16 additions & 4 deletions testgen/commands/queries/execute_cat_tests_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from testgen.common import date_service, read_template_sql_file
from testgen.common.database import database_service
from testgen.common.read_file import replace_templated_functions


class CCATExecutionSQL:
Expand All @@ -11,13 +12,13 @@ class CCATExecutionSQL:
test_suite = ""
run_date = ""
test_run_id = ""
table_groups_id = ""
max_query_chars = ""
exception_message = ""

# Test Set Parameters
target_schema = ""
target_table = ""
replace_qc_schema = ""
dctTestParms: typing.ClassVar = {}

def __init__(self, strProjectCode, strTestSuiteId, strTestSuite, strSQLFlavor, max_query_chars, minutes_offset=0):
Expand All @@ -38,9 +39,8 @@ def _ReplaceParms(self, strInputString):
strInputString = strInputString.replace("{PROJECT_CODE}", self.project_code)
strInputString = strInputString.replace("{TEST_SUITE}", self.test_suite)
strInputString = strInputString.replace("{TEST_SUITE_ID}", self.test_suite_id)
# NOTE: REPLACE_QC_SCHEMA is parm replaced to run build query: sets the actual value to replace.
# DATA_QC_SCHEMA is parm in cat_test_conditions that build query replaces via SQL.
strInputString = strInputString.replace("{REPLACE_QC_SCHEMA}", self.replace_qc_schema)
strInputString = strInputString.replace("{TABLE_GROUPS_ID}", self.table_groups_id)

strInputString = strInputString.replace("{SQL_FLAVOR}", self.flavor)
strInputString = strInputString.replace("{CONCAT_OPERATOR}", self.concat_operator)

Expand All @@ -60,6 +60,9 @@ def _ReplaceParms(self, strInputString):

strInputString = strInputString.replace("{RUN_DATE}", self.run_date)

if "{{DKFN_" in strInputString:
strInputString = replace_templated_functions(strInputString, self.flavor)

# Adding escape character where ':' is referenced
strInputString = strInputString.replace(":", "\\:")

Expand Down Expand Up @@ -95,3 +98,12 @@ def FinalizeTestResultsSQL(self):
def PushTestRunStatusUpdateSQL(self):
    """Return the ``ex_update_test_record_in_testrun_table.sql`` template with parameters substituted.

    The template is read from the ``execution`` template directory and passed
    through ``_ReplaceParms``.
    """
    sql_template = read_template_sql_file("ex_update_test_record_in_testrun_table.sql", "execution")
    return self._ReplaceParms(sql_template)

def FinalizeTestSuiteUpdateSQL(self):
    """Return the ``ex_update_test_suite.sql`` template with parameters substituted.

    The template is read from the ``execution`` template directory and passed
    through ``_ReplaceParms``.
    """
    sql_template = read_template_sql_file("ex_update_test_suite.sql", "execution")
    return self._ReplaceParms(sql_template)


def TestScoringRollupSQL(self):
    """Return the ``test_scoring_rollup.sql`` template with parameters substituted.

    The template is read from the ``execution`` template directory and passed
    through ``_ReplaceParms``.
    """
    sql_template = read_template_sql_file("test_scoring_rollup.sql", "execution")
    return self._ReplaceParms(sql_template)
32 changes: 19 additions & 13 deletions testgen/commands/queries/profiling_query.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import typing

from testgen.common import date_service, read_template_sql_file, read_template_yaml_file
from testgen.common.read_file import replace_templated_functions


class CProfilingSQL:
Expand All @@ -13,7 +14,6 @@ class CProfilingSQL:
table_groups_id = ""
flavor = ""
run_date = ""
data_qc_schema = ""
data_schema = ""
data_table = ""

Expand Down Expand Up @@ -74,7 +74,6 @@ def ReplaceParms(self, strInputString):
strInputString = strInputString.replace("{TABLE_GROUPS_ID}", self.table_groups_id)
strInputString = strInputString.replace("{RUN_DATE}", self.run_date)
strInputString = strInputString.replace("{DATA_SCHEMA}", self.data_schema)
strInputString = strInputString.replace("{DATA_QC_SCHEMA}", self.data_qc_schema)
strInputString = strInputString.replace("{DATA_TABLE}", self.data_table)
strInputString = strInputString.replace("{COL_NAME}", self.col_name)
strInputString = strInputString.replace("{COL_NAME_SANITIZED}", self.col_name.replace("'", "''"))
Expand All @@ -98,6 +97,8 @@ def ReplaceParms(self, strInputString):
strInputString = strInputString.replace("{CONTINGENCY_COLUMNS}", self.contingency_columns)
strInputString = strInputString.replace("{CONTINGENCY_MAX_VALUES}", self.contingency_max_values)
strInputString = strInputString.replace("{PROCESS_ID}", str(self.process_id))
if "{{DKFN_" in strInputString:
strInputString = replace_templated_functions(strInputString, self.flavor)

return strInputString

Expand Down Expand Up @@ -141,11 +142,16 @@ def GetPIIFlagUpdateQuery(self):
strQ = self.ReplaceParms(read_template_sql_file("pii_flag.sql", sub_directory="profiling"))
return strQ

def GetAnomalyRefreshQuery(self):
def GetAnomalyStatsRefreshQuery(self):
    """Return the ``refresh_anomalies.sql`` profiling template with parameters substituted."""
    # Runs on DK Postgres Server
    sql_template = read_template_sql_file("refresh_anomalies.sql", sub_directory="profiling")
    return self.ReplaceParms(sql_template)

def GetAnomalyScoringRollupQuery(self):
    """Return the ``profile_anomaly_scoring_rollup.sql`` profiling template with parameters substituted."""
    # Runs on DK Postgres Server
    sql_template = read_template_sql_file("profile_anomaly_scoring_rollup.sql", sub_directory="profiling")
    return self.ReplaceParms(sql_template)

def GetAnomalyTestTypesQuery(self):
# Runs on DK Postgres Server
strQ = self.ReplaceParms(read_template_sql_file("profile_anomaly_types_get.sql", sub_directory="profiling"))
Expand Down Expand Up @@ -175,6 +181,16 @@ def GetAnomalyTestQuery(self, dct_test_type):

return strQ

def GetAnomalyScoringQuery(self, dct_test_type):
    """Build the anomaly-scoring SQL for one anomaly type.

    Reads ``profile_anomaly_scoring.sql`` from the ``profiling`` templates and,
    when the template is non-empty, fills ``{PROFILE_RUN_ID}`` from
    ``self.profile_run_id`` and ``{ANOMALY_ID}``, ``{PREV_FORMULA}`` and
    ``{RISK}`` from the ``id``, ``dq_score_prevalence_formula`` and
    ``dq_score_risk_factor`` entries of ``dct_test_type``. The general
    ``ReplaceParms`` pass is not applied here. A falsy template is returned
    unchanged.
    """
    # Runs on DK Postgres Server
    sql = read_template_sql_file("profile_anomaly_scoring.sql", sub_directory="profiling")
    if not sql:
        return sql

    sql = sql.replace("{PROFILE_RUN_ID}", self.profile_run_id)
    # Look each key up only when its placeholder is processed, in this order.
    placeholder_keys = (
        ("{ANOMALY_ID}", "id"),
        ("{PREV_FORMULA}", "dq_score_prevalence_formula"),
        ("{RISK}", "dq_score_risk_factor"),
    )
    for placeholder, key in placeholder_keys:
        sql = sql.replace(placeholder, dct_test_type[key])
    return sql

def GetDataCharsRefreshQuery(self):
# Runs on DK Postgres Server
strQ = self.ReplaceParms(
Expand Down Expand Up @@ -227,16 +243,6 @@ def _get_mask_query(self, mask, is_include):
sub_query += ")"
return sub_query

def GetFunctionCreatorQuery(self):
    """Return the flavor-specific ``project_function_creator_<flavor>.sql`` template with parameters substituted.

    The template is looked up under ``flavors/<flavor>/setup_profiling_tools``
    using ``self.flavor`` and passed through ``ReplaceParms``.
    """
    # Runs on Project DB
    template_name = f"project_function_creator_{self.flavor}.sql"
    template_dir = f"flavors/{self.flavor}/setup_profiling_tools"
    raw_sql = read_template_sql_file(template_name, sub_directory=template_dir)
    return self.ReplaceParms(raw_sql)

def GetProfilingQuery(self):
# Runs on Project DB
if not self.dctSnippetTemplate:
Expand Down
7 changes: 5 additions & 2 deletions testgen/commands/run_execute_cat_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@ def ParseCATResults(clsCATExecute):


def FinalizeTestRun(clsCATExecute):
    """Run the closing queries for a CAT test run.

    Collects SQL strings produced by ``clsCATExecute`` and passes them, in list
    order, to ``RunActionQueryList``.
    """
    # NOTE(review): this first assignment is immediately overwritten by the next
    # statement; it looks like the pre-change line of a rendered diff — confirm
    # which of the two is meant to remain.
    lstQueries = [clsCATExecute.FinalizeTestResultsSQL(), clsCATExecute.PushTestRunStatusUpdateSQL()]
    lstQueries = [clsCATExecute.FinalizeTestResultsSQL(),
                  clsCATExecute.PushTestRunStatusUpdateSQL(),
                  clsCATExecute.FinalizeTestSuiteUpdateSQL(),
                  clsCATExecute.TestScoringRollupSQL()]
    # ("DKTG") is a parenthesized string, not a one-element tuple.
    RunActionQueryList(("DKTG"), lstQueries)


Expand All @@ -80,6 +83,7 @@ def run_cat_test_queries(
)
clsCATExecute.test_run_id = strTestRunID
clsCATExecute.run_date = strTestTime
clsCATExecute.table_groups_id = dctParms["table_groups_id"]
clsCATExecute.exception_message += error_msg

# Set Project Connection Params in common.db_bridgers from retrieved params
Expand Down Expand Up @@ -119,7 +123,6 @@ def run_cat_test_queries(
for dctTable in lstTables:
clsCATExecute.target_schema = dctTable["schema_name"]
clsCATExecute.target_table = dctTable["table_name"]
clsCATExecute.replace_qc_schema = dctTable["replace_qc_schema"]
AggregateTableTests(clsCATExecute)

LOG.info("CurrentStep: Retrieving CAT Tests to Run")
Expand Down
1 change: 0 additions & 1 deletion testgen/commands/run_launch_db_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def _get_params_mapping() -> dict:
"PROFILING_SAMPLE_MIN_COUNT": "",
"PROFILING_DELAY_DAYS": "",
"CONNECTION_NAME": settings.PROJECT_CONNECTION_NAME,
"PROJECT_QC_SCHEMA": settings.PROJECT_QC_SCHEMA,
"TABLE_GROUPS_NAME": settings.DEFAULT_TABLE_GROUPS_NAME,
"TEST_SUITE": settings.DEFAULT_TEST_SUITE_KEY,
"TEST_SUITE_DESCRIPTION": settings.DEFAULT_TEST_SUITE_DESCRIPTION,
Expand Down
Loading

0 comments on commit 12852bd

Please sign in to comment.