-
Notifications
You must be signed in to change notification settings - Fork 4.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
qa-engine: implement early enrichments and validations on QA report (#…
- Loading branch information
1 parent
438c1eb
commit 734d548
Showing
12 changed files
with
247 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# |
15 changes: 15 additions & 0 deletions
15
tools/ci_connector_ops/ci_connector_ops/qa_engine/constants.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
|
||
CLOUD_CATALOG_URL = "https://storage.googleapis.com/prod-airbyte-cloud-connector-metadata-service/cloud_catalog.json" | ||
OSS_CATALOG_URL = "https://storage.googleapis.com/prod-airbyte-cloud-connector-metadata-service/oss_catalog.json" | ||
|
||
INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS = [ | ||
"8be1cf83-fde1-477f-a4ad-318d23c9f3c6", # Local CSV | ||
"a625d593-bba5-4a1c-a53d-2d246268a816", # Local JSON | ||
"b76be0a6-27dc-4560-95f6-2623da0bd7b6" # Local SQL Lite | ||
] | ||
|
||
GCS_QA_REPORT_PATH = "gs://prod-airbyte-cloud-connector-metadata-service/qa_report.json" |
42 changes: 42 additions & 0 deletions
42
tools/ci_connector_ops/ci_connector_ops/qa_engine/enrichments.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
|
||
import pandas as pd | ||
|
||
def get_enriched_catalog(oss_catalog: pd.DataFrame, cloud_catalog: pd.DataFrame) -> pd.DataFrame: | ||
"""Merge OSS and Cloud catalog in a single dataframe on their definition id. | ||
Transformations: | ||
- Rename columns to snake case. | ||
- Rename name column to connector_name. | ||
- Rename docker_image_tag to connector_version. | ||
- Replace null value for release_stage with alpha. | ||
Enrichments: | ||
- is_on_cloud: determined by the merge operation results. | ||
- connector_technical_name: built from the docker repository field. airbyte/source-pokeapi -> source-pokeapi. | ||
Args: | ||
oss_catalog (pd.DataFrame): The open source catalog dataframe. | ||
cloud_catalog (pd.DataFrame): The cloud catalog dataframe. | ||
Returns: | ||
pd.DataFrame: The enriched catalog. | ||
""" | ||
enriched_catalog = pd.merge( | ||
oss_catalog, | ||
cloud_catalog, | ||
how="left", | ||
on="connector_definition_id", | ||
indicator=True, | ||
suffixes=("", "_del"), | ||
) | ||
enriched_catalog.columns = enriched_catalog.columns.str.replace( | ||
"(?<=[a-z])(?=[A-Z])", "_", regex=True | ||
).str.lower() # column names to snake case | ||
enriched_catalog["is_on_cloud"] = enriched_catalog["_merge"] == "both" | ||
enriched_catalog = enriched_catalog.drop(columns="_merge") | ||
enriched_catalog["connector_name"] = enriched_catalog["name"] | ||
enriched_catalog["connector_technical_name"] = enriched_catalog["docker_repository"].str.replace("airbyte/", "") | ||
enriched_catalog["connector_version"] = enriched_catalog["docker_image_tag"] | ||
enriched_catalog["release_stage"] = enriched_catalog["release_stage"].fillna("unknown") | ||
return enriched_catalog |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
60 changes: 60 additions & 0 deletions
60
tools/ci_connector_ops/ci_connector_ops/qa_engine/validations.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
|
||
import pandas as pd | ||
import requests | ||
|
||
from .constants import INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS | ||
from .inputs import OSS_CATALOG | ||
from .models import ConnectorQAReport, QAReport | ||
|
||
class QAReportGenerationError(Exception): | ||
pass | ||
|
||
def url_is_reachable(url: str) -> bool: | ||
response = requests.get(url) | ||
return response.status_code == 200 | ||
|
||
def is_appropriate_for_cloud_use(definition_id: str) -> bool: | ||
return definition_id not in INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS | ||
|
||
def get_qa_report(enriched_catalog: pd.DataFrame) -> pd.DataFrame: | ||
"""Perform validation steps on top of the enriched catalog. | ||
Adds the following columns: | ||
- documentation_is_available: | ||
GET the documentation URL and expect a 200 status code. | ||
- is_appropriate_for_cloud_use: | ||
Determined from an hardcoded list of definition ids inappropriate for cloud use. | ||
- latest_build_is_successful: | ||
Check if the latest build for the current connector version is successful. | ||
- number_of_connections: | ||
Get the number of connections using this connector version from our datawarehouse. | ||
- number_of_users: | ||
Get the number of users using this connector version from our datawarehouse. | ||
- sync_success_rate: | ||
Get the sync success rate of the connections with this connector version from our datawarehouse. | ||
Args: | ||
enriched_catalog (pd.DataFrame): The enriched catalog. | ||
Returns: | ||
pd.DataFrame: The final QA report. | ||
""" | ||
qa_report = enriched_catalog.copy(deep=True) | ||
qa_report["documentation_is_available"] = qa_report.documentation_url.apply(url_is_reachable) | ||
qa_report["is_appropriate_for_cloud_use"] = qa_report.connector_definition_id.apply(is_appropriate_for_cloud_use) | ||
|
||
# TODO YET TO IMPLEMENT VALIDATIONS | ||
qa_report["latest_build_is_successful"] = False # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21720 | ||
qa_report["number_of_connections"] = 0 # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21721 | ||
qa_report["number_of_users"] = 0 # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21721 | ||
qa_report["sync_success_rate"] = .0 # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21721 | ||
|
||
# Only select dataframe columns defined in the ConnectorQAReport model. | ||
qa_report= qa_report[[field.name for field in ConnectorQAReport.__fields__.values()]] | ||
# Validate the report structure with pydantic QAReport model. | ||
QAReport(connectors_qa_report=qa_report.to_dict(orient="records")) | ||
if len(qa_report) != len(OSS_CATALOG): | ||
raise QAReportGenerationError("The QA report does not contain all the connectors defined in the OSS catalog.") | ||
return qa_report |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# |
38 changes: 38 additions & 0 deletions
38
tools/ci_connector_ops/tests/test_qa_engine/test_enrichments.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
|
||
import re | ||
|
||
import pandas as pd | ||
import pytest | ||
|
||
from ci_connector_ops.qa_engine import inputs, enrichments | ||
|
||
@pytest.fixture | ||
def enriched_catalog() -> pd.DataFrame: | ||
return enrichments.get_enriched_catalog(inputs.OSS_CATALOG, inputs.CLOUD_CATALOG) | ||
|
||
@pytest.fixture | ||
def enriched_catalog_columns(enriched_catalog: pd.DataFrame) -> set: | ||
return set(enriched_catalog.columns) | ||
|
||
def test_merge_performed_correctly(enriched_catalog): | ||
assert len(enriched_catalog) == len(inputs.OSS_CATALOG) | ||
|
||
def test_new_columns_are_added(enriched_catalog_columns): | ||
expected_new_columns = { | ||
"is_on_cloud", | ||
"connector_name", | ||
"connector_technical_name", | ||
"connector_version" | ||
} | ||
assert expected_new_columns.issubset(enriched_catalog_columns) | ||
|
||
def test_no_column_are_removed_and_lowercased(enriched_catalog_columns): | ||
for column in inputs.OSS_CATALOG: | ||
assert re.sub(r"(?<!^)(?=[A-Z])", "_", column).lower() in enriched_catalog_columns | ||
|
||
def test_release_stage_not_null(enriched_catalog): | ||
assert len(enriched_catalog["release_stage"].dropna()) == len(enriched_catalog["release_stage"]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
tools/ci_connector_ops/tests/test_qa_engine/test_validations.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
|
||
import pandas as pd | ||
import pytest | ||
|
||
from ci_connector_ops.qa_engine import inputs, enrichments, models, validations | ||
|
||
@pytest.fixture | ||
def enriched_catalog() -> pd.DataFrame: | ||
return enrichments.get_enriched_catalog(inputs.OSS_CATALOG, inputs.CLOUD_CATALOG) | ||
|
||
@pytest.fixture | ||
def qa_report(enriched_catalog, mocker) -> pd.DataFrame: | ||
mocker.patch.object(validations, "url_is_reachable", mocker.Mock(return_value=True)) | ||
return validations.get_qa_report(enriched_catalog) | ||
|
||
@pytest.fixture | ||
def qa_report_columns(qa_report: pd.DataFrame) -> set: | ||
return set(qa_report.columns) | ||
|
||
def test_all_columns_are_declared(qa_report_columns: set): | ||
expected_columns = set([field.name for field in models.ConnectorQAReport.__fields__.values()]) | ||
assert qa_report_columns == expected_columns | ||
|
||
def test_not_null_values_after_validation(qa_report: pd.DataFrame): | ||
assert len(qa_report.dropna()) == len(qa_report) | ||
|
||
def test_report_generation_error(enriched_catalog, mocker): | ||
mocker.patch.object(validations, "url_is_reachable", mocker.Mock(return_value=True)) | ||
with pytest.raises(validations.QAReportGenerationError): | ||
return validations.get_qa_report(enriched_catalog.sample(10)) |