Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingestion/lookml): fix for sql parsing error #11079

Merged
merged 9 commits into from
Aug 9, 2024
2 changes: 1 addition & 1 deletion metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
sqlglot_lib = {
# Using an Acryl fork of sqlglot.
# https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1
"acryl-sqlglot[rs]==25.3.1.dev3",
"acryl-sqlglot[rs]==25.8.2.dev9",
}

classification_lib = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"query_type": "SELECT",
"query_type_props": {},
"query_fingerprint": "772187d1c6ce8dbed2dd1ba79975b108d4e733015ffb7bcbf9b7146e64cf9914",
"query_fingerprint": "c721ce16410601b36e5f32bd9c5c28488500a93e617363739faebfe71496f163",
"in_tables": [
"urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMNS,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-staging-2.smoke_test_db_4.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS,PROD)"
Expand Down Expand Up @@ -178,6 +178,6 @@
],
"debug_info": {
"confidence": 0.2,
"generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(field_path, ?) THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMNS AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC"
"generalized_statement": "SELECT c.table_catalog AS table_catalog, c.table_schema AS table_schema, c.table_name AS table_name, c.column_name AS column_name, c.ordinal_position AS ordinal_position, cfp.field_path AS field_path, c.is_nullable AS is_nullable, CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ?) THEN NULL ELSE c.data_type END AS data_type, description AS comment, c.is_hidden AS is_hidden, c.is_partitioning_column AS is_partitioning_column, c.clustering_ordinal_position AS clustering_ordinal_position FROM `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMNS AS c JOIN `acryl-staging-2`.`smoke_test_db_4`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS AS cfp ON cfp.table_name = c.table_name AND cfp.column_name = c.column_name ORDER BY table_catalog, table_schema, table_name, ordinal_position ASC, data_type DESC"
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"query_type": "MERGE",
"query_type_props": {},
"query_fingerprint": "38a78af8cc48333df0e4de7d6af5b9507a87dd8a2f129ef97c9b06dce2ca7b9f",
"query_fingerprint": "8001b852498d94a7f0f532dcd8cfa05328981ba437df6314466c764cc408969c",
"in_tables": [
"urn:li:dataset:(urn:li:dataPlatform:bigquery,demo-pipelines-stg.referrer.prep_from_ios,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:bigquery,demo-pipelines-stg.referrer.prep_from_web,PROD)"
Expand All @@ -12,6 +12,6 @@
"column_lineage": null,
"debug_info": {
"confidence": 0.2,
"generalized_statement": "MERGE INTO `demo-pipelines-stg`.`referrer`.`base_union` AS DBT_INTERNAL_DEST USING (SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_ios` WHERE partition_time = ? UNION ALL SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_web` WHERE partition_time = ?) AS DBT_INTERNAL_SOURCE ON FALSE WHEN NOT MATCHED BY SOURCE AND timestamp_trunc(DBT_INTERNAL_DEST.partition_time, DAY) IN (TIMESTAMP(?)) THEN delete WHEN NOT MATCHED THEN INSERT (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`) VALUES (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`)"
"generalized_statement": "MERGE INTO `demo-pipelines-stg`.`referrer`.`base_union` AS DBT_INTERNAL_DEST USING (SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_ios` WHERE partition_time = ? UNION ALL SELECT * FROM `demo-pipelines-stg`.`referrer`.`prep_from_web` WHERE partition_time = ?) AS DBT_INTERNAL_SOURCE ON FALSE WHEN NOT MATCHED BY SOURCE AND timestamp_trunc(DBT_INTERNAL_DEST.partition_time, DAY) IN (timestamp(?)) THEN delete WHEN NOT MATCHED THEN INSERT (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`) VALUES (`platform`, `pageview_id`, `query`, `referrer`, `partition_time`)"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1199,7 +1199,7 @@ def test_bigquery_information_schema_query() -> None:
c.ordinal_position as ordinal_position,
cfp.field_path as field_path,
c.is_nullable as is_nullable,
CASE WHEN CONTAINS_SUBSTR(field_path, ".") THEN NULL ELSE c.data_type END as data_type,
CASE WHEN CONTAINS_SUBSTR(cfp.field_path, ".") THEN NULL ELSE c.data_type END as data_type,
description as comment,
c.is_hidden as is_hidden,
c.is_partitioning_column as is_partitioning_column,
Expand Down
Loading