Skip to content

Commit

Permalink
fix(ingest/redshift): handle multiline alter table commands (#10727)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored and yoonhyejin committed Jul 16, 2024
1 parent a484d32 commit f6cd95f
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 6 deletions.
1 change: 0 additions & 1 deletion metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@
*sqlglot_lib,
"GitPython>2",
"python-liquid",
*sqlglot_lib,
}

bigquery_common = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
)
from datahub.metadata.urns import DatasetUrn
from datahub.sql_parsing.schema_resolver import SchemaResolver
from datahub.sql_parsing.sqlglot_utils import get_dialect, parse_statement
from datahub.utilities import memory_footprint
from datahub.utilities.dedup_list import deduplicate_list

Expand Down Expand Up @@ -128,7 +129,7 @@ def parse_alter_table_rename(default_schema: str, query: str) -> Tuple[str, str,
Parses an ALTER TABLE ... RENAME TO ... query and returns the schema, previous table name, and new table name.
"""

parsed_query = sqlglot.parse_one(query, dialect="redshift")
parsed_query = parse_statement(query, dialect=get_dialect("redshift"))
assert isinstance(parsed_query, sqlglot.exp.AlterTable)
prev_name = parsed_query.this.name
rename_clause = parsed_query.args["actions"][0]
Expand Down Expand Up @@ -865,10 +866,19 @@ def _process_table_renames(
for rename_row in RedshiftDataDictionary.get_alter_table_commands(
connection, query
):
schema, prev_name, new_name = parse_alter_table_rename(
default_schema=self.config.default_schema,
query=rename_row.query_text,
)
# Redshift's system table sometimes encodes newlines as the literal two-character sequence \n
# instead of a proper newline character, which can break our parser.
query_text = rename_row.query_text.replace("\\n", "\n")

try:
schema, prev_name, new_name = parse_alter_table_rename(
default_schema=self.config.default_schema,
query=query_text,
)
except ValueError as e:
logger.info(f"Failed to parse alter table rename: {e}")
self.report.num_alter_table_parse_errors += 1
continue

prev_urn = make_dataset_urn_with_platform_instance(
platform=LineageDatasetPlatform.REDSHIFT.value,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class RedshiftReport(
num_lineage_dropped_not_support_copy_path: int = 0
num_lineage_processed_temp_tables = 0
num_lineage_dropped_s3_path: int = 0
num_alter_table_parse_errors: int = 0

lineage_start_time: Optional[datetime] = None
lineage_end_time: Optional[datetime] = None
Expand Down

0 comments on commit f6cd95f

Please sign in to comment.