Skip to content

Commit

Permalink
Fix native snowflake load_file
Browse files Browse the repository at this point in the history
  • Loading branch information
tatiana authored and kaxil committed Jul 20, 2022
1 parent 9bca3bc commit a338e60
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 9 deletions.
20 changes: 20 additions & 0 deletions src/astro/databases/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
FileType.PARQUET: "MATCH_BY_COLUMN_NAME=CASE_INSENSITIVE",
}

# File types that is_native_load_file_available accepts for the native load path.
NATIVE_LOAD_SUPPORTED_FILE_TYPES = (FileType.CSV, FileType.NDJSON, FileType.PARQUET)
# File locations that is_native_load_file_available accepts for the native load path.
NATIVE_LOAD_SUPPORTED_FILE_LOCATIONS = (FileLocation.GS, FileLocation.S3)


@dataclass
class SnowflakeStage:
Expand Down Expand Up @@ -292,6 +295,23 @@ def drop_stage(self, stage: SnowflakeStage) -> None:
# Table load methods
# ---------------------------------------------------------

def is_native_load_file_available(
    self, source_file: File, target_table: Table
) -> bool:
    """
    Report whether the native (optimised) load path can be used for a file.

    The answer is based only on the source file: both its type and its
    location must be listed in the module-level NATIVE_LOAD_SUPPORTED_*
    tuples. ``target_table`` is accepted but not inspected by this check.

    :param source_file: File from which we need to transfer data
    :param target_table: Table that needs to be populated with file data
    :return: True when both the file type and the file location are supported
    """
    # Resolve and test the file type first, then the location, so attribute
    # access happens in the same order regardless of the outcome.
    file_type = source_file.type.name
    type_ok = file_type in NATIVE_LOAD_SUPPORTED_FILE_TYPES

    location_type = source_file.location.location_type
    location_ok = location_type in NATIVE_LOAD_SUPPORTED_FILE_LOCATIONS

    return type_ok and location_ok

def load_file_to_table_natively(
self,
source_file: File,
Expand Down
7 changes: 5 additions & 2 deletions tests/benchmark/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ clean:
@rm -f unittests.cfg
@rm -f unittests.db
@rm -f webserver_config.py
@rm -f ../../unittests.cfg
@rm -f ../../unittests.db
@rm -f ../../airflow.cfg
@rm -f ../../airflow.db

# Takes approximately 7min
setup_gke:
Expand All @@ -45,8 +49,7 @@ local: check_google_credentials
benchmark
@rm -rf astro-sdk


run_job:
run_job:
@gcloud container clusters get-credentials astro-sdk --zone us-central1-a --project ${GCP_PROJECT}
@kubectl apply -f infrastructure/kubernetes/namespace.yaml
@kubectl apply -f infrastructure/kubernetes/postgres.yaml
Expand Down
14 changes: 7 additions & 7 deletions tests/benchmark/config.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
{
"databases": [
{
"name": "snowflake",
"params": {
"conn_id": "snowflake_conn"
}
},
{
"name": "postgres",
"params": {
"conn_id": "postgres_conn",
"conn_id": "postgres_conn_benchmark",
"metadata": {
"database": "postgres"
}
Expand All @@ -17,12 +23,6 @@
"database": "bigquery"
}
}
},
{
"name": "snowflake",
"params": {
"conn_id": "snowflake_conn"
}
}
],
"datasets": [
Expand Down
12 changes: 12 additions & 0 deletions tests/benchmark/debug.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Pod manifest named "troubleshoot" in the "benchmark" namespace.
# It starts a single container from the benchmark image and overrides its
# command with an endless sleep loop, so the container keeps running.
# NOTE(review): presumably meant for exec-ing into the pod to debug the
# benchmark environment - confirm with the benchmark docs.
apiVersion: v1
kind: Pod
metadata:
name: troubleshoot
namespace: benchmark
spec:
containers:
- name: troubleshoot-benchmark
image: gcr.io/astronomer-dag-authoring/benchmark
# Keep the container alive indefinitely: bash loops forever, sleeping
# 30 seconds per iteration, instead of running the image's default command.
command: [ "/bin/bash", "-c", "--" ]
args: [ "while true; do sleep 30; done;" ]

0 comments on commit a338e60

Please sign in to comment.