This repository has been archived by the owner on Apr 11, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathexample_universal_transfer_operator.py
156 lines (138 loc) · 6.08 KB
/
example_universal_transfer_operator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import os
import pathlib
from datetime import datetime
from airflow import DAG
from universal_transfer_operator.constants import FileType
from universal_transfer_operator.datasets.file.base import File
from universal_transfer_operator.datasets.table import Metadata, Table
from universal_transfer_operator.universal_transfer_operator import UniversalTransferOperator
# Bucket URIs used by the example tasks; each can be overridden through the
# environment and falls back to the default shown here.
s3_bucket = os.environ.get("S3_BUCKET", "s3://astro-sdk-test")
gcs_bucket = os.environ.get("GCS_BUCKET", "gs://uto-test")

# Directory containing this file, and the data directory two levels above it
# (kept as a plain string with a trailing slash so file names can be appended).
CWD = pathlib.Path(__file__).parent
DATA_DIR = f"{CWD}/../../data/"
# File dataset pointing at the "example_uto/" prefix of the configured GCS
# bucket, accessed through the "google_cloud_default" connection.
# [START dataset_file]
input_file = File(
    conn_id="google_cloud_default",
    path=f"{gcs_bucket}/example_uto/",
)
# [END dataset_file]
# Table dataset in the "astro" schema, accessed through the
# "google_cloud_default" connection.
# [START dataset_table]
output_table = Table(
    conn_id="google_cloud_default",
    metadata=Metadata(schema="astro"),
    name="uto_gs_to_bigquery_table",
)
# [END dataset_table]
# Example DAG: every task hands UniversalTransferOperator one source dataset
# and one destination dataset. All bucket URIs are built from the module-level
# ``s3_bucket`` / ``gcs_bucket`` settings so that the S3_BUCKET and GCS_BUCKET
# environment overrides apply to every task, not just some of them.
with DAG(
    "example_universal_transfer_operator",
    schedule_interval=None,
    start_date=datetime(2022, 1, 1),
    catchup=False,
) as dag:
    # [START transfer_non_native_gs_to_s3]
    transfer_non_native_gs_to_s3 = UniversalTransferOperator(
        task_id="transfer_non_native_gs_to_s3",
        source_dataset=input_file,
        destination_dataset=File(path=f"{s3_bucket}/example_uto/", conn_id="aws_default"),
    )
    # [END transfer_non_native_gs_to_s3]

    # [START transfer_non_native_s3_to_gs]
    transfer_non_native_s3_to_gs = UniversalTransferOperator(
        task_id="transfer_non_native_s3_to_gs",
        # [START dataset_individual_file]
        source_dataset=File(path=f"{s3_bucket}/example_uto/", conn_id="aws_default"),
        # [END dataset_individual_file]
        destination_dataset=File(
            path=f"{gcs_bucket}/example_uto/",
            conn_id="google_cloud_default",
        ),
    )
    # [END transfer_non_native_s3_to_gs]

    # [START transfer_non_native_s3_to_sqlite]
    transfer_non_native_s3_to_sqlite = UniversalTransferOperator(
        task_id="transfer_non_native_s3_to_sqlite",
        source_dataset=File(
            path=f"{s3_bucket}/example_uto/csv_files/",
            conn_id="aws_default",
            filetype=FileType.CSV,
        ),
        destination_dataset=Table(name="uto_s3_to_sqlite_table", conn_id="sqlite_default"),
    )
    # [END transfer_non_native_s3_to_sqlite]

    # [START transfer_non_native_gs_to_sqlite]
    transfer_non_native_gs_to_sqlite = UniversalTransferOperator(
        task_id="transfer_non_native_gs_to_sqlite",
        source_dataset=File(
            path=f"{gcs_bucket}/example_uto/csv_files/",
            conn_id="google_cloud_default",
            filetype=FileType.CSV,
        ),
        destination_dataset=Table(name="uto_gs_to_sqlite_table", conn_id="sqlite_default"),
    )
    # [END transfer_non_native_gs_to_sqlite]

    # [START transfer_non_native_s3_to_snowflake]
    transfer_non_native_s3_to_snowflake = UniversalTransferOperator(
        task_id="transfer_non_native_s3_to_snowflake",
        source_dataset=File(
            # Was a hard-coded "s3://astro-sdk-test/..." URI; now follows S3_BUCKET.
            path=f"{s3_bucket}/example_uto/csv_files/",
            conn_id="aws_default",
            filetype=FileType.CSV,
        ),
        destination_dataset=Table(name="uto_s3_table_to_snowflake", conn_id="snowflake_conn"),
    )
    # [END transfer_non_native_s3_to_snowflake]

    # [START transfer_non_native_gs_to_snowflake]
    transfer_non_native_gs_to_snowflake = UniversalTransferOperator(
        task_id="transfer_non_native_gs_to_snowflake",
        source_dataset=File(
            # Was a hard-coded "gs://uto-test/..." URI; now follows GCS_BUCKET.
            path=f"{gcs_bucket}/example_uto/csv_files/",
            conn_id="google_cloud_default",
            filetype=FileType.CSV,
        ),
        destination_dataset=Table(name="uto_gs_to_snowflake_table", conn_id="snowflake_conn"),
    )
    # [END transfer_non_native_gs_to_snowflake]

    # [START transfer_non_native_gs_to_bigquery]
    transfer_non_native_gs_to_bigquery = UniversalTransferOperator(
        task_id="transfer_non_native_gs_to_bigquery",
        # Was a hard-coded "gs://uto-test/..." URI; now follows GCS_BUCKET.
        source_dataset=File(
            path=f"{gcs_bucket}/example_uto/homes_main.csv",
            conn_id="google_cloud_default",
        ),
        destination_dataset=output_table,
    )
    # [END transfer_non_native_gs_to_bigquery]

    # [START transfer_non_native_s3_to_bigquery]
    transfer_non_native_s3_to_bigquery = UniversalTransferOperator(
        task_id="transfer_non_native_s3_to_bigquery",
        source_dataset=File(
            # Was a hard-coded "s3://astro-sdk-test/..." URI; now follows S3_BUCKET.
            path=f"{s3_bucket}/example_uto/csv_files/",
            conn_id="aws_default",
            filetype=FileType.CSV,
        ),
        destination_dataset=Table(
            name="uto_s3_to_bigquery_destination_table",
            conn_id="google_cloud_default",
            metadata=Metadata(schema="astro"),
        ),
    )
    # [END transfer_non_native_s3_to_bigquery]

    # [START transfer_non_native_bigquery_to_snowflake]
    transfer_non_native_bigquery_to_snowflake = UniversalTransferOperator(
        task_id="transfer_non_native_bigquery_to_snowflake",
        # NOTE(review): reads "uto_s3_to_bigquery_table", which no task in this
        # DAG writes (the S3 -> BigQuery task targets
        # "uto_s3_to_bigquery_destination_table") - presumably pre-existing; confirm.
        source_dataset=Table(
            name="uto_s3_to_bigquery_table",
            conn_id="google_cloud_default",
            metadata=Metadata(schema="astro"),
        ),
        destination_dataset=Table(
            name="uto_bigquery_to_snowflake_table",
            conn_id="snowflake_conn",
        ),
    )
    # [END transfer_non_native_bigquery_to_snowflake]

    # [START transfer_non_native_bigquery_to_sqlite]
    transfer_non_native_bigquery_to_sqlite = UniversalTransferOperator(
        task_id="transfer_non_native_bigquery_to_sqlite",
        source_dataset=Table(
            name="uto_s3_to_bigquery_table",
            conn_id="google_cloud_default",
            metadata=Metadata(schema="astro"),
        ),
        destination_dataset=Table(name="uto_bigquery_to_sqlite_table", conn_id="sqlite_default"),
    )
    # [END transfer_non_native_bigquery_to_sqlite]

    # [START transfer_non_native_local_to_sftp]
    transfer_non_native_local_to_sftp = UniversalTransferOperator(
        task_id="transfer_non_native_local_to_sftp",
        # The source File is given no conn_id, only a path under DATA_DIR.
        source_dataset=File(path=f"{DATA_DIR}sample.csv", filetype=FileType.CSV),
        destination_dataset=File(
            path="sftp://upload/sample_1.csv",
            conn_id="sftp_conn",
            filetype=FileType.CSV,
        ),
    )
    # [END transfer_non_native_local_to_sftp]

    # The three tasks that read from the GCS bucket are placed downstream of the
    # S3 -> GCS copy; all other tasks have no dependencies declared here.
    transfer_non_native_s3_to_gs >> [
        transfer_non_native_gs_to_sqlite,
        transfer_non_native_gs_to_bigquery,
        transfer_non_native_gs_to_snowflake,
    ]