Skip to content
This repository has been archived by the owner on Sep 4, 2024. It is now read-only.

Commit

Permalink
Migrate Databricks account to Azure Databricks (#82)
Browse files Browse the repository at this point in the history
* Update Azure Databricks details
  • Loading branch information
vatsrahul1001 authored Jun 28, 2024
1 parent 8b27e03 commit d49e88c
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 61 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,9 @@ jobs:
path: ./.coverage
env:
DATABRICKS_CONN_TOKEN: ${{ secrets.DATABRICKS_CONN_TOKEN }}
DATABRICKS_CONN_HOST: https://dbc-9c390870-65ef.cloud.databricks.com/
DATABRICKS_CONN_HOST: ${{ secrets.DATABRICKS_CONN_HOST }}
DATABRICKS_CONN: ${{ secrets.AIRFLOW_CONN_DATABRICKS_DEFAULT }}


Code-Coverage:
if: github.event.action != 'labeled'
Expand Down
9 changes: 1 addition & 8 deletions dev/dags/basic_notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,7 @@
"new_cluster": {
"cluster_name": "",
"spark_version": "11.3.x-scala2.12",
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "us-east-2b",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0,
},
"node_type_id": "i3.xlarge",
"node_type_id": "Standard_DS3_v2",
"spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
"enable_elastic_disk": False,
"data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
Expand Down
11 changes: 2 additions & 9 deletions dev/dags/common_operator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from datetime import datetime

from airflow.decorators import dag
from astro_databricks.operators.notebook import DatabricksNotebookOperator
from astro_databricks.operators.common import DatabricksTaskOperator
from astro_databricks.operators.notebook import DatabricksNotebookOperator
from astro_databricks.operators.workflow import DatabricksWorkflowTaskGroup

job_clusters = [
Expand All @@ -11,14 +11,7 @@
"new_cluster": {
"cluster_name": "",
"spark_version": "11.3.x-scala2.12",
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "us-east-2b",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0,
},
"node_type_id": "i3.xlarge",
"node_type_id": "Standard_DS3_v2",
"spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
"enable_elastic_disk": False,
"data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
Expand Down
9 changes: 1 addition & 8 deletions dev/dags/task_group_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,7 @@
"new_cluster": {
"cluster_name": "",
"spark_version": "11.3.x-scala2.12",
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "us-east-2b",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0,
},
"node_type_id": "i3.xlarge",
"node_type_id": "Standard_DS3_v2",
"spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
"enable_elastic_disk": False,
"data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
Expand Down
9 changes: 1 addition & 8 deletions example_dags/example_databricks_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,7 @@
NEW_CLUSTER_SPEC = {
"cluster_name": "",
"spark_version": "11.3.x-scala2.12",
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "us-east-2b",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0,
},
"node_type_id": "i3.xlarge",
"node_type_id": "Standard_DS3_v2",
"spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
"enable_elastic_disk": False,
"data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
Expand Down
25 changes: 10 additions & 15 deletions example_dags/example_databricks_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
}

DATABRICKS_CONN_ID = os.getenv("ASTRO_DATABRICKS_CONN_ID", "databricks_conn")
DATABRICKS_NOTIFICATION_EMAIL = os.getenv(
"ASTRO_DATABRICKS_NOTIFICATION_EMAIL", "tatiana.alchueyr@astronomer.io"
)

# DATABRICKS_NOTIFICATION_EMAIL = os.getenv(
# "ASTRO_DATABRICKS_NOTIFICATION_EMAIL", "tatiana.alchueyr@astronomer.io"
# )
DATABRICKS_DESTINATION_ID = os.getenv(
"ASTRO_DATABRICKS_DESTINATION_ID", "b0aea8ab-ea8c-4a45-a2e9-9a26753fd702"
"ASTRO_DATABRICKS_DESTINATION_ID", "48c7315c-1d65-4ee3-b7d3-1692e8e8012d"
)

USER = os.environ.get("USER")
Expand All @@ -32,14 +33,7 @@
"new_cluster": {
"cluster_name": "",
"spark_version": "11.3.x-scala2.12",
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "us-east-2b",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0,
},
"node_type_id": "i3.xlarge",
"node_type_id": "Standard_DS3_v2",
"spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
"enable_elastic_disk": False,
"data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
Expand Down Expand Up @@ -72,9 +66,10 @@
},
],
extra_job_params={
"email_notifications": {
"on_start": [DATABRICKS_NOTIFICATION_EMAIL],
},
# Commented out below to avoid spamming email notifications; kept for example purposes.
# "email_notifications": {
# "on_start": [DATABRICKS_NOTIFICATION_EMAIL],
# },
"webhook_notifications": {
"on_start": [{"id": DATABRICKS_DESTINATION_ID}],
},
Expand Down
17 changes: 5 additions & 12 deletions example_dags/example_task_group.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import os
from datetime import datetime

from airflow.decorators import dag, task_group
from airflow.decorators import dag
from airflow.utils.task_group import TaskGroup

from astro_databricks.operators.notebook import DatabricksNotebookOperator
from astro_databricks.operators.workflow import DatabricksWorkflowTaskGroup


DATABRICKS_CONN = "databricks_conn"
USER = os.environ.get("USER")
GROUP_ID = os.getenv("DATABRICKS_GROUP_ID", "1234").replace(".", "_")
Expand All @@ -18,14 +16,7 @@
"new_cluster": {
"cluster_name": "",
"spark_version": "11.3.x-scala2.12",
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "us-east-2b",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0,
},
"node_type_id": "i3.xlarge",
"node_type_id": "Standard_DS3_v2",
"spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
"enable_elastic_disk": False,
"data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
Expand All @@ -40,7 +31,9 @@
schedule_interval="@daily",
start_date=datetime(2021, 1, 1),
catchup=False,
default_args={'retries': 0}, # Users are encouraged to use the repair feature, retries may fail
default_args={
"retries": 0
}, # Users are encouraged to use the repair feature, retries may fail
tags=["astro-provider-databricks"],
)
def example_task_group():
Expand Down

0 comments on commit d49e88c

Please sign in to comment.