diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 612ec2d..ad0d2a5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -172,7 +172,9 @@ jobs:
           path: ./.coverage
         env:
           DATABRICKS_CONN_TOKEN: ${{ secrets.DATABRICKS_CONN_TOKEN }}
-          DATABRICKS_CONN_HOST: https://dbc-9c390870-65ef.cloud.databricks.com/
+          DATABRICKS_CONN_HOST: ${{ secrets.DATABRICKS_CONN_HOST }}
+          DATABRICKS_CONN: ${{ secrets.AIRFLOW_CONN_DATABRICKS_DEFAULT }}
+

   Code-Coverage:
     if: github.event.action != 'labeled'
diff --git a/dev/dags/basic_notebooks.py b/dev/dags/basic_notebooks.py
index a57e676..a301049 100644
--- a/dev/dags/basic_notebooks.py
+++ b/dev/dags/basic_notebooks.py
@@ -10,14 +10,7 @@
         "new_cluster": {
             "cluster_name": "",
             "spark_version": "11.3.x-scala2.12",
-            "aws_attributes": {
-                "first_on_demand": 1,
-                "availability": "SPOT_WITH_FALLBACK",
-                "zone_id": "us-east-2b",
-                "spot_bid_price_percent": 100,
-                "ebs_volume_count": 0,
-            },
-            "node_type_id": "i3.xlarge",
+            "node_type_id": "Standard_DS3_v2",
             "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
             "enable_elastic_disk": False,
             "data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
diff --git a/dev/dags/common_operator.py b/dev/dags/common_operator.py
index e1a8fd1..4571b20 100644
--- a/dev/dags/common_operator.py
+++ b/dev/dags/common_operator.py
@@ -1,8 +1,8 @@
 from datetime import datetime

 from airflow.decorators import dag
-from astro_databricks.operators.notebook import DatabricksNotebookOperator
 from astro_databricks.operators.common import DatabricksTaskOperator
+from astro_databricks.operators.notebook import DatabricksNotebookOperator
 from astro_databricks.operators.workflow import DatabricksWorkflowTaskGroup

 job_clusters = [
@@ -11,14 +11,7 @@
         "new_cluster": {
             "cluster_name": "",
             "spark_version": "11.3.x-scala2.12",
-            "aws_attributes": {
-                "first_on_demand": 1,
-                "availability": "SPOT_WITH_FALLBACK",
-                "zone_id": "us-east-2b",
-                "spot_bid_price_percent": 100,
-                "ebs_volume_count": 0,
-            },
-            "node_type_id": "i3.xlarge",
+            "node_type_id": "Standard_DS3_v2",
             "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
             "enable_elastic_disk": False,
             "data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
diff --git a/dev/dags/task_group_example.py b/dev/dags/task_group_example.py
index bc15762..92ec326 100644
--- a/dev/dags/task_group_example.py
+++ b/dev/dags/task_group_example.py
@@ -10,14 +10,7 @@
         "new_cluster": {
             "cluster_name": "",
             "spark_version": "11.3.x-scala2.12",
-            "aws_attributes": {
-                "first_on_demand": 1,
-                "availability": "SPOT_WITH_FALLBACK",
-                "zone_id": "us-east-2b",
-                "spot_bid_price_percent": 100,
-                "ebs_volume_count": 0,
-            },
-            "node_type_id": "i3.xlarge",
+            "node_type_id": "Standard_DS3_v2",
             "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
             "enable_elastic_disk": False,
             "data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
diff --git a/example_dags/example_databricks_notebook.py b/example_dags/example_databricks_notebook.py
index a478756..0c6b8db 100644
--- a/example_dags/example_databricks_notebook.py
+++ b/example_dags/example_databricks_notebook.py
@@ -18,14 +18,7 @@
 NEW_CLUSTER_SPEC = {
     "cluster_name": "",
     "spark_version": "11.3.x-scala2.12",
-    "aws_attributes": {
-        "first_on_demand": 1,
-        "availability": "SPOT_WITH_FALLBACK",
-        "zone_id": "us-east-2b",
-        "spot_bid_price_percent": 100,
-        "ebs_volume_count": 0,
-    },
-    "node_type_id": "i3.xlarge",
+    "node_type_id": "Standard_DS3_v2",
     "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
"/databricks/python3/bin/python3"}, "enable_elastic_disk": False, "data_security_mode": "LEGACY_SINGLE_USER_STANDARD", diff --git a/example_dags/example_databricks_workflow.py b/example_dags/example_databricks_workflow.py index 6f2e91e..58d485d 100644 --- a/example_dags/example_databricks_workflow.py +++ b/example_dags/example_databricks_workflow.py @@ -15,11 +15,12 @@ } DATABRICKS_CONN_ID = os.getenv("ASTRO_DATABRICKS_CONN_ID", "databricks_conn") -DATABRICKS_NOTIFICATION_EMAIL = os.getenv( - "ASTRO_DATABRICKS_NOTIFICATION_EMAIL", "tatiana.alchueyr@astronomer.io" -) + +# DATABRICKS_NOTIFICATION_EMAIL = os.getenv( +# "ASTRO_DATABRICKS_NOTIFICATION_EMAIL", "tatiana.alchueyr@astronomer.io" +# ) DATABRICKS_DESTINATION_ID = os.getenv( - "ASTRO_DATABRICKS_DESTINATION_ID", "b0aea8ab-ea8c-4a45-a2e9-9a26753fd702" + "ASTRO_DATABRICKS_DESTINATION_ID", "48c7315c-1d65-4ee3-b7d3-1692e8e8012d" ) USER = os.environ.get("USER") @@ -32,14 +33,7 @@ "new_cluster": { "cluster_name": "", "spark_version": "11.3.x-scala2.12", - "aws_attributes": { - "first_on_demand": 1, - "availability": "SPOT_WITH_FALLBACK", - "zone_id": "us-east-2b", - "spot_bid_price_percent": 100, - "ebs_volume_count": 0, - }, - "node_type_id": "i3.xlarge", + "node_type_id": "Standard_DS3_v2", "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"}, "enable_elastic_disk": False, "data_security_mode": "LEGACY_SINGLE_USER_STANDARD", @@ -72,9 +66,10 @@ }, ], extra_job_params={ - "email_notifications": { - "on_start": [DATABRICKS_NOTIFICATION_EMAIL], - }, + ## Commented below to avoid spam; keeping this for example purposes. + # "email_notifications": { + # "on_start": [DATABRICKS_NOTIFICATION_EMAIL], + # }, "webhook_notifications": { "on_start": [{"id": DATABRICKS_DESTINATION_ID}], }, diff --git a/example_dags/example_task_group.py b/example_dags/example_task_group.py index 577fcd3..cbfb087 100644 --- a/example_dags/example_task_group.py +++ b/example_dags/example_task_group.py @@ -1,13 +1,11 @@ import os from datetime import datetime -from airflow.decorators import dag, task_group +from airflow.decorators import dag from airflow.utils.task_group import TaskGroup - from astro_databricks.operators.notebook import DatabricksNotebookOperator from astro_databricks.operators.workflow import DatabricksWorkflowTaskGroup - DATABRICKS_CONN = "databricks_conn" USER = os.environ.get("USER") GROUP_ID = os.getenv("DATABRICKS_GROUP_ID", "1234").replace(".", "_") @@ -18,14 +16,7 @@ "new_cluster": { "cluster_name": "", "spark_version": "11.3.x-scala2.12", - "aws_attributes": { - "first_on_demand": 1, - "availability": "SPOT_WITH_FALLBACK", - "zone_id": "us-east-2b", - "spot_bid_price_percent": 100, - "ebs_volume_count": 0, - }, - "node_type_id": "i3.xlarge", + "node_type_id": "Standard_DS3_v2", "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"}, "enable_elastic_disk": False, "data_security_mode": "LEGACY_SINGLE_USER_STANDARD", @@ -40,7 +31,9 @@ schedule_interval="@daily", start_date=datetime(2021, 1, 1), catchup=False, - default_args={'retries': 0}, # Users are encouraged to use the repair feature, retries may fail + default_args={ + "retries": 0 + }, # Users are encouraged to use the repair feature, retries may fail tags=["astro-provider-databricks"], ) def example_task_group():