From b757bd8df824d4eba952f6e140bcb373bc3f1003 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Wed, 6 Nov 2024 12:25:59 +0100 Subject: [PATCH] Temporarily revert dag versioning changes (#43730) * Revert "Delete the Serialized Dag and DagCode before DagVersion migration (#43700)" This reverts commit 438f71df8d8621acdc3ddc6e6d3f06a51b66b8cb. * Revert "AIP-65: Add DAG versioning support (#42913)" This reverts commit 1116f286eec3c233fe45fe66a3e4515c42501b75. --- airflow/api/common/trigger_dag.py | 5 +- .../endpoints/dag_run_endpoint.py | 5 +- airflow/dag_processing/manager.py | 15 + .../example_dags/plugins/event_listener.py | 5 +- airflow/jobs/scheduler_job_runner.py | 22 +- .../versions/0047_3_0_0_add_dag_versioning.py | 158 - airflow/models/__init__.py | 1 - airflow/models/backfill.py | 6 +- airflow/models/dag.py | 26 +- airflow/models/dag_version.py | 167 - airflow/models/dagbag.py | 3 +- airflow/models/dagcode.py | 117 +- airflow/models/dagrun.py | 22 +- airflow/models/serialized_dag.py | 141 +- airflow/models/taskinstance.py | 14 +- airflow/models/taskinstancehistory.py | 2 - airflow/serialization/pydantic/dag_run.py | 2 +- airflow/serialization/schema.json | 1 - airflow/utils/db.py | 2 +- airflow/www/views.py | 4 +- docs/apache-airflow/img/airflow_erd.sha256 | 2 +- docs/apache-airflow/img/airflow_erd.svg | 3942 ++++++++--------- docs/apache-airflow/migrations-ref.rst | 4 +- hatch_build.py | 1 - .../api_endpoints/test_dag_run_endpoint.py | 1 - .../pre_commit/check_ti_vs_tis_attributes.py | 1 - task_sdk/src/airflow/sdk/definitions/dag.py | 6 - .../endpoints/test_dag_run_endpoint.py | 5 +- .../endpoints/test_task_endpoint.py | 6 +- tests/cli/commands/test_task_command.py | 2 +- tests/dag_processing/test_job_runner.py | 24 +- tests/dag_processing/test_processor.py | 1 - tests/jobs/test_scheduler_job.py | 219 +- tests/models/test_dag.py | 24 +- tests/models/test_dag_version.py | 113 - tests/models/test_dagbag.py | 14 +- tests/models/test_dagcode.py | 110 +- tests/models/test_dagrun.py | 1 - tests/models/test_serialized_dag.py | 111 +- tests/models/test_taskinstance.py | 4 +- tests/operators/test_trigger_dagrun.py | 13 +- tests/sensors/test_external_task_sensor.py | 3 +- tests/utils/test_db_cleanup.py | 1 - tests/www/views/test_views_tasks.py | 36 +- tests_common/pytest_plugin.py | 59 +- tests_common/test_utils/db.py | 1 - 46 files changed, 2366 insertions(+), 3056 deletions(-) delete mode 100644 airflow/migrations/versions/0047_3_0_0_add_dag_versioning.py delete mode 100644 airflow/models/dag_version.py delete mode 100644 tests/models/test_dag_version.py diff --git a/airflow/api/common/trigger_dag.py b/airflow/api/common/trigger_dag.py index 44beae3f1f78c..b18957261f3a0 100644 --- a/airflow/api/common/trigger_dag.py +++ b/airflow/api/common/trigger_dag.py @@ -25,7 +25,6 @@ from airflow.api_internal.internal_api_call import internal_api_call from airflow.exceptions import DagNotFound, DagRunAlreadyExists from airflow.models import DagBag, DagModel, DagRun -from airflow.models.dag_version import DagVersion from airflow.utils import timezone from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import DagRunState @@ -93,14 +92,14 @@ def _trigger_dag( run_conf = None if conf: run_conf = conf if isinstance(conf, dict) else json.loads(conf) - dag_version = DagVersion.get_latest_version(dag.dag_id) + dag_run = dag.create_dagrun( run_id=run_id, execution_date=execution_date, state=DagRunState.QUEUED, conf=run_conf, external_trigger=True, - dag_version=dag_version, 
+ dag_hash=dag_bag.dags_hash.get(dag_id), data_interval=data_interval, triggered_by=triggered_by, ) diff --git a/airflow/api_connexion/endpoints/dag_run_endpoint.py b/airflow/api_connexion/endpoints/dag_run_endpoint.py index 6a38eb27ff45c..8ebb2b44e2bb3 100644 --- a/airflow/api_connexion/endpoints/dag_run_endpoint.py +++ b/airflow/api_connexion/endpoints/dag_run_endpoint.py @@ -61,7 +61,6 @@ from airflow.auth.managers.models.resource_details import DagAccessEntity from airflow.exceptions import ParamValidationError from airflow.models import DagModel, DagRun -from airflow.models.dag_version import DagVersion from airflow.timetables.base import DataInterval from airflow.utils.airflow_flask_app import get_airflow_app from airflow.utils.api_migration import mark_fastapi_migration_done @@ -342,7 +341,7 @@ def post_dag_run(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: ) else: data_interval = dag.timetable.infer_manual_data_interval(run_after=logical_date) - dag_version = DagVersion.get_latest_version(dag.dag_id) + dag_run = dag.create_dagrun( run_type=DagRunType.MANUAL, run_id=run_id, @@ -351,7 +350,7 @@ def post_dag_run(*, dag_id: str, session: Session = NEW_SESSION) -> APIResponse: state=DagRunState.QUEUED, conf=post_body.get("conf"), external_trigger=True, - dag_version=dag_version, + dag_hash=get_airflow_app().dag_bag.dags_hash.get(dag_id), session=session, triggered_by=DagRunTriggeredByType.REST_API, ) diff --git a/airflow/dag_processing/manager.py b/airflow/dag_processing/manager.py index 3bc467e2f7063..0f3441a5d4d13 100644 --- a/airflow/dag_processing/manager.py +++ b/airflow/dag_processing/manager.py @@ -50,6 +50,7 @@ from airflow.models.dagwarning import DagWarning from airflow.models.db_callback_request import DbCallbackRequest from airflow.models.errors import ParseImportError +from airflow.models.serialized_dag import SerializedDagModel from airflow.secrets.cache import SecretCache from airflow.stats import Stats from airflow.traces.tracer import Trace, add_span @@ -538,6 +539,10 @@ def deactivate_stale_dags( if deactivated: cls.logger().info("Deactivated %i DAGs which are no longer present in file.", deactivated) + for dag_id in to_deactivate: + SerializedDagModel.remove_dag(dag_id) + cls.logger().info("Deleted DAG %s in serialized_dag table", dag_id) + def _run_parsing_loop(self): # In sync mode we want timeout=None -- wait forever until a message is received if self._async_mode: @@ -814,10 +819,20 @@ def _iter_dag_filelocs(fileloc: str) -> Iterator[str]: dag_filelocs = {full_loc for path in self._file_paths for full_loc in _iter_dag_filelocs(path)} + from airflow.models.dagcode import DagCode + + SerializedDagModel.remove_deleted_dags( + alive_dag_filelocs=dag_filelocs, + processor_subdir=self.get_dag_directory(), + ) DagModel.deactivate_deleted_dags( dag_filelocs, processor_subdir=self.get_dag_directory(), ) + DagCode.remove_deleted_code( + dag_filelocs, + processor_subdir=self.get_dag_directory(), + ) return True return False diff --git a/airflow/example_dags/plugins/event_listener.py b/airflow/example_dags/plugins/event_listener.py index 6d9fe2ff11735..4b9be307c4e7e 100644 --- a/airflow/example_dags/plugins/event_listener.py +++ b/airflow/example_dags/plugins/event_listener.py @@ -164,10 +164,9 @@ def on_dag_run_running(dag_run: DagRun, msg: str): """ print("Dag run in running state") queued_at = dag_run.queued_at + dag_hash_info = dag_run.dag_hash - version = dag_run.dag_version.version - - print(f"Dag information Queued at: {queued_at} version: {version}") 
+ print(f"Dag information Queued at: {queued_at} hash info: {dag_hash_info}") # [END howto_listen_dagrun_running_task] diff --git a/airflow/jobs/scheduler_job_runner.py b/airflow/jobs/scheduler_job_runner.py index ffa250fc81441..fb85a4a73cc32 100644 --- a/airflow/jobs/scheduler_job_runner.py +++ b/airflow/jobs/scheduler_job_runner.py @@ -54,10 +54,10 @@ ) from airflow.models.backfill import Backfill from airflow.models.dag import DAG, DagModel -from airflow.models.dag_version import DagVersion from airflow.models.dagbag import DagBag from airflow.models.dagrun import DagRun from airflow.models.dagwarning import DagWarning, DagWarningType +from airflow.models.serialized_dag import SerializedDagModel from airflow.models.taskinstance import SimpleTaskInstance, TaskInstance from airflow.stats import Stats from airflow.ti_deps.dependencies_states import EXECUTION_STATES @@ -1338,7 +1338,7 @@ def _create_dag_runs(self, dag_models: Collection[DagModel], session: Session) - self.log.error("DAG '%s' not found in serialized_dag table", dag_model.dag_id) continue - latest_dag_version = DagVersion.get_latest_version(dag.dag_id, session=session) + dag_hash = self.dagbag.dags_hash.get(dag.dag_id) data_interval = dag.get_next_data_interval(dag_model) # Explicitly check if the DagRun already exists. This is an edge case @@ -1358,7 +1358,7 @@ def _create_dag_runs(self, dag_models: Collection[DagModel], session: Session) - data_interval=data_interval, external_trigger=False, session=session, - dag_version=latest_dag_version, + dag_hash=dag_hash, creating_job_id=self.job.id, triggered_by=DagRunTriggeredByType.TIMETABLE, ) @@ -1417,7 +1417,7 @@ def _create_dag_runs_asset_triggered( ) continue - latest_dag_version = DagVersion.get_latest_version(dag.dag_id, session=session) + dag_hash = self.dagbag.dags_hash.get(dag.dag_id) # Explicitly check if the DagRun already exists. This is an edge case # where a Dag Run is created but `DagModel.next_dagrun` and `DagModel.next_dagrun_create_after` @@ -1472,7 +1472,7 @@ def _create_dag_runs_asset_triggered( state=DagRunState.QUEUED, external_trigger=False, session=session, - dag_version=latest_dag_version, + dag_hash=dag_hash, creating_job_id=self.job.id, triggered_by=DagRunTriggeredByType.ASSET, ) @@ -1750,20 +1750,18 @@ def _verify_integrity_if_dag_changed(self, dag_run: DagRun, session: Session) -> Return True if we determine that DAG still exists. """ - latest_dag_version = DagVersion.get_latest_version(dag_run.dag_id, session=session) - if TYPE_CHECKING: - assert latest_dag_version - if dag_run.dag_version_id == latest_dag_version.id: + latest_version = SerializedDagModel.get_latest_version_hash(dag_run.dag_id, session=session) + if dag_run.dag_hash == latest_version: self.log.debug("DAG %s not changed structure, skipping dagrun.verify_integrity", dag_run.dag_id) return True + dag_run.dag_hash = latest_version + # Refresh the DAG dag_run.dag = self.dagbag.get_dag(dag_id=dag_run.dag_id, session=session) if not dag_run.dag: return False - dag_run.dag_version = latest_dag_version - # Verify integrity also takes care of session.flush dag_run.verify_integrity(session=session) return True @@ -2043,6 +2041,7 @@ def _cleanup_stale_dags(self, session: Session = NEW_SESSION) -> None: In case one of DagProcessors is stopped (in case there are multiple of them for different dag folders), its dags are never marked as inactive. + Also remove dags from SerializedDag table. Executed on schedule only if [scheduler]standalone_dag_processor is True. 
""" self.log.debug("Checking dags not parsed within last %s seconds.", self._dag_stale_not_seen_duration) @@ -2057,6 +2056,7 @@ def _cleanup_stale_dags(self, session: Session = NEW_SESSION) -> None: self.log.info("Found (%d) stales dags not parsed after %s.", len(stale_dags), limit_lpt) for dag in stale_dags: dag.is_active = False + SerializedDagModel.remove_dag(dag_id=dag.dag_id, session=session) session.flush() @provide_session diff --git a/airflow/migrations/versions/0047_3_0_0_add_dag_versioning.py b/airflow/migrations/versions/0047_3_0_0_add_dag_versioning.py deleted file mode 100644 index ca685ae4e071d..0000000000000 --- a/airflow/migrations/versions/0047_3_0_0_add_dag_versioning.py +++ /dev/null @@ -1,158 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -add dag versioning. - -Revision ID: 2b47dc6bc8df -Revises: d03e4a635aa3 -Create Date: 2024-10-09 05:44:04.670984 - -""" - -from __future__ import annotations - -import sqlalchemy as sa -from alembic import op -from sqlalchemy_utils import UUIDType - -from airflow.migrations.db_types import StringID -from airflow.models.base import naming_convention -from airflow.utils import timezone -from airflow.utils.sqlalchemy import UtcDateTime - -# revision identifiers, used by Alembic. 
-revision = "2b47dc6bc8df" -down_revision = "d03e4a635aa3" -branch_labels = None -depends_on = None -airflow_version = "3.0.0" - - -def _delete_serdag_and_code(): - op.execute(sa.text("DELETE FROM serialized_dag")) - op.execute(sa.text("DELETE FROM dag_code")) - - -def upgrade(): - """Apply add dag versioning.""" - # Before creating the dag_version table, we need to delete the existing serialized_dag and dag_code tables - _delete_serdag_and_code() - op.create_table( - "dag_version", - sa.Column("id", UUIDType(binary=False), nullable=False), - sa.Column("version_number", sa.Integer(), nullable=False), - sa.Column("version_name", StringID()), - sa.Column("dag_id", StringID(), nullable=False), - sa.Column("created_at", UtcDateTime(), nullable=False, default=timezone.utcnow), - sa.ForeignKeyConstraint( - ("dag_id",), ["dag.dag_id"], name=op.f("dag_version_dag_id_fkey"), ondelete="CASCADE" - ), - sa.PrimaryKeyConstraint("id", name=op.f("dag_version_pkey")), - sa.UniqueConstraint("dag_id", "version_number", name="dag_id_v_name_v_number_unique_constraint"), - ) - with op.batch_alter_table("dag_code", recreate="always", naming_convention=naming_convention) as batch_op: - batch_op.drop_constraint("dag_code_pkey", type_="primary") - batch_op.add_column( - sa.Column("id", UUIDType(binary=False), primary_key=True), insert_before="fileloc_hash" - ) - batch_op.create_primary_key("dag_code_pkey", ["id"]) - batch_op.add_column(sa.Column("dag_version_id", UUIDType(binary=False), nullable=False)) - batch_op.create_foreign_key( - batch_op.f("dag_code_dag_version_id_fkey"), - "dag_version", - ["dag_version_id"], - ["id"], - ondelete="CASCADE", - ) - batch_op.create_unique_constraint("dag_code_dag_version_id_uq", ["dag_version_id"]) - - with op.batch_alter_table( - "serialized_dag", recreate="always", naming_convention=naming_convention - ) as batch_op: - batch_op.drop_constraint("serialized_dag_pkey", type_="primary") - batch_op.add_column(sa.Column("id", UUIDType(binary=False), primary_key=True)) - batch_op.drop_index("idx_fileloc_hash") - batch_op.drop_column("fileloc_hash") - batch_op.drop_column("fileloc") - batch_op.create_primary_key("serialized_dag_pkey", ["id"]) - batch_op.add_column(sa.Column("dag_version_id", UUIDType(binary=False), nullable=False)) - batch_op.create_foreign_key( - batch_op.f("serialized_dag_dag_version_id_fkey"), - "dag_version", - ["dag_version_id"], - ["id"], - ondelete="CASCADE", - ) - batch_op.create_unique_constraint("serialized_dag_dag_version_id_uq", ["dag_version_id"]) - - with op.batch_alter_table("task_instance", schema=None) as batch_op: - batch_op.add_column(sa.Column("dag_version_id", UUIDType(binary=False))) - batch_op.create_foreign_key( - batch_op.f("task_instance_dag_version_id_fkey"), - "dag_version", - ["dag_version_id"], - ["id"], - ondelete="CASCADE", - ) - - with op.batch_alter_table("task_instance_history", schema=None) as batch_op: - batch_op.add_column(sa.Column("dag_version_id", UUIDType(binary=False))) - - with op.batch_alter_table("dag_run", schema=None) as batch_op: - batch_op.add_column(sa.Column("dag_version_id", UUIDType(binary=False))) - batch_op.create_foreign_key( - batch_op.f("dag_run_dag_version_id_fkey"), - "dag_version", - ["dag_version_id"], - ["id"], - ondelete="CASCADE", - ) - batch_op.drop_column("dag_hash") - - -def downgrade(): - """Unapply add dag versioning.""" - with op.batch_alter_table("task_instance_history", schema=None) as batch_op: - batch_op.drop_column("dag_version_id") - - with op.batch_alter_table("task_instance", 
schema=None) as batch_op: - batch_op.drop_constraint(batch_op.f("task_instance_dag_version_id_fkey"), type_="foreignkey") - batch_op.drop_column("dag_version_id") - - with op.batch_alter_table("dag_code", schema=None) as batch_op: - batch_op.drop_column("id") - batch_op.drop_constraint(batch_op.f("dag_code_dag_version_id_fkey"), type_="foreignkey") - batch_op.drop_column("dag_version_id") - batch_op.create_primary_key("dag_code_pkey", ["fileloc_hash"]) - - with op.batch_alter_table("serialized_dag", schema=None, naming_convention=naming_convention) as batch_op: - batch_op.drop_column("id") - batch_op.add_column(sa.Column("fileloc", sa.String(length=2000), autoincrement=False, nullable=False)) - batch_op.add_column(sa.Column("fileloc_hash", sa.BIGINT(), autoincrement=False, nullable=False)) - batch_op.create_index("idx_fileloc_hash", ["fileloc_hash"], unique=False) - batch_op.create_primary_key("serialized_dag_pkey", ["dag_id"]) - batch_op.drop_constraint(batch_op.f("serialized_dag_dag_version_id_fkey"), type_="foreignkey") - batch_op.drop_column("dag_version_id") - - with op.batch_alter_table("dag_run", schema=None) as batch_op: - batch_op.add_column(sa.Column("dag_hash", sa.String(length=32), autoincrement=False, nullable=True)) - batch_op.drop_constraint(batch_op.f("dag_run_dag_version_id_fkey"), type_="foreignkey") - batch_op.drop_column("dag_version_id") - - op.drop_table("dag_version") diff --git a/airflow/models/__init__.py b/airflow/models/__init__.py index 6d8803410532a..1ab4e5584c976 100644 --- a/airflow/models/__init__.py +++ b/airflow/models/__init__.py @@ -57,7 +57,6 @@ def import_all_models(): import airflow.models.asset import airflow.models.backfill - import airflow.models.dag_version import airflow.models.dagwarning import airflow.models.errors import airflow.models.serialized_dag diff --git a/airflow/models/backfill.py b/airflow/models/backfill.py index 11d677542fc0e..648b35c5bdebe 100644 --- a/airflow/models/backfill.py +++ b/airflow/models/backfill.py @@ -43,7 +43,6 @@ from airflow.api_connexion.exceptions import NotFound from airflow.exceptions import AirflowException from airflow.models.base import Base, StringID -from airflow.models.dag_version import DagVersion from airflow.settings import json from airflow.utils import timezone from airflow.utils.session import create_session @@ -201,7 +200,7 @@ def _create_backfill_dag_run( ) ) return - dag_version = DagVersion.get_latest_version(dag.dag_id, session=session) + dr = dag.create_dagrun( triggered_by=DagRunTriggeredByType.BACKFILL, execution_date=info.logical_date, @@ -214,7 +213,6 @@ def _create_backfill_dag_run( creating_job_id=None, session=session, backfill_id=backfill_id, - dag_version=dag_version, ) session.add( BackfillDagRun( @@ -255,7 +253,7 @@ def _create_backfill( from airflow.models.serialized_dag import SerializedDagModel with create_session() as session: - serdag = session.scalar(SerializedDagModel.latest_item_select_object(dag_id)) + serdag = session.get(SerializedDagModel, dag_id) if not serdag: raise NotFound(f"Could not find dag {dag_id}") # todo: if dag has no schedule, raise diff --git a/airflow/models/dag.py b/airflow/models/dag.py index e6a67c6ad7e5e..337fc5c8163e1 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -85,7 +85,7 @@ ) from airflow.models.base import Base, StringID from airflow.models.baseoperator import BaseOperator -from airflow.models.dag_version import DagVersion +from airflow.models.dagcode import DagCode from airflow.models.dagrun import RUN_ID_REGEX, DagRun from 
airflow.models.taskinstance import ( Context, @@ -257,7 +257,7 @@ def _create_orm_dagrun( conf, state, run_type, - dag_version, + dag_hash, creating_job_id, data_interval, backfill_id, @@ -273,7 +273,7 @@ def _create_orm_dagrun( conf=conf, state=state, run_type=run_type, - dag_version=dag_version, + dag_hash=dag_hash, creating_job_id=creating_job_id, data_interval=data_interval, triggered_by=triggered_by, @@ -424,7 +424,6 @@ class DAG(TaskSDKDag, LoggingMixin): **Warning**: A fail stop dag can only have tasks with the default trigger rule ("all_success"). An exception will be thrown if any task in a fail stop dag has a non default trigger rule. :param dag_display_name: The display name of the DAG which appears on the UI. - :param version_name: The version name to use in storing the dag to the DB. """ partial: bool = False @@ -1709,7 +1708,7 @@ def create_dagrun( conf: dict | None = None, run_type: DagRunType | None = None, session: Session = NEW_SESSION, - dag_version: DagVersion | None = None, + dag_hash: str | None = None, creating_job_id: int | None = None, data_interval: tuple[datetime, datetime] | None = None, backfill_id: int | None = None, @@ -1729,7 +1728,7 @@ def create_dagrun( :param conf: Dict containing configuration/parameters to pass to the DAG :param creating_job_id: id of the job creating this DagRun :param session: database session - :param dag_version: The DagVersion object for this run + :param dag_hash: Hash of Serialized DAG :param data_interval: Data interval of the DagRun :param backfill_id: id of the backfill run if one exists """ @@ -1801,7 +1800,7 @@ def create_dagrun( conf=conf, state=state, run_type=run_type, - dag_version=dag_version, + dag_hash=dag_hash, creating_job_id=creating_job_id, backfill_id=backfill_id, data_interval=data_interval, @@ -1834,6 +1833,7 @@ def bulk_write_to_db( orm_dags = dag_op.add_dags(session=session) dag_op.update_dags(orm_dags, processor_subdir=processor_subdir, session=session) + DagCode.bulk_sync_to_db((dag.fileloc for dag in dags), session=session) asset_op = AssetModelOperation.collect(dag_op.dags) @@ -2069,9 +2069,6 @@ class DagModel(Base): NUM_DAGS_PER_DAGRUN_QUERY = airflow_conf.getint( "scheduler", "max_dagruns_to_create_per_loop", fallback=10 ) - dag_versions = relationship( - "DagVersion", back_populates="dag_model", cascade="all, delete, delete-orphan" - ) def __init__(self, **kwargs): super().__init__(**kwargs) @@ -2278,10 +2275,9 @@ def dag_ready(dag_id: str, cond: BaseAsset, statuses: dict) -> bool | None: dag_statuses = {} for dag_id, records in by_dag.items(): dag_statuses[dag_id] = {x.asset.uri: True for x in records} - ser_dags = SerializedDagModel.get_latest_serialized_dags( - dag_ids=list(dag_statuses.keys()), session=session - ) - + ser_dags = session.scalars( + select(SerializedDagModel).where(SerializedDagModel.dag_id.in_(dag_statuses.keys())) + ).all() for ser_dag in ser_dags: dag_id = ser_dag.dag_id statuses = dag_statuses[dag_id] @@ -2456,7 +2452,6 @@ def _get_or_create_dagrun( if dr: session.delete(dr) session.commit() - dag_version = DagVersion.get_latest_version(dag.dag_id, session=session) dr = dag.create_dagrun( state=DagRunState.RUNNING, execution_date=execution_date, @@ -2466,7 +2461,6 @@ def _get_or_create_dagrun( conf=conf, data_interval=data_interval, triggered_by=triggered_by, - dag_version=dag_version, ) log.info("created dagrun %s", dr) return dr diff --git a/airflow/models/dag_version.py b/airflow/models/dag_version.py deleted file mode 100644 index 92511f93f5f6f..0000000000000 --- 
a/airflow/models/dag_version.py +++ /dev/null @@ -1,167 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import annotations - -import logging -from typing import TYPE_CHECKING - -import uuid6 -from sqlalchemy import Column, ForeignKey, Integer, UniqueConstraint, select -from sqlalchemy.orm import relationship -from sqlalchemy_utils import UUIDType - -from airflow.models.base import Base, StringID -from airflow.utils import timezone -from airflow.utils.session import NEW_SESSION, provide_session -from airflow.utils.sqlalchemy import UtcDateTime, with_row_locks - -if TYPE_CHECKING: - from sqlalchemy.orm import Session - from sqlalchemy.sql import Select - -log = logging.getLogger(__name__) - - -class DagVersion(Base): - """Model to track the versions of DAGs in the database.""" - - __tablename__ = "dag_version" - id = Column(UUIDType(binary=False), primary_key=True, default=uuid6.uuid7) - version_number = Column(Integer, nullable=False, default=1) - version_name = Column(StringID()) - dag_id = Column(StringID(), ForeignKey("dag.dag_id", ondelete="CASCADE"), nullable=False) - dag_model = relationship("DagModel", back_populates="dag_versions") - dag_code = relationship( - "DagCode", - back_populates="dag_version", - uselist=False, - cascade="all, delete, delete-orphan", - cascade_backrefs=False, - ) - serialized_dag = relationship( - "SerializedDagModel", - back_populates="dag_version", - uselist=False, - cascade="all, delete, delete-orphan", - cascade_backrefs=False, - ) - dag_runs = relationship("DagRun", back_populates="dag_version", cascade="all, delete, delete-orphan") - task_instances = relationship("TaskInstance", back_populates="dag_version") - created_at = Column(UtcDateTime, default=timezone.utcnow) - - __table_args__ = ( - UniqueConstraint("dag_id", "version_number", name="dag_id_v_name_v_number_unique_constraint"), - ) - - def __repr__(self): - """Represent the object as a string.""" - return f"" - - @classmethod - @provide_session - def write_dag( - cls, - *, - dag_id: str, - version_name: str | None = None, - version_number: int = 1, - session: Session = NEW_SESSION, - ) -> DagVersion: - """ - Write a new DagVersion into database. - - Checks if a version of the DAG exists and increments the version number if it does. - - :param dag_id: The DAG ID. - :param version_name: The version name. - :param version_number: The version number. - :param session: The database session. - :return: The DagVersion object. 
- """ - existing_dag_version = session.scalar( - with_row_locks(cls._latest_version_select(dag_id), of=DagVersion, session=session, nowait=True) - ) - if existing_dag_version: - version_number = existing_dag_version.version_number + 1 - - dag_version = DagVersion( - dag_id=dag_id, - version_number=version_number, - version_name=version_name, - ) - log.debug("Writing DagVersion %s to the DB", dag_version) - session.add(dag_version) - # Flush is necessary here due to the unique constraint and other linked tables - session.flush() - log.debug("DagVersion %s written to the DB", dag_version) - return dag_version - - @classmethod - def _latest_version_select(cls, dag_id: str) -> Select: - """ - Get the select object to get the latest version of the DAG. - - :param dag_id: The DAG ID. - :return: The select object. - """ - return select(cls).where(cls.dag_id == dag_id).order_by(cls.created_at.desc()).limit(1) - - @classmethod - @provide_session - def get_latest_version(cls, dag_id: str, *, session: Session = NEW_SESSION) -> DagVersion | None: - """ - Get the latest version of the DAG. - - :param dag_id: The DAG ID. - :param session: The database session. - :return: The latest version of the DAG or None if not found. - """ - return session.scalar(cls._latest_version_select(dag_id)) - - @classmethod - @provide_session - def get_version( - cls, - dag_id: str, - version_number: int = 1, - *, - session: Session = NEW_SESSION, - ) -> DagVersion | None: - """ - Get the version of the DAG. - - :param dag_id: The DAG ID. - :param version_number: The version number. - :param session: The database session. - :return: The version of the DAG or None if not found. - """ - version_select_obj = ( - select(cls) - .where(cls.dag_id == dag_id, cls.version_number == version_number) - .order_by(cls.version_number.desc()) - .limit(1) - ) - return session.scalar(version_select_obj) - - @property - def version(self) -> str: - """A human-friendly representation of the version.""" - name = f"{self.version_number}" - if self.version_name: - name = f"{self.version_name}-{self.version_number}" - return name diff --git a/airflow/models/dagbag.py b/airflow/models/dagbag.py index 5b57c7983ea14..c9ad8edaa4018 100644 --- a/airflow/models/dagbag.py +++ b/airflow/models/dagbag.py @@ -650,12 +650,13 @@ def _serialize_dag_capturing_errors(dag, session, processor_subdir): ) log.debug("Calling the DAG.bulk_sync_to_db method") try: - DAG.bulk_write_to_db(dags.values(), processor_subdir=processor_subdir, session=session) # Write Serialized DAGs to DB, capturing errors for dag in dags.values(): serialize_errors.extend( _serialize_dag_capturing_errors(dag, session, processor_subdir) ) + + DAG.bulk_write_to_db(dags.values(), processor_subdir=processor_subdir, session=session) except OperationalError: session.rollback() raise diff --git a/airflow/models/dagcode.py b/airflow/models/dagcode.py index c78f6cafaa6fa..321f819999bf6 100644 --- a/airflow/models/dagcode.py +++ b/airflow/models/dagcode.py @@ -17,30 +17,26 @@ from __future__ import annotations import logging +import os import struct -from typing import TYPE_CHECKING, Collection +from datetime import datetime +from typing import TYPE_CHECKING, Collection, Iterable -import uuid6 -from sqlalchemy import BigInteger, Column, ForeignKey, String, Text, delete, select +from sqlalchemy import BigInteger, Column, String, Text, delete, select from sqlalchemy.dialects.mysql import MEDIUMTEXT -from sqlalchemy.orm import relationship from sqlalchemy.sql.expression import literal -from 
sqlalchemy_utils import UUIDType from airflow.api_internal.internal_api_call import internal_api_call -from airflow.configuration import conf -from airflow.exceptions import DagCodeNotFound +from airflow.exceptions import AirflowException, DagCodeNotFound from airflow.models.base import Base from airflow.utils import timezone -from airflow.utils.file import open_maybe_zipped +from airflow.utils.file import correct_maybe_zipped, open_maybe_zipped from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.sqlalchemy import UtcDateTime if TYPE_CHECKING: from sqlalchemy.orm import Session - from airflow.models.dag_version import DagVersion - log = logging.getLogger(__name__) @@ -54,38 +50,84 @@ class DagCode(Base): """ __tablename__ = "dag_code" - id = Column(UUIDType(binary=False), primary_key=True, default=uuid6.uuid7) - fileloc_hash = Column(BigInteger, nullable=False) + + fileloc_hash = Column(BigInteger, nullable=False, primary_key=True, autoincrement=False) fileloc = Column(String(2000), nullable=False) # The max length of fileloc exceeds the limit of indexing. last_updated = Column(UtcDateTime, nullable=False) source_code = Column(Text().with_variant(MEDIUMTEXT(), "mysql"), nullable=False) - dag_version_id = Column( - UUIDType(binary=False), ForeignKey("dag_version.id", ondelete="CASCADE"), nullable=False, unique=True - ) - dag_version = relationship("DagVersion", back_populates="dag_code", uselist=False) - def __init__(self, dag_version, full_filepath: str, source_code: str | None = None): - self.dag_version = dag_version + def __init__(self, full_filepath: str, source_code: str | None = None): self.fileloc = full_filepath self.fileloc_hash = DagCode.dag_fileloc_hash(self.fileloc) self.last_updated = timezone.utcnow() self.source_code = source_code or DagCode.code(self.fileloc) - @classmethod @provide_session - def write_dag(cls, dag_version: DagVersion, fileloc: str, session: Session = NEW_SESSION) -> DagCode: + def sync_to_db(self, session: Session = NEW_SESSION) -> None: """ Write code into database. - :param fileloc: file path of DAG to sync :param session: ORM Session """ - log.debug("Writing DAG file %s into DagCode table", fileloc) - dag_code = DagCode(dag_version, fileloc, cls._get_code_from_file(fileloc)) - session.add(dag_code) - log.debug("DAG file %s written into DagCode table", fileloc) - return dag_code + self.bulk_sync_to_db([self.fileloc], session) + + @classmethod + @provide_session + def bulk_sync_to_db(cls, filelocs: Iterable[str], session: Session = NEW_SESSION) -> None: + """ + Write code in bulk into database. 
+ + :param filelocs: file paths of DAGs to sync + :param session: ORM Session + """ + filelocs = set(filelocs) + filelocs_to_hashes = {fileloc: DagCode.dag_fileloc_hash(fileloc) for fileloc in filelocs} + existing_orm_dag_codes = session.scalars( + select(DagCode) + .filter(DagCode.fileloc_hash.in_(filelocs_to_hashes.values())) + .with_for_update(of=DagCode) + ).all() + + if existing_orm_dag_codes: + existing_orm_dag_codes_map = { + orm_dag_code.fileloc: orm_dag_code for orm_dag_code in existing_orm_dag_codes + } + else: + existing_orm_dag_codes_map = {} + + existing_orm_dag_codes_by_fileloc_hashes = {orm.fileloc_hash: orm for orm in existing_orm_dag_codes} + existing_orm_filelocs = {orm.fileloc for orm in existing_orm_dag_codes_by_fileloc_hashes.values()} + if not existing_orm_filelocs.issubset(filelocs): + conflicting_filelocs = existing_orm_filelocs.difference(filelocs) + hashes_to_filelocs = {DagCode.dag_fileloc_hash(fileloc): fileloc for fileloc in filelocs} + message = "" + for fileloc in conflicting_filelocs: + filename = hashes_to_filelocs[DagCode.dag_fileloc_hash(fileloc)] + message += ( + f"Filename '{filename}' causes a hash collision in the " + f"database with '{fileloc}'. Please rename the file." + ) + raise AirflowException(message) + + existing_filelocs = {dag_code.fileloc for dag_code in existing_orm_dag_codes} + missing_filelocs = filelocs.difference(existing_filelocs) + + for fileloc in missing_filelocs: + orm_dag_code = DagCode(fileloc, cls._get_code_from_file(fileloc)) + session.add(orm_dag_code) + + for fileloc in existing_filelocs: + current_version = existing_orm_dag_codes_by_fileloc_hashes[filelocs_to_hashes[fileloc]] + file_mod_time = datetime.fromtimestamp( + os.path.getmtime(correct_maybe_zipped(fileloc)), tz=timezone.utc + ) + + if file_mod_time > current_version.last_updated: + orm_dag_code = existing_orm_dag_codes_map[fileloc] + orm_dag_code.last_updated = file_mod_time + orm_dag_code.source_code = cls._get_code_from_file(orm_dag_code.fileloc) + session.merge(orm_dag_code) @classmethod @internal_api_call @@ -128,9 +170,7 @@ def has_dag(cls, fileloc: str, session: Session = NEW_SESSION) -> bool: """ fileloc_hash = cls.dag_fileloc_hash(fileloc) return ( - session.scalars( - select(literal(True)).where(cls.fileloc_hash == fileloc_hash).limit(1) - ).one_or_none() + session.scalars(select(literal(True)).where(cls.fileloc_hash == fileloc_hash)).one_or_none() is not None ) @@ -156,25 +196,14 @@ def code(cls, fileloc, session: Session = NEW_SESSION) -> str: @staticmethod def _get_code_from_file(fileloc): - try: - with open_maybe_zipped(fileloc, "r") as f: - code = f.read() - return code - except FileNotFoundError: - test_mode = conf.get("core", "unit_test_mode") - if test_mode: - return "source_code" - raise + with open_maybe_zipped(fileloc, "r") as f: + code = f.read() + return code @classmethod @provide_session def _get_code_from_db(cls, fileloc, session: Session = NEW_SESSION) -> str: - dag_code = session.scalar( - select(cls) - .where(cls.fileloc_hash == cls.dag_fileloc_hash(fileloc)) - .order_by(cls.last_updated.desc()) - .limit(1) - ) + dag_code = session.scalar(select(cls).where(cls.fileloc_hash == cls.dag_fileloc_hash(fileloc))) if not dag_code: raise DagCodeNotFound() else: diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index 635cd73ccd8d7..5de0466a6be0e 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -48,7 +48,6 @@ from sqlalchemy.orm import declared_attr, joinedload, relationship, synonym, validates from 
sqlalchemy.sql.expression import case, false, select, true from sqlalchemy.sql.functions import coalesce -from sqlalchemy_utils import UUIDType from airflow import settings from airflow.api_internal.internal_api_call import internal_api_call @@ -60,7 +59,6 @@ from airflow.models.abstractoperator import NotMapped from airflow.models.backfill import Backfill from airflow.models.base import Base, StringID -from airflow.models.dag_version import DagVersion from airflow.models.expandinput import NotFullyPopulated from airflow.models.taskinstance import TaskInstance as TI from airflow.models.tasklog import LogTemplate @@ -146,6 +144,7 @@ class DagRun(Base, LoggingMixin): data_interval_end = Column(UtcDateTime) # When a scheduler last attempted to schedule TIs for this DagRun last_scheduling_decision = Column(UtcDateTime) + dag_hash = Column(String(32)) # Foreign key to LogTemplate. DagRun rows created prior to this column's # existence have this set to NULL. Later rows automatically populate this on # insert to point to the latest LogTemplate entry. @@ -165,8 +164,6 @@ class DagRun(Base, LoggingMixin): It's possible this could change if e.g. the dag run is cleared to be rerun, or perhaps re-backfilled. """ - dag_version_id = Column(UUIDType(binary=False), ForeignKey("dag_version.id", ondelete="CASCADE")) - dag_version = relationship("DagVersion", back_populates="dag_runs") # Remove this `if` after upgrading Sphinx-AutoAPI if not TYPE_CHECKING and "BUILDING_AIRFLOW_DOCS" in os.environ: @@ -234,11 +231,11 @@ def __init__( conf: Any | None = None, state: DagRunState | None = None, run_type: str | None = None, + dag_hash: str | None = None, creating_job_id: int | None = None, data_interval: tuple[datetime, datetime] | None = None, triggered_by: DagRunTriggeredByType | None = None, backfill_id: int | None = None, - dag_version: DagVersion | None = None, ): if data_interval is None: # Legacy: Only happen for runs created prior to Airflow 2.2. 
@@ -259,11 +256,11 @@ def __init__( else: self.queued_at = queued_at self.run_type = run_type + self.dag_hash = dag_hash self.creating_job_id = creating_job_id self.backfill_id = backfill_id self.clear_number = 0 self.triggered_by = triggered_by - self.dag_version = dag_version super().__init__() def __repr__(self): @@ -997,9 +994,8 @@ def recalculate(self) -> _UnfinishedStates: "DagRun Finished: dag_id=%s, execution_date=%s, run_id=%s, " "run_start_date=%s, run_end_date=%s, run_duration=%s, " "state=%s, external_trigger=%s, run_type=%s, " - "data_interval_start=%s, data_interval_end=%s, dag_version_name=%s" + "data_interval_start=%s, data_interval_end=%s, dag_hash=%s" ) - dagv = session.scalar(select(DagVersion).where(DagVersion.id == self.dag_version_id)) self.log.info( msg, self.dag_id, @@ -1017,7 +1013,7 @@ def recalculate(self) -> _UnfinishedStates: self.run_type, self.data_interval_start, self.data_interval_end, - dagv.version if dagv else None, + self.dag_hash, ) with Trace.start_span_from_dagrun(dagrun=self) as span: @@ -1041,7 +1037,7 @@ def recalculate(self) -> _UnfinishedStates: "run_type": str(self.run_type), "data_interval_start": str(self.data_interval_start), "data_interval_end": str(self.data_interval_end), - "dag_version": str(dagv.version if dagv else None), + "dag_hash": str(self.dag_hash), "conf": str(self.conf), } if span.is_recording(): @@ -1458,9 +1454,7 @@ def _get_task_creator( def create_ti_mapping(task: Operator, indexes: Iterable[int]) -> Iterator[dict[str, Any]]: created_counts[task.task_type] += 1 for map_index in indexes: - yield TI.insert_mapping( - self.run_id, task, map_index=map_index, dag_version_id=self.dag_version_id - ) + yield TI.insert_mapping(self.run_id, task, map_index=map_index) creator = create_ti_mapping @@ -1468,7 +1462,7 @@ def create_ti_mapping(task: Operator, indexes: Iterable[int]) -> Iterator[dict[s def create_ti(task: Operator, indexes: Iterable[int]) -> Iterator[TI]: for map_index in indexes: - ti = TI(task, run_id=self.run_id, map_index=map_index, dag_version_id=self.dag_version_id) + ti = TI(task, run_id=self.run_id, map_index=map_index) ti_mutation_hook(ti) created_counts[ti.operator] += 1 yield ti diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 0d5667cd48fc9..32be31d721e34 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -25,17 +25,14 @@ from typing import TYPE_CHECKING, Any, Collection import sqlalchemy_jsonfield -import uuid6 -from sqlalchemy import Column, ForeignKey, LargeBinary, String, exc, or_, select +from sqlalchemy import BigInteger, Column, Index, LargeBinary, String, and_, exc, or_, select from sqlalchemy.orm import backref, foreign, relationship from sqlalchemy.sql.expression import func, literal -from sqlalchemy_utils import UUIDType from airflow.api_internal.internal_api_call import internal_api_call from airflow.exceptions import TaskNotFound from airflow.models.base import ID_LEN, Base from airflow.models.dag import DagModel -from airflow.models.dag_version import DagVersion from airflow.models.dagcode import DagCode from airflow.models.dagrun import DagRun from airflow.serialization.dag_dependency import DagDependency @@ -79,14 +76,19 @@ class SerializedDagModel(Base): """ __tablename__ = "serialized_dag" - id = Column(UUIDType(binary=False), primary_key=True, default=uuid6.uuid7) - dag_id = Column(String(ID_LEN), nullable=False) + + dag_id = Column(String(ID_LEN), primary_key=True) + fileloc = Column(String(2000), nullable=False) + # 
The max length of fileloc exceeds the limit of indexing. + fileloc_hash = Column(BigInteger(), nullable=False) _data = Column("data", sqlalchemy_jsonfield.JSONField(json=json), nullable=True) _data_compressed = Column("data_compressed", LargeBinary, nullable=True) last_updated = Column(UtcDateTime, nullable=False) dag_hash = Column(String(32), nullable=False) processor_subdir = Column(String(2000), nullable=True) + __table_args__ = (Index("idx_fileloc_hash", fileloc_hash, unique=False),) + dag_runs = relationship( DagRun, primaryjoin=dag_id == foreign(DagRun.dag_id), # type: ignore @@ -101,15 +103,13 @@ class SerializedDagModel(Base): innerjoin=True, backref=backref("serialized_dag", uselist=False, innerjoin=True), ) - dag_version_id = Column( - UUIDType(binary=False), ForeignKey("dag_version.id", ondelete="CASCADE"), nullable=False, unique=True - ) - dag_version = relationship("DagVersion", back_populates="serialized_dag") load_op_links = True def __init__(self, dag: DAG, processor_subdir: str | None = None) -> None: self.dag_id = dag.dag_id + self.fileloc = dag.fileloc + self.fileloc_hash = DagCode.dag_fileloc_hash(self.fileloc) self.last_updated = timezone.utcnow() self.processor_subdir = processor_subdir @@ -194,9 +194,7 @@ def write_dag( log.debug("Checking if DAG (%s) changed", dag.dag_id) new_serialized_dag = cls(dag, processor_subdir) serialized_dag_db = session.execute( - select(cls.dag_hash, cls.processor_subdir) - .where(cls.dag_id == dag.dag_id) - .order_by(cls.last_updated.desc()) + select(cls.dag_hash, cls.processor_subdir).where(cls.dag_id == dag.dag_id) ).first() if ( @@ -206,52 +204,12 @@ def write_dag( ): log.debug("Serialized DAG (%s) is unchanged. Skipping writing to DB", dag.dag_id) return False - dagv = DagVersion.write_dag( - version_name=dag.version_name, - dag_id=dag.dag_id, - session=session, - ) + log.debug("Writing Serialized DAG: %s to the DB", dag.dag_id) - new_serialized_dag.dag_version = dagv - session.add(new_serialized_dag) + session.merge(new_serialized_dag) log.debug("DAG: %s written to the DB", dag.dag_id) - - DagCode.write_dag(dagv, dag.fileloc, session=session) return True - @classmethod - def latest_item_select_object(cls, dag_id): - return select(cls).where(cls.dag_id == dag_id).order_by(cls.last_updated.desc()).limit(1) - - @classmethod - @provide_session - def get_latest_serialized_dags( - cls, *, dag_ids: list[str], session: Session = NEW_SESSION - ) -> list[SerializedDagModel]: - """ - Get the latest serialized dags of given DAGs. - - :param dag_ids: The list of DAG IDs. - :param session: The database session. - :return: The latest serialized dag of the DAGs. 
- """ - # Subquery to get the latest serdag per dag_id - latest_serdag_subquery = ( - session.query(cls.dag_id, func.max(cls.last_updated).label("last_updated")) - .filter(cls.dag_id.in_(dag_ids)) - .group_by(cls.dag_id) - .subquery() - ) - latest_serdags = session.scalars( - select(cls) - .join( - latest_serdag_subquery, - cls.last_updated == latest_serdag_subquery.c.last_updated, - ) - .where(cls.dag_id.in_(dag_ids)) - ).all() - return latest_serdags or [] - @classmethod @provide_session def read_all_dags(cls, session: Session = NEW_SESSION) -> dict[str, SerializedDAG]: @@ -261,18 +219,7 @@ def read_all_dags(cls, session: Session = NEW_SESSION) -> dict[str, SerializedDA :param session: ORM Session :returns: a dict of DAGs read from database """ - latest_serialized_dag_subquery = ( - session.query(cls.dag_id, func.max(cls.last_updated).label("max_updated")) - .group_by(cls.dag_id) - .subquery() - ) - serialized_dags = session.scalars( - select(cls).join( - latest_serialized_dag_subquery, - (cls.dag_id == latest_serialized_dag_subquery.c.dag_id) - and (cls.last_updated == latest_serialized_dag_subquery.c.max_updated), - ) - ) + serialized_dags = session.scalars(select(cls)) dags = {} for row in serialized_dags: @@ -340,17 +287,22 @@ def remove_deleted_dags( :param processor_subdir: dag processor subdir :param session: ORM Session """ + alive_fileloc_hashes = [DagCode.dag_fileloc_hash(fileloc) for fileloc in alive_dag_filelocs] + log.debug( "Deleting Serialized DAGs (for which DAG files are deleted) from %s table ", cls.__tablename__ ) - # Deleting the DagModel cascade deletes the serialized Dag through the dag version relationship + session.execute( - DagModel.__table__.delete().where( - DagModel.fileloc.notin_(alive_dag_filelocs), - or_( - DagModel.processor_subdir.is_(None), - DagModel.processor_subdir == processor_subdir, - ), + cls.__table__.delete().where( + and_( + cls.fileloc_hash.notin_(alive_fileloc_hashes), + cls.fileloc.notin_(alive_dag_filelocs), + or_( + cls.processor_subdir.is_(None), + cls.processor_subdir == processor_subdir, + ), + ) ) ) @@ -382,7 +334,11 @@ def get(cls, dag_id: str, session: Session = NEW_SESSION) -> SerializedDagModel :param dag_id: the DAG to fetch :param session: ORM Session """ - return session.scalar(cls.latest_item_select_object(dag_id)) + row = session.scalar(select(cls).where(cls.dag_id == dag_id)) + if row: + return row + + return session.scalar(select(cls).where(cls.dag_id == dag_id)) @staticmethod @provide_session @@ -417,9 +373,7 @@ def get_last_updated_datetime(cls, dag_id: str, session: Session = NEW_SESSION) :param dag_id: DAG ID :param session: ORM Session """ - return session.scalar( - select(cls.last_updated).where(cls.dag_id == dag_id).order_by(cls.last_updated.desc()).limit(1) - ) + return session.scalar(select(cls.last_updated).where(cls.dag_id == dag_id)) @classmethod @provide_session @@ -441,9 +395,7 @@ def get_latest_version_hash(cls, dag_id: str, session: Session = NEW_SESSION) -> :param session: ORM Session :return: DAG Hash, or None if the DAG is not found """ - return session.scalar( - select(cls.dag_hash).where(cls.dag_id == dag_id).order_by(cls.last_updated.desc()).limit(1) - ) + return session.scalar(select(cls.dag_hash).where(cls.dag_id == dag_id)) @classmethod def get_latest_version_hash_and_updated_datetime( @@ -461,10 +413,7 @@ def get_latest_version_hash_and_updated_datetime( :return: A tuple of DAG Hash and last updated datetime, or None if the DAG is not found """ return session.execute( - select(cls.dag_hash, 
cls.last_updated) - .where(cls.dag_id == dag_id) - .order_by(cls.last_updated.desc()) - .limit(1) + select(cls.dag_hash, cls.last_updated).where(cls.dag_id == dag_id) ).one_or_none() @classmethod @@ -475,27 +424,14 @@ def get_dag_dependencies(cls, session: Session = NEW_SESSION) -> dict[str, list[ :param session: ORM Session """ - latest_sdag_subquery = ( - session.query(cls.dag_id, func.max(cls.last_updated).label("max_updated")) - .group_by(cls.dag_id) - .subquery() - ) if session.bind.dialect.name in ["sqlite", "mysql"]: query = session.execute( - select(cls.dag_id, func.json_extract(cls._data, "$.dag.dag_dependencies")).join( - latest_sdag_subquery, - (cls.dag_id == latest_sdag_subquery.c.dag_id) - and (cls.last_updated == latest_sdag_subquery.c.max_updated), - ) + select(cls.dag_id, func.json_extract(cls._data, "$.dag.dag_dependencies")) ) iterator = ((dag_id, json.loads(deps_data) if deps_data else []) for dag_id, deps_data in query) else: iterator = session.execute( - select(cls.dag_id, func.json_extract_path(cls._data, "dag", "dag_dependencies")).join( - latest_sdag_subquery, - (cls.dag_id == latest_sdag_subquery.c.dag_id) - and (cls.last_updated == latest_sdag_subquery.c.max_updated), - ) + select(cls.dag_id, func.json_extract_path(cls._data, "dag", "dag_dependencies")) ) return {dag_id: [DagDependency(**d) for d in (deps_data or [])] for dag_id, deps_data in iterator} @@ -503,9 +439,10 @@ def get_dag_dependencies(cls, session: Session = NEW_SESSION) -> dict[str, list[ @internal_api_call @provide_session def get_serialized_dag(dag_id: str, task_id: str, session: Session = NEW_SESSION) -> Operator | None: + from airflow.models.serialized_dag import SerializedDagModel + try: - # get the latest version of the DAG - model = session.scalar(SerializedDagModel.latest_item_select_object(dag_id)) + model = session.get(SerializedDagModel, dag_id) if model: return model.dag.get_task(task_id) except (exc.NoResultFound, TaskNotFound): diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index a8a96a25f6298..c525a40a14ab5 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -45,7 +45,6 @@ Column, DateTime, Float, - ForeignKey, ForeignKeyConstraint, Index, Integer, @@ -70,7 +69,6 @@ from sqlalchemy.orm import lazyload, reconstructor, relationship from sqlalchemy.orm.attributes import NO_VALUE, set_committed_value from sqlalchemy.sql.expression import case, select -from sqlalchemy_utils import UUIDType from airflow import settings from airflow.api_internal.internal_api_call import InternalApiConfig, internal_api_call @@ -823,7 +821,6 @@ def _set_ti_attrs(target, source, include_dag_run=False): target.trigger_id = source.trigger_id target.next_method = source.next_method target.next_kwargs = source.next_kwargs - target.dag_version_id = source.dag_version_id if include_dag_run: target.execution_date = source.execution_date @@ -842,7 +839,7 @@ def _set_ti_attrs(target, source, include_dag_run=False): target.dag_run.data_interval_start = source.dag_run.data_interval_start target.dag_run.data_interval_end = source.dag_run.data_interval_end target.dag_run.last_scheduling_decision = source.dag_run.last_scheduling_decision - target.dag_run.dag_version_id = source.dag_run.dag_version_id + target.dag_run.dag_hash = source.dag_run.dag_hash target.dag_run.updated_at = source.dag_run.updated_at target.dag_run.log_template_id = source.dag_run.log_template_id @@ -1879,10 +1876,8 @@ class TaskInstance(Base, LoggingMixin): next_kwargs = 
Column(MutableDict.as_mutable(ExtendedJSON)) _task_display_property_value = Column("task_display_name", String(2000), nullable=True) - dag_version_id = Column(UUIDType(binary=False), ForeignKey("dag_version.id", ondelete="CASCADE")) - dag_version = relationship("DagVersion", back_populates="task_instances") # If adding new fields here then remember to add them to - # _set_ti_attrs() or they won't display in the UI correctly + # refresh_from_db() or they won't display in the UI correctly __table_args__ = ( Index("ti_dag_state", dag_id, state), @@ -1947,13 +1942,11 @@ def __init__( run_id: str | None = None, state: str | None = None, map_index: int = -1, - dag_version_id: UUIDType | None = None, ): super().__init__() self.dag_id = task.dag_id self.task_id = task.task_id self.map_index = map_index - self.dag_version_id = dag_version_id self.refresh_from_task(task) if TYPE_CHECKING: assert self.task @@ -1985,7 +1978,7 @@ def stats_tags(self) -> dict[str, str]: return _stats_tags(task_instance=self) @staticmethod - def insert_mapping(run_id: str, task: Operator, map_index: int, dag_version_id: int) -> dict[str, Any]: + def insert_mapping(run_id: str, task: Operator, map_index: int) -> dict[str, Any]: """ Insert mapping. @@ -2014,7 +2007,6 @@ def insert_mapping(run_id: str, task: Operator, map_index: int, dag_version_id: "custom_operator_name": getattr(task, "custom_operator_name", None), "map_index": map_index, "_task_display_property_value": task.task_display_name, - "dag_version_id": dag_version_id, } @reconstructor diff --git a/airflow/models/taskinstancehistory.py b/airflow/models/taskinstancehistory.py index e587cf083e3b5..8c77daf925793 100644 --- a/airflow/models/taskinstancehistory.py +++ b/airflow/models/taskinstancehistory.py @@ -33,7 +33,6 @@ text, ) from sqlalchemy.ext.mutable import MutableDict -from sqlalchemy_utils import UUIDType from airflow.models.base import Base, StringID from airflow.utils import timezone @@ -92,7 +91,6 @@ class TaskInstanceHistory(Base): next_kwargs = Column(MutableDict.as_mutable(ExtendedJSON)) task_display_name = Column("task_display_name", String(2000), nullable=True) - dag_version_id = Column(UUIDType(binary=False)) def __init__( self, diff --git a/airflow/serialization/pydantic/dag_run.py b/airflow/serialization/pydantic/dag_run.py index a0175e3749d9c..fd12ca12c0184 100644 --- a/airflow/serialization/pydantic/dag_run.py +++ b/airflow/serialization/pydantic/dag_run.py @@ -52,7 +52,7 @@ class DagRunPydantic(BaseModelPydantic): data_interval_start: Optional[datetime] data_interval_end: Optional[datetime] last_scheduling_decision: Optional[datetime] - dag_version_id: Optional[int] + dag_hash: Optional[str] updated_at: Optional[datetime] dag: Optional[PydanticDag] consumed_asset_events: List[AssetEventPydantic] # noqa: UP006 diff --git a/airflow/serialization/schema.json b/airflow/serialization/schema.json index b26b59339816c..32ccd3dfff9c1 100644 --- a/airflow/serialization/schema.json +++ b/airflow/serialization/schema.json @@ -158,7 +158,6 @@ }, "dag_display_name": { "type" : "string"}, "description": { "type" : "string"}, - "version_name": {"type": "string"}, "_concurrency": { "type" : "number"}, "max_active_tasks": { "type" : "number"}, "max_active_runs": { "type" : "number"}, diff --git a/airflow/utils/db.py b/airflow/utils/db.py index d5218b6050e6d..d23f54068b59e 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -97,7 +97,7 @@ class MappedClassProtocol(Protocol): "2.9.2": "686269002441", "2.10.0": "22ed7efa9da2", "2.10.3": "5f2621c13b39", 
- "3.0.0": "2b47dc6bc8df", + "3.0.0": "d03e4a635aa3", } diff --git a/airflow/www/views.py b/airflow/www/views.py index dd8279b560071..2c87c32cae614 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -105,7 +105,6 @@ from airflow.models import Connection, DagModel, DagTag, Log, Trigger, XCom from airflow.models.asset import AssetDagRunQueue, AssetEvent, AssetModel, DagScheduleAssetReference from airflow.models.dag import get_asset_triggered_next_run_info -from airflow.models.dag_version import DagVersion from airflow.models.dagrun import RUN_ID_REGEX, DagRun, DagRunType from airflow.models.errors import ParseImportError from airflow.models.serialized_dag import SerializedDagModel @@ -2202,7 +2201,6 @@ def trigger(self, dag_id: str, session: Session = NEW_SESSION): ) try: - dag_version = DagVersion.get_latest_version(dag.dag_id) dag_run = dag.create_dagrun( run_type=DagRunType.MANUAL, execution_date=execution_date, @@ -2210,7 +2208,7 @@ def trigger(self, dag_id: str, session: Session = NEW_SESSION): state=DagRunState.QUEUED, conf=run_conf, external_trigger=True, - dag_version=dag_version, + dag_hash=get_airflow_app().dag_bag.dags_hash.get(dag_id), run_id=run_id, triggered_by=DagRunTriggeredByType.UI, ) diff --git a/docs/apache-airflow/img/airflow_erd.sha256 b/docs/apache-airflow/img/airflow_erd.sha256 index 48700f5604e58..572ce439c231b 100644 --- a/docs/apache-airflow/img/airflow_erd.sha256 +++ b/docs/apache-airflow/img/airflow_erd.sha256 @@ -1 +1 @@ -4adae6a26c3378a5da5468eecfcbd27948c2af285a8dcf12ff8c3969e19f19f7 \ No newline at end of file +5ec1019b1b0f43b29fc83638c2a13c0bda90b7e4f0ff542aeab401bbfa9a83e4 \ No newline at end of file diff --git a/docs/apache-airflow/img/airflow_erd.svg b/docs/apache-airflow/img/airflow_erd.svg index b3caf10cba3e1..ba935dd6c4be4 100644 --- a/docs/apache-airflow/img/airflow_erd.svg +++ b/docs/apache-airflow/img/airflow_erd.svg @@ -4,2353 +4,2271 @@ - - + + %3 - + log - -log - -id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - -dttm - - [TIMESTAMP] - -event - - [VARCHAR(60)] - -execution_date - - [TIMESTAMP] - -extra - - [TEXT] - -map_index - - [INTEGER] - -owner - - [VARCHAR(500)] - -owner_display_name - - [VARCHAR(500)] - -run_id - - [VARCHAR(250)] - -task_id - - [VARCHAR(250)] - -try_number - - [INTEGER] + +log + +id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + +dttm + + [TIMESTAMP] + +event + + [VARCHAR(60)] + +execution_date + + [TIMESTAMP] + +extra + + [TEXT] + +map_index + + [INTEGER] + +owner + + [VARCHAR(500)] + +owner_display_name + + [VARCHAR(500)] + +run_id + + [VARCHAR(250)] + +task_id + + [VARCHAR(250)] + +try_number + + [INTEGER] slot_pool - -slot_pool - -id - - [INTEGER] - NOT NULL - -description - - [TEXT] - -include_deferred - - [BOOLEAN] - NOT NULL - -pool - - [VARCHAR(256)] - -slots - - [INTEGER] + +slot_pool + +id + + [INTEGER] + NOT NULL + +description + + [TEXT] + +include_deferred + + [BOOLEAN] + NOT NULL + +pool + + [VARCHAR(256)] + +slots + + [INTEGER] callback_request - -callback_request - -id - - [INTEGER] - NOT NULL - -callback_data - - [JSON] - NOT NULL - -callback_type - - [VARCHAR(20)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -priority_weight - - [INTEGER] - NOT NULL - -processor_subdir - - [VARCHAR(2000)] + +callback_request + +id + + [INTEGER] + NOT NULL + +callback_data + + [JSON] + NOT NULL + +callback_type + + [VARCHAR(20)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +priority_weight + + [INTEGER] + NOT NULL + +processor_subdir + + [VARCHAR(2000)] 
dag_priority_parsing_request - -dag_priority_parsing_request - -id - - [VARCHAR(32)] - NOT NULL - -fileloc - - [VARCHAR(2000)] - NOT NULL + +dag_priority_parsing_request + +id + + [VARCHAR(32)] + NOT NULL + +fileloc + + [VARCHAR(2000)] + NOT NULL - + +dag_code + +dag_code + +fileloc_hash + + [BIGINT] + NOT NULL + +fileloc + + [VARCHAR(2000)] + NOT NULL + +last_updated + + [TIMESTAMP] + NOT NULL + +source_code + + [TEXT] + NOT NULL + + + connection - -connection - -id - - [INTEGER] - NOT NULL - -conn_id - - [VARCHAR(250)] - NOT NULL - -conn_type - - [VARCHAR(500)] - NOT NULL - -description - - [TEXT] - -extra - - [TEXT] - -host - - [VARCHAR(500)] - -is_encrypted - - [BOOLEAN] - -is_extra_encrypted - - [BOOLEAN] - -login - - [TEXT] - -password - - [TEXT] - -port - - [INTEGER] - -schema - - [VARCHAR(500)] + +connection + +id + + [INTEGER] + NOT NULL + +conn_id + + [VARCHAR(250)] + NOT NULL + +conn_type + + [VARCHAR(500)] + NOT NULL + +description + + [TEXT] + +extra + + [TEXT] + +host + + [VARCHAR(500)] + +is_encrypted + + [BOOLEAN] + +is_extra_encrypted + + [BOOLEAN] + +login + + [TEXT] + +password + + [TEXT] + +port + + [INTEGER] + +schema + + [VARCHAR(500)] - + variable - -variable - -id - - [INTEGER] - NOT NULL - -description - - [TEXT] - -is_encrypted - - [BOOLEAN] - -key - - [VARCHAR(250)] - -val - - [TEXT] + +variable + +id + + [INTEGER] + NOT NULL + +description + + [TEXT] + +is_encrypted + + [BOOLEAN] + +key + + [VARCHAR(250)] + +val + + [TEXT] - + import_error - -import_error - -id - - [INTEGER] - NOT NULL - -filename - - [VARCHAR(1024)] - -processor_subdir - - [VARCHAR(2000)] - -stacktrace - - [TEXT] - -timestamp - - [TIMESTAMP] + +import_error + +id + + [INTEGER] + NOT NULL + +filename + + [VARCHAR(1024)] + +processor_subdir + + [VARCHAR(2000)] + +stacktrace + + [TEXT] + +timestamp + + [TIMESTAMP] - + job - -job - -id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - -end_date - - [TIMESTAMP] - -executor_class - - [VARCHAR(500)] - -hostname - - [VARCHAR(500)] - -job_type - - [VARCHAR(30)] - -latest_heartbeat - - [TIMESTAMP] - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -unixname - - [VARCHAR(1000)] + +job + +id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + +end_date + + [TIMESTAMP] + +executor_class + + [VARCHAR(500)] + +hostname + + [VARCHAR(500)] + +job_type + + [VARCHAR(30)] + +latest_heartbeat + + [TIMESTAMP] + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +unixname + + [VARCHAR(1000)] + + + +serialized_dag + +serialized_dag + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_hash + + [VARCHAR(32)] + NOT NULL + +data + + [JSON] + +data_compressed + + [BYTEA] + +fileloc + + [VARCHAR(2000)] + NOT NULL + +fileloc_hash + + [BIGINT] + NOT NULL + +last_updated + + [TIMESTAMP] + NOT NULL + +processor_subdir + + [VARCHAR(2000)] - + asset_alias - -asset_alias - -id - - [INTEGER] - NOT NULL - -group - - [VARCHAR(1500)] - NOT NULL - -name - - [VARCHAR(1500)] - NOT NULL + +asset_alias + +id + + [INTEGER] + NOT NULL + +group + + [VARCHAR(1500)] + NOT NULL + +name + + [VARCHAR(1500)] + NOT NULL - + asset_alias_asset - -asset_alias_asset - -alias_id - - [INTEGER] - NOT NULL - -asset_id - - [INTEGER] - NOT NULL + +asset_alias_asset + +alias_id + + [INTEGER] + NOT NULL + +asset_id + + [INTEGER] + NOT NULL asset_alias--asset_alias_asset - -0..N -1 + +0..N +1 - + asset_alias_asset_event - -asset_alias_asset_event - -alias_id - - [INTEGER] - NOT NULL - -event_id - - [INTEGER] - NOT NULL + +asset_alias_asset_event + +alias_id + + [INTEGER] + NOT NULL + +event_id 
+ + [INTEGER] + NOT NULL asset_alias--asset_alias_asset_event - -0..N -1 + +0..N +1 - + dag_schedule_asset_alias_reference - -dag_schedule_asset_alias_reference - -alias_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +dag_schedule_asset_alias_reference + +alias_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset_alias--dag_schedule_asset_alias_reference - -0..N -1 + +0..N +1 - + asset - -asset - -id - - [INTEGER] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -extra - - [JSON] - NOT NULL - -group - - [VARCHAR(1500)] - NOT NULL - -name - - [VARCHAR(1500)] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -uri - - [VARCHAR(1500)] - NOT NULL + +asset + +id + + [INTEGER] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +extra + + [JSON] + NOT NULL + +group + + [VARCHAR(1500)] + NOT NULL + +name + + [VARCHAR(1500)] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +uri + + [VARCHAR(1500)] + NOT NULL asset--asset_alias_asset - -0..N -1 + +0..N +1 - + asset_active - -asset_active - -name - - [VARCHAR(1500)] - NOT NULL - -uri - - [VARCHAR(1500)] - NOT NULL + +asset_active + +name + + [VARCHAR(1500)] + NOT NULL + +uri + + [VARCHAR(1500)] + NOT NULL asset--asset_active - -1 -1 + +1 +1 asset--asset_active - -1 -1 + +1 +1 - + dag_schedule_asset_reference - -dag_schedule_asset_reference - -asset_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +dag_schedule_asset_reference + +asset_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset--dag_schedule_asset_reference - -0..N -1 + +0..N +1 - + task_outlet_asset_reference - -task_outlet_asset_reference - -asset_id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +task_outlet_asset_reference + +asset_id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL asset--task_outlet_asset_reference - -0..N -1 + +0..N +1 - + asset_dag_run_queue - -asset_dag_run_queue - -asset_id - - [INTEGER] - NOT NULL - -target_dag_id - - [VARCHAR(250)] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL + +asset_dag_run_queue + +asset_id + + [INTEGER] + NOT NULL + +target_dag_id + + [VARCHAR(250)] + NOT NULL + +created_at + + [TIMESTAMP] + NOT NULL asset--asset_dag_run_queue - -0..N -1 + +0..N +1 - + asset_event - -asset_event - -id - - [INTEGER] - NOT NULL - -asset_id - - [INTEGER] - NOT NULL - -extra - - [JSON] - NOT NULL - -source_dag_id - - [VARCHAR(250)] - -source_map_index - - [INTEGER] - -source_run_id - - [VARCHAR(250)] - -source_task_id - - [VARCHAR(250)] - -timestamp - - [TIMESTAMP] - NOT NULL + +asset_event + +id + + [INTEGER] + NOT NULL + +asset_id + + [INTEGER] + NOT NULL + +extra + + [JSON] + NOT NULL + +source_dag_id + + [VARCHAR(250)] + +source_map_index + + [INTEGER] + +source_run_id + + [VARCHAR(250)] + +source_task_id + + [VARCHAR(250)] + +timestamp + + [TIMESTAMP] + NOT NULL asset_event--asset_alias_asset_event - -0..N -1 + +0..N +1 - + dagrun_asset_event - 
-dagrun_asset_event - -dag_run_id - - [INTEGER] - NOT NULL - -event_id - - [INTEGER] - NOT NULL + +dagrun_asset_event + +dag_run_id + + [INTEGER] + NOT NULL + +event_id + + [INTEGER] + NOT NULL asset_event--dagrun_asset_event - -0..N -1 + +0..N +1 - + dag - -dag - -dag_id - - [VARCHAR(250)] - NOT NULL - -asset_expression - - [JSON] - -dag_display_name - - [VARCHAR(2000)] - -default_view - - [VARCHAR(25)] - -description - - [TEXT] - -fileloc - - [VARCHAR(2000)] - -has_import_errors - - [BOOLEAN] - -has_task_concurrency_limits - - [BOOLEAN] - NOT NULL - -is_active - - [BOOLEAN] - -is_paused - - [BOOLEAN] - -last_expired - - [TIMESTAMP] - -last_parsed_time - - [TIMESTAMP] - -max_active_runs - - [INTEGER] - -max_active_tasks - - [INTEGER] - NOT NULL - -max_consecutive_failed_dag_runs - - [INTEGER] - NOT NULL - -next_dagrun - - [TIMESTAMP] - -next_dagrun_create_after - - [TIMESTAMP] - -next_dagrun_data_interval_end - - [TIMESTAMP] - -next_dagrun_data_interval_start - - [TIMESTAMP] - -owners - - [VARCHAR(2000)] - -processor_subdir - - [VARCHAR(2000)] - -timetable_description - - [VARCHAR(1000)] - -timetable_summary - - [TEXT] + +dag + +dag_id + + [VARCHAR(250)] + NOT NULL + +asset_expression + + [JSON] + +dag_display_name + + [VARCHAR(2000)] + +default_view + + [VARCHAR(25)] + +description + + [TEXT] + +fileloc + + [VARCHAR(2000)] + +has_import_errors + + [BOOLEAN] + +has_task_concurrency_limits + + [BOOLEAN] + NOT NULL + +is_active + + [BOOLEAN] + +is_paused + + [BOOLEAN] + +last_expired + + [TIMESTAMP] + +last_parsed_time + + [TIMESTAMP] + +max_active_runs + + [INTEGER] + +max_active_tasks + + [INTEGER] + NOT NULL + +max_consecutive_failed_dag_runs + + [INTEGER] + NOT NULL + +next_dagrun + + [TIMESTAMP] + +next_dagrun_create_after + + [TIMESTAMP] + +next_dagrun_data_interval_end + + [TIMESTAMP] + +next_dagrun_data_interval_start + + [TIMESTAMP] + +owners + + [VARCHAR(2000)] + +processor_subdir + + [VARCHAR(2000)] + +timetable_description + + [VARCHAR(1000)] + +timetable_summary + + [TEXT] dag--dag_schedule_asset_alias_reference - -0..N -1 + +0..N +1 dag--dag_schedule_asset_reference - -0..N -1 + +0..N +1 dag--task_outlet_asset_reference - -0..N -1 + +0..N +1 dag--asset_dag_run_queue - -0..N -1 - - - -dag_version - -dag_version - -id - - [UUID] - NOT NULL - -created_at - - [TIMESTAMP] - -dag_id - - [VARCHAR(250)] - NOT NULL - -version_name - - [VARCHAR(250)] - -version_number - - [INTEGER] - NOT NULL - - - -dag--dag_version - -0..N -1 + +0..N +1 - + dag_tag - -dag_tag - -dag_id - - [VARCHAR(250)] - NOT NULL - -name - - [VARCHAR(100)] - NOT NULL + +dag_tag + +dag_id + + [VARCHAR(250)] + NOT NULL + +name + + [VARCHAR(100)] + NOT NULL - + dag--dag_tag - -0..N -1 + +0..N +1 - + dag_owner_attributes - -dag_owner_attributes - -dag_id - - [VARCHAR(250)] - NOT NULL - -owner - - [VARCHAR(500)] - NOT NULL - -link - - [VARCHAR(500)] - NOT NULL + +dag_owner_attributes + +dag_id + + [VARCHAR(250)] + NOT NULL + +owner + + [VARCHAR(500)] + NOT NULL + +link + + [VARCHAR(500)] + NOT NULL - + dag--dag_owner_attributes - -0..N -1 + +0..N +1 - + dag_warning - -dag_warning - -dag_id - - [VARCHAR(250)] - NOT NULL - -warning_type - - [VARCHAR(50)] - NOT NULL - -message - - [TEXT] - NOT NULL - -timestamp - - [TIMESTAMP] - NOT NULL + +dag_warning + +dag_id + + [VARCHAR(250)] + NOT NULL + +warning_type + + [VARCHAR(50)] + NOT NULL + +message + + [TEXT] + NOT NULL + +timestamp + + [TIMESTAMP] + NOT NULL - + dag--dag_warning - -0..N -1 + +0..N +1 + + + +log_template + +log_template + +id + + [INTEGER] + NOT NULL + 
+created_at + + [TIMESTAMP] + NOT NULL + +elasticsearch_id + + [TEXT] + NOT NULL + +filename + + [TEXT] + NOT NULL - + dag_run - -dag_run - -id - - [INTEGER] - NOT NULL - -backfill_id - - [INTEGER] - -clear_number - - [INTEGER] - NOT NULL - -conf - - [BYTEA] - -creating_job_id - - [INTEGER] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - -data_interval_end - - [TIMESTAMP] - -data_interval_start - - [TIMESTAMP] - -end_date - - [TIMESTAMP] - -external_trigger - - [BOOLEAN] - -last_scheduling_decision - - [TIMESTAMP] - -log_template_id - - [INTEGER] - -logical_date - - [TIMESTAMP] - NOT NULL - -queued_at - - [TIMESTAMP] - -run_id - - [VARCHAR(250)] - NOT NULL - -run_type - - [VARCHAR(50)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(50)] - -triggered_by - - [VARCHAR(50)] - -updated_at - - [TIMESTAMP] - - - -dag_version--dag_run - -0..N -{0,1} + +dag_run + +id + + [INTEGER] + NOT NULL + +backfill_id + + [INTEGER] + +clear_number + + [INTEGER] + NOT NULL + +conf + + [BYTEA] + +creating_job_id + + [INTEGER] + +dag_hash + + [VARCHAR(32)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +data_interval_end + + [TIMESTAMP] + +data_interval_start + + [TIMESTAMP] + +end_date + + [TIMESTAMP] + +external_trigger + + [BOOLEAN] + +last_scheduling_decision + + [TIMESTAMP] + +log_template_id + + [INTEGER] + +logical_date + + [TIMESTAMP] + NOT NULL + +queued_at + + [TIMESTAMP] + +run_id + + [VARCHAR(250)] + NOT NULL + +run_type + + [VARCHAR(50)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(50)] + +triggered_by + + [VARCHAR(50)] + +updated_at + + [TIMESTAMP] - - -dag_code - -dag_code - -id - - [UUID] - NOT NULL - -dag_version_id - - [UUID] - NOT NULL - -fileloc - - [VARCHAR(2000)] - NOT NULL - -fileloc_hash - - [BIGINT] - NOT NULL - -last_updated - - [TIMESTAMP] - NOT NULL - -source_code - - [TEXT] - NOT NULL - - - -dag_version--dag_code - -0..N -1 + + +log_template--dag_run + +0..N +{0,1} - - -serialized_dag - -serialized_dag - -id - - [UUID] - NOT NULL - -dag_hash - - [VARCHAR(32)] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - NOT NULL - -data - - [JSON] - -data_compressed - - [BYTEA] - -last_updated - - [TIMESTAMP] - NOT NULL - -processor_subdir - - [VARCHAR(2000)] - - - -dag_version--serialized_dag - -0..N -1 + + +dag_run--dagrun_asset_event + +0..N +1 task_instance - -task_instance - -id - - [UUID] - NOT NULL - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - -executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - - [VARCHAR(1000)] - -last_heartbeat_at - - [TIMESTAMP] - -map_index - - [INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSON] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots - - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - -run_id - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] - - - 
-dag_version--task_instance - -0..N -{0,1} - - - -dag_run--dagrun_asset_event - -0..N -1 + +task_instance + +id + + [UUID] + NOT NULL + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +last_heartbeat_at + + [TIMESTAMP] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSON] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] + +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + [INTEGER] + +trigger_timeout + + [TIMESTAMP] + +try_number + + [INTEGER] + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] - + dag_run--task_instance - -0..N -1 + +0..N +1 - + dag_run--task_instance - -0..N -1 + +0..N +1 - + backfill_dag_run - -backfill_dag_run - -id - - [INTEGER] - NOT NULL - -backfill_id - - [INTEGER] - NOT NULL - -dag_run_id - - [INTEGER] - -exception_reason - - [VARCHAR(250)] - -logical_date - - [TIMESTAMP] - NOT NULL - -sort_ordinal - - [INTEGER] - NOT NULL + +backfill_dag_run + +id + + [INTEGER] + NOT NULL + +backfill_id + + [INTEGER] + NOT NULL + +dag_run_id + + [INTEGER] + +exception_reason + + [VARCHAR(250)] + +logical_date + + [TIMESTAMP] + NOT NULL + +sort_ordinal + + [INTEGER] + NOT NULL - + dag_run--backfill_dag_run - -0..N -{0,1} + +0..N +{0,1} - + dag_run_note - -dag_run_note - -dag_run_id - - [INTEGER] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +dag_run_note + +dag_run_id + + [INTEGER] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] - + dag_run--dag_run_note - -1 -1 + +1 +1 - + task_reschedule - -task_reschedule - -id - - [INTEGER] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -duration - - [INTEGER] - NOT NULL - -end_date - - [TIMESTAMP] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -reschedule_date - - [TIMESTAMP] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -try_number - - [INTEGER] - NOT NULL + +task_reschedule + +id + + [INTEGER] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +duration + + [INTEGER] + NOT NULL + +end_date + + [TIMESTAMP] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +reschedule_date + + [TIMESTAMP] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +try_number + + [INTEGER] + NOT NULL - + dag_run--task_reschedule - -0..N -1 + +0..N +1 - + dag_run--task_reschedule - -0..N -1 + +0..N +1 - + task_instance--task_reschedule - -0..N -1 + +0..N +1 - + task_instance--task_reschedule - -0..N -1 + +0..N +1 - + task_instance--task_reschedule - -0..N -1 + +0..N +1 - + task_instance--task_reschedule - -0..N -1 + +0..N +1 - + rendered_task_instance_fields - 
-rendered_task_instance_fields - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -k8s_pod_yaml - - [JSON] - -rendered_fields - - [JSON] - NOT NULL + +rendered_task_instance_fields + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +k8s_pod_yaml + + [JSON] + +rendered_fields + + [JSON] + NOT NULL - + task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 - + task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 - + task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 - + task_instance--rendered_task_instance_fields - -0..N -1 + +0..N +1 - + task_map - -task_map - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -keys - - [JSON] - -length - - [INTEGER] - NOT NULL + +task_map + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +keys + + [JSON] + +length + + [INTEGER] + NOT NULL - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_instance--task_map - -0..N -1 + +0..N +1 - + xcom - -xcom - -dag_run_id - - [INTEGER] - NOT NULL - -key - - [VARCHAR(512)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -timestamp - - [TIMESTAMP] - NOT NULL - -value - - [BYTEA] + +xcom + +dag_run_id + + [INTEGER] + NOT NULL + +key + + [VARCHAR(512)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +timestamp + + [TIMESTAMP] + NOT NULL + +value + + [BYTEA] - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance_note - -task_instance_note - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +task_instance_note + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] - + task_instance--task_instance_note - -0..N -1 + +0..N +1 - + task_instance--task_instance_note - -0..N -1 + +0..N +1 - + task_instance--task_instance_note - -0..N -1 + +0..N +1 - + task_instance--task_instance_note - -0..N -1 + +0..N +1 - + task_instance_history - -task_instance_history - -id - - [INTEGER] - NOT NULL - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - -executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - 
- [VARCHAR(1000)] - -map_index - - [INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSON] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots - - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - -run_id - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - NOT NULL - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] + +task_instance_history + +id + + [INTEGER] + NOT NULL + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSON] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] + +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + [INTEGER] + +trigger_timeout + + [TIMESTAMP] + +try_number + + [INTEGER] + NOT NULL + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] - + task_instance--task_instance_history - -0..N -1 + +0..N +1 - + task_instance--task_instance_history - -0..N -1 + +0..N +1 - + task_instance--task_instance_history - -0..N -1 + +0..N +1 - + task_instance--task_instance_history - -0..N -1 - - - -log_template - -log_template - -id - - [INTEGER] - NOT NULL - -created_at - - [TIMESTAMP] - NOT NULL - -elasticsearch_id - - [TEXT] - NOT NULL - -filename - - [TEXT] - NOT NULL - - - -log_template--dag_run - -0..N -{0,1} + +0..N +1 - + backfill - -backfill - -id - - [INTEGER] - NOT NULL - -completed_at - - [TIMESTAMP] - -created_at - - [TIMESTAMP] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_run_conf - - [JSON] - NOT NULL - -from_date - - [TIMESTAMP] - NOT NULL - -is_paused - - [BOOLEAN] - -max_active_runs - - [INTEGER] - NOT NULL - -reprocess_behavior - - [VARCHAR(250)] - NOT NULL - -to_date - - [TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL + +backfill + +id + + [INTEGER] + NOT NULL + +completed_at + + [TIMESTAMP] + +created_at + + [TIMESTAMP] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_run_conf + + [JSON] + NOT NULL + +from_date + + [TIMESTAMP] + NOT NULL + +is_paused + + [BOOLEAN] + +max_active_runs + + [INTEGER] + NOT NULL + +reprocess_behavior + + [VARCHAR(250)] + NOT NULL + +to_date + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL - + backfill--dag_run - -0..N -{0,1} + +0..N +{0,1} - + backfill--backfill_dag_run - -0..N -1 + +0..N +1 - + trigger - -trigger - -id - - [INTEGER] - NOT NULL - -classpath - - [VARCHAR(1000)] - NOT NULL - -created_date - - [TIMESTAMP] - NOT NULL - -kwargs - - [TEXT] - NOT NULL - -triggerer_id - - 
[INTEGER] + +trigger + +id + + [INTEGER] + NOT NULL + +classpath + + [VARCHAR(1000)] + NOT NULL + +created_date + + [TIMESTAMP] + NOT NULL + +kwargs + + [TEXT] + NOT NULL + +triggerer_id + + [INTEGER] - + trigger--task_instance - -0..N -{0,1} + +0..N +{0,1} + + + +alembic_version + +alembic_version + +version_num + + [VARCHAR(32)] + NOT NULL session - -session - -id - - [INTEGER] - NOT NULL - -data - - [BYTEA] - -expiry - - [TIMESTAMP] - -session_id - - [VARCHAR(255)] - - - -alembic_version - -alembic_version - -version_num - - [VARCHAR(32)] - NOT NULL + +session + +id + + [INTEGER] + NOT NULL + +data + + [BYTEA] + +expiry + + [TIMESTAMP] + +session_id + + [VARCHAR(255)] - + ab_user - -ab_user - -id - - [INTEGER] - NOT NULL - -active - - [BOOLEAN] - -changed_by_fk - - [INTEGER] - -changed_on - - [TIMESTAMP] - -created_by_fk - - [INTEGER] - -created_on - - [TIMESTAMP] - -email - - [VARCHAR(512)] - NOT NULL - -fail_login_count - - [INTEGER] - -first_name - - [VARCHAR(256)] - NOT NULL - -last_login - - [TIMESTAMP] - -last_name - - [VARCHAR(256)] - NOT NULL - -login_count - - [INTEGER] - -password - - [VARCHAR(256)] - -username - - [VARCHAR(512)] - NOT NULL + +ab_user + +id + + [INTEGER] + NOT NULL + +active + + [BOOLEAN] + +changed_by_fk + + [INTEGER] + +changed_on + + [TIMESTAMP] + +created_by_fk + + [INTEGER] + +created_on + + [TIMESTAMP] + +email + + [VARCHAR(512)] + NOT NULL + +fail_login_count + + [INTEGER] + +first_name + + [VARCHAR(256)] + NOT NULL + +last_login + + [TIMESTAMP] + +last_name + + [VARCHAR(256)] + NOT NULL + +login_count + + [INTEGER] + +password + + [VARCHAR(256)] + +username + + [VARCHAR(512)] + NOT NULL - + ab_user--ab_user - -0..N -{0,1} + +0..N +{0,1} - + ab_user--ab_user - -0..N -{0,1} + +0..N +{0,1} - + ab_user_role - -ab_user_role - -id - - [INTEGER] - NOT NULL - -role_id - - [INTEGER] - -user_id - - [INTEGER] + +ab_user_role + +id + + [INTEGER] + NOT NULL + +role_id + + [INTEGER] + +user_id + + [INTEGER] - + ab_user--ab_user_role - -0..N -{0,1} + +0..N +{0,1} - + ab_register_user - -ab_register_user - -id - - [INTEGER] - NOT NULL - -email - - [VARCHAR(512)] - NOT NULL - -first_name - - [VARCHAR(256)] - NOT NULL - -last_name - - [VARCHAR(256)] - NOT NULL - -password - - [VARCHAR(256)] - -registration_date - - [TIMESTAMP] - -registration_hash - - [VARCHAR(256)] - -username - - [VARCHAR(512)] - NOT NULL + +ab_register_user + +id + + [INTEGER] + NOT NULL + +email + + [VARCHAR(512)] + NOT NULL + +first_name + + [VARCHAR(256)] + NOT NULL + +last_name + + [VARCHAR(256)] + NOT NULL + +password + + [VARCHAR(256)] + +registration_date + + [TIMESTAMP] + +registration_hash + + [VARCHAR(256)] + +username + + [VARCHAR(512)] + NOT NULL - + ab_permission - -ab_permission - -id - - [INTEGER] - NOT NULL - -name - - [VARCHAR(100)] - NOT NULL + +ab_permission + +id + + [INTEGER] + NOT NULL + +name + + [VARCHAR(100)] + NOT NULL - + ab_permission_view - -ab_permission_view - -id - - [INTEGER] - NOT NULL - -permission_id - - [INTEGER] - -view_menu_id - - [INTEGER] + +ab_permission_view + +id + + [INTEGER] + NOT NULL + +permission_id + + [INTEGER] + +view_menu_id + + [INTEGER] - + ab_permission--ab_permission_view - -0..N -{0,1} + +0..N +{0,1} - + ab_permission_view_role - -ab_permission_view_role - -id - - [INTEGER] - NOT NULL - -permission_view_id - - [INTEGER] - -role_id - - [INTEGER] + +ab_permission_view_role + +id + + [INTEGER] + NOT NULL + +permission_view_id + + [INTEGER] + +role_id + + [INTEGER] - + ab_permission_view--ab_permission_view_role - -0..N -{0,1} + +0..N +{0,1} - + 
ab_view_menu - -ab_view_menu - -id - - [INTEGER] - NOT NULL - -name - - [VARCHAR(250)] - NOT NULL + +ab_view_menu + +id + + [INTEGER] + NOT NULL + +name + + [VARCHAR(250)] + NOT NULL - + ab_view_menu--ab_permission_view - -0..N -{0,1} + +0..N +{0,1} - + ab_role - -ab_role - -id - - [INTEGER] - NOT NULL - -name - - [VARCHAR(64)] - NOT NULL + +ab_role + +id + + [INTEGER] + NOT NULL + +name + + [VARCHAR(64)] + NOT NULL - + ab_role--ab_user_role - -0..N -{0,1} + +0..N +{0,1} - + ab_role--ab_permission_view_role - -0..N -{0,1} + +0..N +{0,1} - + alembic_version_fab - -alembic_version_fab - -version_num - - [VARCHAR(32)] - NOT NULL + +alembic_version_fab + +version_num + + [VARCHAR(32)] + NOT NULL diff --git a/docs/apache-airflow/migrations-ref.rst b/docs/apache-airflow/migrations-ref.rst index bc73f387a2d28..61dde39958e21 100644 --- a/docs/apache-airflow/migrations-ref.rst +++ b/docs/apache-airflow/migrations-ref.rst @@ -39,9 +39,7 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``2b47dc6bc8df`` (head) | ``d03e4a635aa3`` | ``3.0.0`` | add dag versioning. | -+-------------------------+------------------+-------------------+--------------------------------------------------------------+ -| ``d03e4a635aa3`` | ``d8cd3297971e`` | ``3.0.0`` | Drop DAG pickling. | +| ``d03e4a635aa3`` (head) | ``d8cd3297971e`` | ``3.0.0`` | Drop DAG pickling. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``d8cd3297971e`` | ``5f57a45b8433`` | ``3.0.0`` | Add last_heartbeat_at directly to TI. 
| +-------------------------+------------------+-------------------+--------------------------------------------------------------+ diff --git a/hatch_build.py b/hatch_build.py index 5e95775c62f26..91b9256b4d031 100644 --- a/hatch_build.py +++ b/hatch_build.py @@ -431,7 +431,6 @@ # The issue tracking it is https://github.com/apache/airflow/issues/28723 "sqlalchemy>=1.4.36,<2.0", "sqlalchemy-jsonfield>=1.0", - "sqlalchemy-utils>=0.41.2", "tabulate>=0.7.5", "tenacity>=8.0.0,!=8.2.0", "termcolor>=1.1.0", diff --git a/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py b/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py index c1f3e71f306bd..05bac3394ef0f 100644 --- a/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py +++ b/providers/tests/fab/auth_manager/api_endpoints/test_dag_run_endpoint.py @@ -138,7 +138,6 @@ def _create_dag(self, dag_id): session.add(dag_instance) dag = DAG(dag_id=dag_id, schedule=None, params={"validated_number": Param(1, minimum=1, maximum=10)}) self.app.dag_bag.bag_dag(dag) - self.app.dag_bag.sync_to_db() return dag_instance def _create_test_dag_run(self, state=DagRunState.RUNNING, extra_dag=False, commit=True, idx_start=1): diff --git a/scripts/ci/pre_commit/check_ti_vs_tis_attributes.py b/scripts/ci/pre_commit/check_ti_vs_tis_attributes.py index d1782e6290744..16c1df48a9e8b 100755 --- a/scripts/ci/pre_commit/check_ti_vs_tis_attributes.py +++ b/scripts/ci/pre_commit/check_ti_vs_tis_attributes.py @@ -54,7 +54,6 @@ def compare_attributes(path1, path2): "rendered_task_instance_fields", # Storing last heartbeat for historic TIs is not interesting/useful "last_heartbeat_at", - "dag_version", } # exclude attrs not necessary to be in TaskInstanceHistory if not diff: return diff --git a/task_sdk/src/airflow/sdk/definitions/dag.py b/task_sdk/src/airflow/sdk/definitions/dag.py index 9a124d237ed57..479c1ea09b80c 100644 --- a/task_sdk/src/airflow/sdk/definitions/dag.py +++ b/task_sdk/src/airflow/sdk/definitions/dag.py @@ -355,7 +355,6 @@ class DAG: **Warning**: A fail stop dag can only have tasks with the default trigger rule ("all_success"). An exception will be thrown if any task in a fail stop dag has a non default trigger rule. :param dag_display_name: The display name of the DAG which appears on the UI. - :param version_name: The version name of the DAG. This is used to identify the version of the DAG. """ __serialized_fields: ClassVar[frozenset[str] | None] = None @@ -438,10 +437,6 @@ class DAG: has_on_success_callback: bool = attrs.field(init=False) has_on_failure_callback: bool = attrs.field(init=False) - version_name: str | None = attrs.field( - default=None, - validator=attrs.validators.optional(attrs.validators.instance_of(str)), - ) def __attrs_post_init__(self): from airflow.utils import timezone @@ -1068,7 +1063,6 @@ def dag( auto_register: bool = True, fail_stop: bool = False, dag_display_name: str | None = None, - version_name: str | None = None, ) -> Callable[[Callable], Callable[..., DAG]]: """ Python dag decorator which wraps a function into an Airflow DAG. 
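# Illustrative sketch (not part of the upstream patch): the task_sdk/dag.py hunk
# above removes the ``version_name`` field from ``DAG`` and the ``@dag`` decorator,
# so after this revert DAG declarations simply omit that argument. The import path
# and the ``dag_id``/``dag_display_name`` keywords are taken from the diff itself;
# the example object below is hypothetical usage, not code added by this PR.
from airflow.sdk.definitions.dag import DAG

reverted_style_dag = DAG(dag_id="example_dag", dag_display_name="Example DAG")  # no version_name kwarg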
diff --git a/tests/api_connexion/endpoints/test_dag_run_endpoint.py b/tests/api_connexion/endpoints/test_dag_run_endpoint.py index 6bbd63fa8b49d..576b28b153531 100644 --- a/tests/api_connexion/endpoints/test_dag_run_endpoint.py +++ b/tests/api_connexion/endpoints/test_dag_run_endpoint.py @@ -89,7 +89,6 @@ def _create_dag(self, dag_id): session.add(dag_instance) dag = DAG(dag_id=dag_id, schedule=None, params={"validated_number": Param(1, minimum=1, maximum=10)}) self.app.dag_bag.bag_dag(dag) - self.app.dag_bag.sync_to_db() return dag_instance def _create_test_dag_run(self, state=DagRunState.RUNNING, extra_dag=False, commit=True, idx_start=1): @@ -1206,14 +1205,12 @@ def test_raises_validation_error_for_invalid_params(self): assert "Invalid input for param" in response.json["detail"] @mock.patch("airflow.api_connexion.endpoints.dag_run_endpoint.get_airflow_app") - @mock.patch("airflow.api_connexion.endpoints.dag_run_endpoint.DagVersion") - def test_dagrun_creation_exception_is_handled(self, mock_get_dag_version, mock_get_app, session): + def test_dagrun_creation_exception_is_handled(self, mock_get_app, session): self._create_dag("TEST_DAG_ID") error_message = "Encountered Error" mock_get_app.return_value.dag_bag.get_dag.return_value.create_dagrun.side_effect = ValueError( error_message ) - mock_get_dag_version.get_latest_version.return_value = mock.MagicMock() response = self.client.post( "api/v1/dags/TEST_DAG_ID/dagRuns", json={"execution_date": "2020-11-10T08:25:56Z"}, diff --git a/tests/api_connexion/endpoints/test_task_endpoint.py b/tests/api_connexion/endpoints/test_task_endpoint.py index a6e47f4f6a1bb..b558f8dbf161e 100644 --- a/tests/api_connexion/endpoints/test_task_endpoint.py +++ b/tests/api_connexion/endpoints/test_task_endpoint.py @@ -78,6 +78,7 @@ def setup_dag(self, configured_app): with DAG(self.unscheduled_dag_id, start_date=None, schedule=None) as unscheduled_dag: task4 = EmptyOperator(task_id=self.unscheduled_task_id1, params={"is_unscheduled": True}) task5 = EmptyOperator(task_id=self.unscheduled_task_id2, params={"is_unscheduled": True}) + task1 >> task2 task4 >> task5 dag_bag = DagBag(os.devnull, include_examples=False) @@ -86,7 +87,6 @@ def setup_dag(self, configured_app): mapped_dag.dag_id: mapped_dag, unscheduled_dag.dag_id: unscheduled_dag, } - DagBag._sync_to_db(dag_bag.dags) configured_app.dag_bag = dag_bag # type:ignore @staticmethod @@ -246,9 +246,7 @@ def test_unscheduled_task(self): def test_should_respond_200_serialized(self): # Get the dag out of the dagbag before we patch it to an empty one - dag = self.app.dag_bag.get_dag(self.dag_id) - dag.sync_to_db() - SerializedDagModel.write_dag(dag) + SerializedDagModel.write_dag(self.app.dag_bag.get_dag(self.dag_id)) dag_bag = DagBag(os.devnull, include_examples=False, read_dags_from_db=True) patcher = unittest.mock.patch.object(self.app, "dag_bag", dag_bag) diff --git a/tests/cli/commands/test_task_command.py b/tests/cli/commands/test_task_command.py index 50e3d393f8108..ed1a2c28754f8 100644 --- a/tests/cli/commands/test_task_command.py +++ b/tests/cli/commands/test_task_command.py @@ -227,7 +227,7 @@ def test_cli_test_different_path(self, session, tmp_path): .one() ) # confirm that the serialized dag location has not been updated - assert ser_dag.dag_version.dag_code.fileloc == orig_file_path.as_posix() + assert ser_dag.fileloc == orig_file_path.as_posix() assert ser_dag.data["dag"]["_processor_dags_folder"] == orig_dags_folder.as_posix() assert ser_dag.data["dag"]["fileloc"] == orig_file_path.as_posix() assert 
ser_dag.dag._processor_dags_folder == orig_dags_folder.as_posix() diff --git a/tests/dag_processing/test_job_runner.py b/tests/dag_processing/test_job_runner.py index 2bf0bcb6dbb0e..192a12358e8dd 100644 --- a/tests/dag_processing/test_job_runner.py +++ b/tests/dag_processing/test_job_runner.py @@ -53,7 +53,6 @@ from airflow.jobs.dag_processor_job_runner import DagProcessorJobRunner from airflow.jobs.job import Job from airflow.models import DagBag, DagModel, DbCallbackRequest -from airflow.models.dag_version import DagVersion from airflow.models.dagcode import DagCode from airflow.models.serialized_dag import SerializedDagModel from airflow.utils import timezone @@ -665,6 +664,13 @@ def test_scan_stale_dags(self): ) assert active_dag_count == 1 + serialized_dag_count = ( + session.query(func.count(SerializedDagModel.dag_id)) + .filter(SerializedDagModel.fileloc == test_dag_path) + .scalar() + ) + assert serialized_dag_count == 1 + manager.processor._scan_stale_dags() active_dag_count = ( @@ -676,12 +682,10 @@ def test_scan_stale_dags(self): serialized_dag_count = ( session.query(func.count(SerializedDagModel.dag_id)) - .filter(SerializedDagModel.dag_id == dag.dag_id) + .filter(SerializedDagModel.fileloc == test_dag_path) .scalar() ) - # Deactivating the DagModel should not delete the SerializedDagModel - # SerializedDagModel gives history about Dags - assert serialized_dag_count == 1 + assert serialized_dag_count == 0 @pytest.mark.skip_if_database_isolation_mode # Test is broken in db isolation mode @conf_vars( @@ -1084,12 +1088,10 @@ def test_refresh_dags_dir_deactivates_deleted_zipped_dags(self, tmp_path): with mock.patch("airflow.dag_processing.manager.might_contain_dag", return_value=False): manager.processor._refresh_dag_dir() - # Deleting the python file should not delete SDM for versioning sake - assert SerializedDagModel.has_dag("test_zip_dag") - # assert code not deleted for versioning sake - assert DagCode.has_dag(dag.fileloc) - # assert dagversion was not deleted - assert DagVersion.get_latest_version(dag.dag_id) + # Assert dag removed from SDM + assert not SerializedDagModel.has_dag("test_zip_dag") + # assert code deleted + assert not DagCode.has_dag(dag.fileloc) # assert dag deactivated assert not dag.get_is_active() diff --git a/tests/dag_processing/test_processor.py b/tests/dag_processing/test_processor.py index 29aa4e15a3888..f117b3ffe4581 100644 --- a/tests/dag_processing/test_processor.py +++ b/tests/dag_processing/test_processor.py @@ -161,7 +161,6 @@ def test_execute_on_failure_callbacks_without_dag(self, mock_ti_handle_failure, with create_session() as session: session.query(TaskInstance).delete() dag = dagbag.get_dag("example_branch_operator") - dag.sync_to_db() triggered_by_kwargs = {"triggered_by": DagRunTriggeredByType.TEST} if AIRFLOW_V_3_0_PLUS else {} dagrun = dag.create_dagrun( state=State.RUNNING, diff --git a/tests/jobs/test_scheduler_job.py b/tests/jobs/test_scheduler_job.py index d0b147a5c3727..da3ccc201eb4a 100644 --- a/tests/jobs/test_scheduler_job.py +++ b/tests/jobs/test_scheduler_job.py @@ -54,7 +54,6 @@ from airflow.models.asset import AssetActive, AssetDagRunQueue, AssetEvent, AssetModel from airflow.models.backfill import Backfill, _create_backfill from airflow.models.dag import DAG, DagModel -from airflow.models.dag_version import DagVersion from airflow.models.dagbag import DagBag from airflow.models.dagrun import DagRun from airflow.models.db_callback_request import DbCallbackRequest @@ -142,10 +141,11 @@ def clean_db(): clear_db_runs() 
clear_db_backfills() clear_db_pools() + clear_db_dags() clear_db_import_errors() clear_db_jobs() clear_db_assets() - # DO NOT try to run clear_db_serialized_dags() or clear_db_dags here - this will break the tests + # DO NOT try to run clear_db_serialized_dags() here - this will break the tests # The tests expect DAGs to be fully loaded here via setUpClass method below @pytest.fixture(autouse=True) @@ -167,7 +167,9 @@ def set_instance_attrs(self, dagbag) -> Generator: # enqueue! self.null_exec: MockExecutor | None = MockExecutor() # Since we don't want to store the code for the DAG defined in this file - with patch("airflow.models.serialized_dag.SerializedDagModel.remove_deleted_dags"): + with patch("airflow.dag_processing.manager.SerializedDagModel.remove_deleted_dags"), patch( + "airflow.models.dag.DagCode.bulk_sync_to_db" + ): yield self.null_exec = None @@ -2873,6 +2875,7 @@ def test_dagrun_root_after_dagrun_unfinished(self, mock_executor): Noted: the DagRun state could be still in running state during CI. """ + clear_db_dags() dag_id = "test_dagrun_states_root_future" dag = self.dagbag.get_dag(dag_id) dag.sync_to_db() @@ -3313,7 +3316,7 @@ def test_verify_integrity_if_dag_not_changed(self, dag_maker): assert tis_count == 1 latest_dag_version = SerializedDagModel.get_latest_version_hash(dr.dag_id, session=session) - assert dr.dag_version.serialized_dag.dag_hash == latest_dag_version + assert dr.dag_hash == latest_dag_version session.rollback() session.close() @@ -3347,7 +3350,7 @@ def test_verify_integrity_if_dag_changed(self, dag_maker): dr = drs[0] dag_version_1 = SerializedDagModel.get_latest_version_hash(dr.dag_id, session=session) - assert dr.dag_version.serialized_dag.dag_hash == dag_version_1 + assert dr.dag_hash == dag_version_1 assert self.job_runner.dagbag.dags == {"test_verify_integrity_if_dag_changed": dag} assert len(self.job_runner.dagbag.dags.get("test_verify_integrity_if_dag_changed").tasks) == 1 @@ -3364,7 +3367,7 @@ def test_verify_integrity_if_dag_changed(self, dag_maker): drs = DagRun.find(dag_id=dag.dag_id, session=session) assert len(drs) == 1 dr = drs[0] - assert dr.dag_version.serialized_dag.dag_hash == dag_version_2 + assert dr.dag_hash == dag_version_2 assert self.job_runner.dagbag.dags == {"test_verify_integrity_if_dag_changed": dag} assert len(self.job_runner.dagbag.dags.get("test_verify_integrity_if_dag_changed").tasks) == 2 @@ -3380,7 +3383,54 @@ def test_verify_integrity_if_dag_changed(self, dag_maker): assert tis_count == 2 latest_dag_version = SerializedDagModel.get_latest_version_hash(dr.dag_id, session=session) - assert dr.dag_version.serialized_dag.dag_hash == latest_dag_version + assert dr.dag_hash == latest_dag_version + + session.rollback() + session.close() + + def test_verify_integrity_if_dag_disappeared(self, dag_maker, caplog): + # CleanUp + with create_session() as session: + session.query(SerializedDagModel).filter( + SerializedDagModel.dag_id == "test_verify_integrity_if_dag_disappeared" + ).delete(synchronize_session=False) + + with dag_maker(dag_id="test_verify_integrity_if_dag_disappeared") as dag: + BashOperator(task_id="dummy", bash_command="echo hi") + + scheduler_job = Job() + self.job_runner = SchedulerJobRunner(job=scheduler_job, subdir=os.devnull) + + session = settings.Session() + orm_dag = dag_maker.dag_model + assert orm_dag is not None + + scheduler_job = Job() + self.job_runner = SchedulerJobRunner(job=scheduler_job, subdir=os.devnull) + + self.job_runner.processor_agent = mock.MagicMock() + dag = 
self.job_runner.dagbag.get_dag("test_verify_integrity_if_dag_disappeared", session=session) + self.job_runner._create_dag_runs([orm_dag], session) + dag_id = dag.dag_id + drs = DagRun.find(dag_id=dag_id, session=session) + assert len(drs) == 1 + dr = drs[0] + + dag_version_1 = SerializedDagModel.get_latest_version_hash(dag_id, session=session) + assert dr.dag_hash == dag_version_1 + assert self.job_runner.dagbag.dags == {"test_verify_integrity_if_dag_disappeared": dag} + assert len(self.job_runner.dagbag.dags.get("test_verify_integrity_if_dag_disappeared").tasks) == 1 + + SerializedDagModel.remove_dag(dag_id=dag_id) + dag = self.job_runner.dagbag.dags[dag_id] + self.job_runner.dagbag.dags = MagicMock() + self.job_runner.dagbag.dags.get.side_effect = [dag, None] + session.flush() + with caplog.at_level(logging.WARNING): + callback = self.job_runner._schedule_dag_run(dr, session) + assert "The DAG disappeared before verifying integrity" in caplog.text + + assert callback is None session.rollback() session.close() @@ -3965,7 +4015,6 @@ def test_create_dag_runs_assets(self, session, dag_maker): - That the run created is on QUEUED State - That dag_model has next_dagrun """ - clear_db_dags() asset1 = Asset(uri="ds1") asset2 = Asset(uri="ds2") @@ -4337,7 +4386,6 @@ def test_do_schedule_max_active_runs_dag_timed_out(self, dag_maker): session = settings.Session() data_interval = dag.infer_automated_data_interval(DEFAULT_LOGICAL_DATE) triggered_by_kwargs = {"triggered_by": DagRunTriggeredByType.TEST} if AIRFLOW_V_3_0_PLUS else {} - dag_version = DagVersion.get_latest_version(dag.dag_id) run1 = dag.create_dagrun( run_type=DagRunType.SCHEDULED, execution_date=DEFAULT_DATE, @@ -4345,7 +4393,6 @@ def test_do_schedule_max_active_runs_dag_timed_out(self, dag_maker): start_date=timezone.utcnow() - timedelta(seconds=2), session=session, data_interval=data_interval, - dag_version=dag_version, **triggered_by_kwargs, ) @@ -4358,7 +4405,6 @@ def test_do_schedule_max_active_runs_dag_timed_out(self, dag_maker): state=State.QUEUED, session=session, data_interval=data_interval, - dag_version=dag_version, **triggered_by_kwargs, ) @@ -4556,8 +4602,10 @@ def test_do_schedule_max_active_runs_and_manual_trigger(self, dag_maker, mock_ex BashOperator(task_id="dummy3", bash_command="true") session = settings.Session() - dag_version = DagVersion.get_latest_version(dag.dag_id) - dag_run = dag_maker.create_dagrun(state=State.QUEUED, session=session, dag_version=dag_version) + dag_run = dag_maker.create_dagrun( + state=State.QUEUED, + session=session, + ) dag.sync_to_db(session=session) # Update the date fields @@ -4599,31 +4647,23 @@ def test_max_active_runs_in_a_dag_doesnt_stop_running_dag_runs_in_other_dags(sel start_date=DEFAULT_DATE, schedule=timedelta(hours=1), max_active_runs=1, - ) as dag: + ): EmptyOperator(task_id="mytask") - dag_version = DagVersion.get_latest_version(dag.dag_id) - dr = dag_maker.create_dagrun( - run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + + dr = dag_maker.create_dagrun(run_type=DagRunType.SCHEDULED, state=State.QUEUED) for _ in range(29): - dr = dag_maker.create_dagrun_after( - dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun_after(dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED) with dag_maker( "test_dag2", start_date=timezone.datetime(2020, 1, 1), schedule=timedelta(hours=1), - ) as dag2: + ): EmptyOperator(task_id="mytask") - dag_version = DagVersion.get_latest_version(dag2.dag_id) - dr = 
dag_maker.create_dagrun( - run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + + dr = dag_maker.create_dagrun(run_type=DagRunType.SCHEDULED, state=State.QUEUED) for _ in range(9): - dr = dag_maker.create_dagrun_after( - dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun_after(dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED) scheduler_job = Job() self.job_runner = SchedulerJobRunner(job=scheduler_job, subdir=os.devnull) @@ -4655,29 +4695,20 @@ def test_max_active_runs_in_a_dag_doesnt_prevent_backfill_from_running(self, dag ) as dag: EmptyOperator(task_id="mytask") dag1_dag_id = dag.dag_id - dag_version = DagVersion.get_latest_version(dag1_dag_id) - dr = dag_maker.create_dagrun( - run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun(run_type=DagRunType.SCHEDULED, state=State.QUEUED) for _ in range(29): - dr = dag_maker.create_dagrun_after( - dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun_after(dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED) with dag_maker( "test_dag2", start_date=timezone.datetime(2020, 1, 1), schedule=timedelta(days=1), - ) as dag: + ): EmptyOperator(task_id="mytask") - dag_version = DagVersion.get_latest_version(dag.dag_id) - dr = dag_maker.create_dagrun( - run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + + dr = dag_maker.create_dagrun(run_type=DagRunType.SCHEDULED, state=State.QUEUED) for _ in range(9): - dr = dag_maker.create_dagrun_after( - dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun_after(dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED) scheduler_job = Job() self.job_runner = SchedulerJobRunner(job=scheduler_job, subdir=os.devnull) @@ -4811,30 +4842,19 @@ def _running_counts(): ) dag1_non_b_running, dag1_b_running, total_running = _running_counts() - dag_version = DagVersion.get_latest_version(dag1_dag_id) # now let's create some "normal" dag runs and verify that they can run - dr = dag_maker.create_dagrun( - run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun(run_type=DagRunType.SCHEDULED, state=State.QUEUED) for _ in range(29): - dr = dag_maker.create_dagrun_after( - dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun_after(dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED) with dag_maker( "test_dag2", start_date=timezone.datetime(2020, 1, 1), schedule=timedelta(days=1), - ) as dag2: + ): EmptyOperator(task_id="mytask") - - dag_version = DagVersion.get_latest_version(dag2.dag_id) - dr = dag_maker.create_dagrun( - run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun(run_type=DagRunType.SCHEDULED, state=State.QUEUED) for _ in range(9): - dr = dag_maker.create_dagrun_after( - dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun_after(dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED) # initial state -- nothing is running assert dag1_non_b_running == 0 @@ -4962,30 +4982,19 @@ def _running_counts(): assert session.scalar(select(func.count()).select_from(DagRun)) == 6 assert session.scalar(select(func.count()).where(DagRun.dag_id == dag1_dag_id)) == 6 - dag_version = 
DagVersion.get_latest_version(dag1_dag_id) # now let's create some "normal" dag runs and verify that they can run - dr = dag_maker.create_dagrun( - run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun(run_type=DagRunType.SCHEDULED, state=State.QUEUED) for _ in range(29): - dr = dag_maker.create_dagrun_after( - dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun_after(dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED) with dag_maker( "test_dag2", start_date=timezone.datetime(2020, 1, 1), schedule=timedelta(days=1), - ) as dag2: + ): EmptyOperator(task_id="mytask") - - dag_version = DagVersion.get_latest_version(dag2.dag_id) - dr = dag_maker.create_dagrun( - run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun(run_type=DagRunType.SCHEDULED, state=State.QUEUED) for _ in range(9): - dr = dag_maker.create_dagrun_after( - dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun_after(dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED) # ok at this point, there are new dag runs created, but no new running runs dag1_non_b_running, dag1_b_running, total_running = _running_counts() @@ -5121,14 +5130,9 @@ def test_start_queued_dagruns_do_follow_execution_date_order(self, dag_maker): with dag_maker("test_dag1", max_active_runs=1): EmptyOperator(task_id="mytask") date = DEFAULT_DATE - dag_version = DagVersion.get_latest_version("test_dag1") for i in range(30): dr = dag_maker.create_dagrun( - run_id=f"dagrun_{i}", - run_type=DagRunType.SCHEDULED, - state=State.QUEUED, - execution_date=date, - dag_version=dag_version, + run_id=f"dagrun_{i}", run_type=DagRunType.SCHEDULED, state=State.QUEUED, execution_date=date ) date = dr.execution_date + timedelta(hours=1) scheduler_job = Job() @@ -5171,15 +5175,11 @@ def test_no_dagruns_would_stuck_in_running(self, dag_maker): with dag_maker("test_dagrun_states_are_correct_1", max_active_runs=1, start_date=date) as dag: task1 = EmptyOperator(task_id="dummy_task") - dag_version = DagVersion.get_latest_version(dag.dag_id) - dr1_running = dag_maker.create_dagrun( - run_id="dr1_run_1", execution_date=date, dag_version=dag_version - ) + dr1_running = dag_maker.create_dagrun(run_id="dr1_run_1", execution_date=date) data_interval = dag.infer_automated_data_interval(logical_date) dag_maker.create_dagrun( run_id="dr1_run_2", state=State.QUEUED, - dag_version=dag_version, execution_date=dag.next_dagrun_info( last_automated_dagrun=data_interval, restricted=False ).data_interval.start, @@ -5188,48 +5188,26 @@ def test_no_dagruns_would_stuck_in_running(self, dag_maker): date = timezone.datetime(2020, 1, 1) with dag_maker("test_dagrun_states_are_correct_2", start_date=date) as dag: EmptyOperator(task_id="dummy_task") - dag_version = DagVersion.get_latest_version(dag.dag_id) for i in range(16): - dr = dag_maker.create_dagrun( - run_id=f"dr2_run_{i+1}", - state=State.RUNNING, - execution_date=date, - dag_version=dag_version, - ) + dr = dag_maker.create_dagrun(run_id=f"dr2_run_{i+1}", state=State.RUNNING, execution_date=date) date = dr.execution_date + timedelta(hours=1) dr16 = DagRun.find(run_id="dr2_run_16") date = dr16[0].execution_date + timedelta(hours=1) for i in range(16, 32): - dr = dag_maker.create_dagrun( - run_id=f"dr2_run_{i+1}", - state=State.QUEUED, - execution_date=date, - dag_version=dag_version, - ) + dr = 
dag_maker.create_dagrun(run_id=f"dr2_run_{i+1}", state=State.QUEUED, execution_date=date) date = dr.execution_date + timedelta(hours=1) # third dag and dagruns date = timezone.datetime(2021, 1, 1) with dag_maker("test_dagrun_states_are_correct_3", start_date=date) as dag: EmptyOperator(task_id="dummy_task") - dag_version = DagVersion.get_latest_version(dag.dag_id) for i in range(16): - dr = dag_maker.create_dagrun( - run_id=f"dr3_run_{i+1}", - state=State.RUNNING, - execution_date=date, - dag_version=dag_version, - ) + dr = dag_maker.create_dagrun(run_id=f"dr3_run_{i+1}", state=State.RUNNING, execution_date=date) date = dr.execution_date + timedelta(hours=1) dr16 = DagRun.find(run_id="dr3_run_16") date = dr16[0].execution_date + timedelta(hours=1) for i in range(16, 32): - dr = dag_maker.create_dagrun( - run_id=f"dr2_run_{i+1}", - state=State.QUEUED, - execution_date=date, - dag_version=dag_version, - ) + dr = dag_maker.create_dagrun(run_id=f"dr2_run_{i+1}", state=State.QUEUED, execution_date=date) date = dr.execution_date + timedelta(hours=1) scheduler_job = Job() @@ -5508,17 +5486,11 @@ def test_runs_respected_after_clear(self, dag_maker): self.job_runner = SchedulerJobRunner(job=scheduler_job, subdir=os.devnull) self.job_runner.processor_agent = mock.MagicMock() - dag_version = DagVersion.get_latest_version(dag.dag_id) + session = settings.Session() - dr = dag_maker.create_dagrun( - run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) - dr = dag_maker.create_dagrun_after( - dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) - dag_maker.create_dagrun_after( - dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED, dag_version=dag_version - ) + dr = dag_maker.create_dagrun(run_type=DagRunType.SCHEDULED, state=State.QUEUED) + dr = dag_maker.create_dagrun_after(dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED) + dag_maker.create_dagrun_after(dr, run_type=DagRunType.SCHEDULED, state=State.QUEUED) dag.clear() assert len(DagRun.find(dag_id=dag.dag_id, state=State.QUEUED, session=session)) == 3 @@ -5844,7 +5816,6 @@ def test_find_zombies_handle_failure_callbacks_are_correctly_passed_to_dag_proce assert expected_failure_callback_requests[0] == callback_requests[0] def test_cleanup_stale_dags(self): - clear_db_dags() dagbag = DagBag(TEST_DAG_FOLDER, read_dags_from_db=False) with create_session() as session: dag = dagbag.get_dag("test_example_bash_operator") diff --git a/tests/models/test_dag.py b/tests/models/test_dag.py index fc7a2b24b836a..e38beb2110ca9 100644 --- a/tests/models/test_dag.py +++ b/tests/models/test_dag.py @@ -62,7 +62,6 @@ dag as dag_decorator, get_asset_triggered_next_run_info, ) -from airflow.models.dag_version import DagVersion from airflow.models.dagrun import DagRun from airflow.models.param import DagParam, Param from airflow.models.serialized_dag import SerializedDagModel @@ -142,11 +141,14 @@ def setup_method(self) -> None: clear_db_runs() clear_db_dags() clear_db_assets() + self.patcher_dag_code = mock.patch("airflow.models.dag.DagCode.bulk_sync_to_db") + self.patcher_dag_code.start() def teardown_method(self) -> None: clear_db_runs() clear_db_dags() clear_db_assets() + self.patcher_dag_code.stop() @staticmethod def _clean_up(dag_id: str): @@ -1037,16 +1039,14 @@ def test_existing_dag_is_paused_config(self): assert dag.max_consecutive_failed_dag_runs == 2 def test_existing_dag_is_paused_after_limit(self): - def add_failed_dag_run(dag, id, execution_date): + def add_failed_dag_run(id, execution_date): 
triggered_by_kwargs = {"triggered_by": DagRunTriggeredByType.TEST} if AIRFLOW_V_3_0_PLUS else {} - dag_v = DagVersion.get_latest_version(dag_id=dag.dag_id) dr = dag.create_dagrun( run_type=DagRunType.MANUAL, run_id="run_id_" + id, execution_date=execution_date, state=State.FAILED, data_interval=(execution_date, execution_date), - dag_version=dag_v, **triggered_by_kwargs, ) ti_op1 = dr.get_task_instance(task_id=op1.task_id, session=session) @@ -1059,16 +1059,14 @@ def add_failed_dag_run(dag, id, execution_date): dag.add_task(op1) session = settings.Session() dag.sync_to_db(session=session) - SerializedDagModel.write_dag(dag) assert not dag.get_is_paused() # dag should be paused after 2 failed dag_runs add_failed_dag_run( - dag, "1", TEST_DATE, ) - add_failed_dag_run(dag, "2", TEST_DATE + timedelta(days=1)) + add_failed_dag_run("2", TEST_DATE + timedelta(days=1)) assert dag.get_is_paused() dag.clear() self._clean_up(dag_id) @@ -1087,7 +1085,8 @@ def test_dag_is_deactivated_upon_dagfile_deletion(self): dag = DAG(dag_id, schedule=None, is_paused_upon_creation=True) dag.fileloc = dag_fileloc session = settings.Session() - dag.sync_to_db(session=session, processor_subdir="/usr/local/airflow/dags/") + with mock.patch("airflow.models.dag.DagCode.bulk_sync_to_db"): + dag.sync_to_db(session=session, processor_subdir="/usr/local/airflow/dags/") orm_dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).one() @@ -2371,8 +2370,9 @@ def test_relative_fileloc_serialized( """ dag = DAG(dag_id="test", schedule=None) dag.fileloc = fileloc - dag.sync_to_db() - SerializedDagModel.write_dag(dag) + sdm = SerializedDagModel(dag) + session.add(sdm) + session.commit() session.expunge_all() sdm = SerializedDagModel.get(dag.dag_id, session) dag = sdm.dag @@ -2383,10 +2383,8 @@ def test__processor_dags_folder(self, session): """Only populated after deserializtion""" dag = DAG(dag_id="test", schedule=None) dag.fileloc = "/abc/test.py" - dag.sync_to_db() assert dag._processor_dags_folder is None - SerializedDagModel.write_dag(dag) - sdm = SerializedDagModel.get(dag.dag_id, session) + sdm = SerializedDagModel(dag) assert sdm.dag._processor_dags_folder == settings.DAGS_FOLDER @pytest.mark.need_serialized_dag diff --git a/tests/models/test_dag_version.py b/tests/models/test_dag_version.py deleted file mode 100644 index 42a33b4b66f13..0000000000000 --- a/tests/models/test_dag_version.py +++ /dev/null @@ -1,113 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-from __future__ import annotations - -import pytest -from sqlalchemy import func, select - -from airflow.models.dag_version import DagVersion -from airflow.models.serialized_dag import SerializedDagModel -from airflow.operators.empty import EmptyOperator - -from tests_common.test_utils.db import clear_db_dags - -pytestmark = [pytest.mark.db_test, pytest.mark.skip_if_database_isolation_mode] - - -class TestDagVersion: - def setup_method(self): - clear_db_dags() - - def teardown_method(self): - clear_db_dags() - - @pytest.mark.need_serialized_dag - def test_writing_dag_version(self, dag_maker, session): - with dag_maker("test_writing_dag_version") as dag: - pass - - latest_version = DagVersion.get_latest_version(dag.dag_id) - assert latest_version.version_number == 1 - assert not latest_version.version_name - assert latest_version.dag_id == dag.dag_id - - @pytest.mark.need_serialized_dag - def test_writing_dag_version_with_version_name(self, dag_maker, session): - version_name = "my_version" - with dag_maker(version_name=version_name) as dag: - pass - - latest_version = DagVersion.get_latest_version(dag.dag_id) - assert latest_version.version_number == 1 - assert latest_version.version_name == version_name - assert latest_version.dag_id == dag.dag_id - - def test_writing_dag_version_with_changes(self, dag_maker, session): - """This also tested the get_latest_version method""" - version_name = "my_version" - with dag_maker("test1", version_name=version_name) as dag: - EmptyOperator(task_id="task1") - dag.sync_to_db() - SerializedDagModel.write_dag(dag) - # Add extra task to change the dag - with dag_maker("test1", version_name=version_name) as dag2: - EmptyOperator(task_id="task1") - EmptyOperator(task_id="task2") - dag2.sync_to_db() - SerializedDagModel.write_dag(dag2) - - latest_version = DagVersion.get_latest_version(dag.dag_id) - assert latest_version.version_number == 2 - assert latest_version.version_name == version_name - assert 2 == session.scalar(select(func.count()).where(DagVersion.dag_id == dag.dag_id)) - - @pytest.mark.need_serialized_dag - def test_get_version(self, dag_maker, session): - """The two dags have the same version name and number but different dag ids""" - version_name = "my_version" - dag1_id = "test1" - with dag_maker(dag1_id, version_name=version_name): - EmptyOperator(task_id="task1") - - with dag_maker("test2", version_name=version_name): - EmptyOperator(task_id="task1") - - with dag_maker("test3"): - EmptyOperator(task_id="task1") - - version = DagVersion.get_version(dag1_id) - assert version.version_number == 1 - assert version.version_name == version_name - assert version.dag_id == dag1_id - assert version.version == "my_version-1" - - @pytest.mark.need_serialized_dag - def test_version_property(self, dag_maker): - version_name = "my_version" - with dag_maker("test1", version_name=version_name) as dag: - pass - - latest_version = DagVersion.get_latest_version(dag.dag_id) - assert latest_version.version == f"{version_name}-1" - - @pytest.mark.need_serialized_dag - def test_version_property_with_null_version_name(self, dag_maker): - with dag_maker("test1") as dag: - pass - - latest_version = DagVersion.get_latest_version(dag.dag_id) - assert latest_version.version == "1" diff --git a/tests/models/test_dagbag.py b/tests/models/test_dagbag.py index 6915e4df0c1fc..d91f6738822a1 100644 --- a/tests/models/test_dagbag.py +++ b/tests/models/test_dagbag.py @@ -64,10 +64,10 @@ def db_clean_up(): class TestDagBag: - def setup_class(cls): + def setup_class(self): 
db_clean_up() - def teardown_class(cls): + def teardown_class(self): db_clean_up() def test_get_existing_dag(self, tmp_path): @@ -723,7 +723,6 @@ def _sync_to_db(): dagbag.sync_to_db(session=session) dag = dagbag.dags["test_example_bash_operator"] - dag.sync_to_db() _sync_to_db() mock_sync_perm_for_dag.assert_called_once_with(dag, session=session) @@ -821,7 +820,6 @@ def test_get_dag_with_dag_serialization(self): with time_machine.travel((tz.datetime(2020, 1, 5, 0, 0, 0)), tick=False): example_bash_op_dag = DagBag(include_examples=True).dags.get("example_bash_operator") - example_bash_op_dag.sync_to_db() SerializedDagModel.write_dag(dag=example_bash_op_dag) dag_bag = DagBag(read_dags_from_db=True) @@ -839,7 +837,6 @@ def test_get_dag_with_dag_serialization(self): # Make a change in the DAG and write Serialized DAG to the DB with time_machine.travel((tz.datetime(2020, 1, 5, 0, 0, 6)), tick=False): example_bash_op_dag.tags.add("new_tag") - example_bash_op_dag.sync_to_db() SerializedDagModel.write_dag(dag=example_bash_op_dag) # Since min_serialized_dag_fetch_interval is passed verify that calling 'dag_bag.get_dag' @@ -855,16 +852,15 @@ def test_get_dag_with_dag_serialization(self): @pytest.mark.skip_if_database_isolation_mode # Does not work in db isolation mode @patch("airflow.models.dagbag.settings.MIN_SERIALIZED_DAG_UPDATE_INTERVAL", 5) @patch("airflow.models.dagbag.settings.MIN_SERIALIZED_DAG_FETCH_INTERVAL", 5) - def test_get_dag_refresh_race_condition(self, session): + def test_get_dag_refresh_race_condition(self): """ Test that DagBag.get_dag correctly refresh the Serialized DAG even if SerializedDagModel.last_updated is before DagBag.dags_last_fetched. """ - db_clean_up() + # serialize the initial version of the DAG with time_machine.travel((tz.datetime(2020, 1, 5, 0, 0, 0)), tick=False): example_bash_op_dag = DagBag(include_examples=True).dags.get("example_bash_operator") - example_bash_op_dag.sync_to_db() SerializedDagModel.write_dag(dag=example_bash_op_dag) # deserialize the DAG @@ -890,7 +886,6 @@ def test_get_dag_refresh_race_condition(self, session): # long before the transaction is committed with time_machine.travel((tz.datetime(2020, 1, 5, 1, 0, 0)), tick=False): example_bash_op_dag.tags.add("new_tag") - example_bash_op_dag.sync_to_db() SerializedDagModel.write_dag(dag=example_bash_op_dag) # Since min_serialized_dag_fetch_interval is passed verify that calling 'dag_bag.get_dag' @@ -911,7 +906,6 @@ def test_collect_dags_from_db(self): example_dags = dagbag.dags for dag in example_dags.values(): - dag.sync_to_db() SerializedDagModel.write_dag(dag) new_dagbag = DagBag(read_dags_from_db=True) diff --git a/tests/models/test_dagcode.py b/tests/models/test_dagcode.py index fd7d761f9103f..26b29ea8f9c02 100644 --- a/tests/models/test_dagcode.py +++ b/tests/models/test_dagcode.py @@ -17,6 +17,7 @@ # under the License. from __future__ import annotations +from datetime import timedelta from unittest.mock import patch import pytest @@ -24,16 +25,13 @@ import airflow.example_dags as example_dags_module from airflow.exceptions import AirflowException from airflow.models import DagBag -from airflow.models.dag import DAG -from airflow.models.dag_version import DagVersion from airflow.models.dagcode import DagCode -from airflow.models.serialized_dag import SerializedDagModel as SDM # To move it to a shared module. 
from airflow.utils.file import open_maybe_zipped from airflow.utils.session import create_session -from tests_common.test_utils.db import clear_db_dag_code, clear_db_dags +from tests_common.test_utils.db import clear_db_dag_code pytestmark = [pytest.mark.db_test, pytest.mark.skip_if_database_isolation_mode] @@ -41,7 +39,6 @@ def make_example_dags(module): """Loads DAGs from a module for test.""" dagbag = DagBag(module.__path__[0]) - DAG.bulk_write_to_db(dagbag.dags.values()) return dagbag.dags @@ -49,35 +46,55 @@ class TestDagCode: """Unit tests for DagCode.""" def setup_method(self): - clear_db_dags() clear_db_dag_code() def teardown_method(self): - clear_db_dags() clear_db_dag_code() def _write_two_example_dags(self): example_dags = make_example_dags(example_dags_module) bash_dag = example_dags["example_bash_operator"] - dag_version = DagVersion.get_latest_version("example_bash_operator") - DagCode(dag_version, bash_dag.fileloc).sync_to_db() + DagCode(bash_dag.fileloc).sync_to_db() xcom_dag = example_dags["example_xcom"] - dag_version = DagVersion.get_latest_version("example_xcom") - DagCode(dag_version, xcom_dag.fileloc).sync_to_db() + DagCode(xcom_dag.fileloc).sync_to_db() return [bash_dag, xcom_dag] def _write_example_dags(self): example_dags = make_example_dags(example_dags_module) for dag in example_dags.values(): - SDM.write_dag(dag) + dag.sync_to_db() return example_dags - def test_write_to_db(self): + def test_sync_to_db(self): """Dg code can be written into database.""" example_dags = self._write_example_dags() self._compare_example_dags(example_dags) + def test_bulk_sync_to_db(self): + """Dg code can be bulk written into database.""" + example_dags = make_example_dags(example_dags_module) + files = [dag.fileloc for dag in example_dags.values()] + with create_session() as session: + DagCode.bulk_sync_to_db(files, session=session) + session.commit() + + self._compare_example_dags(example_dags) + + def test_bulk_sync_to_db_half_files(self): + """Dg code can be bulk written into database.""" + example_dags = make_example_dags(example_dags_module) + files = [dag.fileloc for dag in example_dags.values()] + half_files = files[: len(files) // 2] + with create_session() as session: + DagCode.bulk_sync_to_db(half_files, session=session) + session.commit() + with create_session() as session: + DagCode.bulk_sync_to_db(files, session=session) + session.commit() + + self._compare_example_dags(example_dags) + @patch.object(DagCode, "dag_fileloc_hash") def test_detecting_duplicate_key(self, mock_hash): """Dag code detects duplicate key.""" @@ -95,8 +112,6 @@ def _compare_example_dags(self, example_dags): session.query(DagCode.fileloc, DagCode.fileloc_hash, DagCode.source_code) .filter(DagCode.fileloc == dag.fileloc) .filter(DagCode.fileloc_hash == dag_fileloc_hash) - .order_by(DagCode.last_updated.desc()) - .limit(1) .one() ) @@ -111,7 +126,7 @@ def test_code_can_be_read_when_no_access_to_file(self): Source Code should at least exist in one of DB or File. 
""" example_dag = make_example_dags(example_dags_module).get("example_bash_operator") - SDM.write_dag(example_dag) + example_dag.sync_to_db() # Mock that there is no access to the Dag File with patch("airflow.models.dagcode.open_maybe_zipped") as mock_open: @@ -121,50 +136,27 @@ def test_code_can_be_read_when_no_access_to_file(self): for test_string in ["example_bash_operator", "also_run_this", "run_this_last"]: assert test_string in dag_code - def test_db_code_created_on_serdag_change(self, session): - """Test new DagCode is created in DB when DAG file is changed""" + def test_db_code_updated_on_dag_file_change(self): + """Test if DagCode is updated in DB when DAG file is changed""" example_dag = make_example_dags(example_dags_module).get("example_bash_operator") - SDM.write_dag(example_dag) + example_dag.sync_to_db() + + with create_session() as session: + result = session.query(DagCode).filter(DagCode.fileloc == example_dag.fileloc).one() - result = ( - session.query(DagCode) - .filter(DagCode.fileloc == example_dag.fileloc) - .order_by(DagCode.last_updated.desc()) - .limit(1) - .one() - ) + assert result.fileloc == example_dag.fileloc + assert result.source_code is not None - assert result.fileloc == example_dag.fileloc - assert result.source_code is not None + with patch("airflow.models.dagcode.os.path.getmtime") as mock_mtime: + mock_mtime.return_value = (result.last_updated + timedelta(seconds=1)).timestamp() - example_dag = make_example_dags(example_dags_module).get("example_bash_operator") - SDM.write_dag(example_dag, processor_subdir="/tmp") - with patch("airflow.models.dagcode.DagCode._get_code_from_file") as mock_code: - mock_code.return_value = "# dummy code" - SDM.write_dag(example_dag) - - new_result = ( - session.query(DagCode) - .filter(DagCode.fileloc == example_dag.fileloc) - .order_by(DagCode.last_updated.desc()) - .limit(1) - .one() - ) - - assert new_result.fileloc == example_dag.fileloc - assert new_result.source_code != result.source_code - assert new_result.last_updated > result.last_updated - - def test_has_dag(self, dag_maker): - """Test has_dag method.""" - with dag_maker("test_has_dag") as dag: - pass - dag.sync_to_db() - SDM.write_dag(dag) - - with dag_maker() as dag2: - pass - dag2.sync_to_db() - SDM.write_dag(dag2) - - assert DagCode.has_dag(dag.fileloc) + with patch("airflow.models.dagcode.DagCode._get_code_from_file") as mock_code: + mock_code.return_value = "# dummy code" + example_dag.sync_to_db() + + with create_session() as session: + new_result = session.query(DagCode).filter(DagCode.fileloc == example_dag.fileloc).one() + + assert new_result.fileloc == example_dag.fileloc + assert new_result.source_code == "# dummy code" + assert new_result.last_updated > result.last_updated diff --git a/tests/models/test_dagrun.py b/tests/models/test_dagrun.py index dc5a8ab66e566..e8889fb102236 100644 --- a/tests/models/test_dagrun.py +++ b/tests/models/test_dagrun.py @@ -88,7 +88,6 @@ def _clean_db(): db.clear_db_variables() db.clear_db_assets() db.clear_db_xcom() - db.clear_db_dags() def create_dag_run( self, diff --git a/tests/models/test_serialized_dag.py b/tests/models/test_serialized_dag.py index d0bfe37a69cc0..cdff883760c92 100644 --- a/tests/models/test_serialized_dag.py +++ b/tests/models/test_serialized_dag.py @@ -23,7 +23,7 @@ import pendulum import pytest -from sqlalchemy import func, select +from sqlalchemy import select import airflow.example_dags as example_dags_module from airflow.assets import Asset @@ -31,7 +31,6 @@ from airflow.models.dagbag 
import DagBag from airflow.models.dagcode import DagCode from airflow.models.serialized_dag import SerializedDagModel as SDM -from airflow.operators.empty import EmptyOperator from airflow.providers.standard.operators.bash import BashOperator from airflow.serialization.serialized_objects import SerializedDAG from airflow.settings import json @@ -48,7 +47,6 @@ def make_example_dags(module): """Loads DAGs from a module for test.""" dagbag = DagBag(module.__path__[0]) - DAG.bulk_write_to_db(dagbag.dags.values()) return dagbag.dags @@ -63,7 +61,6 @@ class TestSerializedDagModel: ], ) def setup_test_cases(self, request, monkeypatch): - db.clear_db_dags() db.clear_db_serialized_dags() with mock.patch("airflow.models.serialized_dag.COMPRESS_SERIALIZED_DAGS", request.param): yield @@ -89,7 +86,7 @@ def test_write_dag(self): assert SDM.has_dag(dag.dag_id) result = session.query(SDM).filter(SDM.dag_id == dag.dag_id).one() - assert result.dag_version.dag_code.fileloc == dag.fileloc + assert result.fileloc == dag.fileloc # Verifies JSON schema. SerializedDAG.validate_schema(result.data) @@ -101,28 +98,29 @@ def test_serialized_dag_is_updated_if_dag_is_changed(self): dag_updated = SDM.write_dag(dag=example_bash_op_dag) assert dag_updated is True - s_dag = SDM.get(example_bash_op_dag.dag_id) + with create_session() as session: + s_dag = session.get(SDM, example_bash_op_dag.dag_id) - # Test that if DAG is not changed, Serialized DAG is not re-written and last_updated - # column is not updated - dag_updated = SDM.write_dag(dag=example_bash_op_dag) - s_dag_1 = SDM.get(example_bash_op_dag.dag_id) + # Test that if DAG is not changed, Serialized DAG is not re-written and last_updated + # column is not updated + dag_updated = SDM.write_dag(dag=example_bash_op_dag) + s_dag_1 = session.get(SDM, example_bash_op_dag.dag_id) - assert s_dag_1.dag_hash == s_dag.dag_hash - assert s_dag.last_updated == s_dag_1.last_updated - assert dag_updated is False + assert s_dag_1.dag_hash == s_dag.dag_hash + assert s_dag.last_updated == s_dag_1.last_updated + assert dag_updated is False - # Update DAG - example_bash_op_dag.tags.add("new_tag") - assert example_bash_op_dag.tags == {"example", "example2", "new_tag"} + # Update DAG + example_bash_op_dag.tags.add("new_tag") + assert example_bash_op_dag.tags == {"example", "example2", "new_tag"} - dag_updated = SDM.write_dag(dag=example_bash_op_dag) - s_dag_2 = SDM.get(example_bash_op_dag.dag_id) + dag_updated = SDM.write_dag(dag=example_bash_op_dag) + s_dag_2 = session.get(SDM, example_bash_op_dag.dag_id) - assert s_dag.last_updated != s_dag_2.last_updated - assert s_dag.dag_hash != s_dag_2.dag_hash - assert s_dag_2.data["dag"]["tags"] == ["example", "example2", "new_tag"] - assert dag_updated is True + assert s_dag.last_updated != s_dag_2.last_updated + assert s_dag.dag_hash != s_dag_2.dag_hash + assert s_dag_2.data["dag"]["tags"] == ["example", "example2", "new_tag"] + assert dag_updated is True @pytest.mark.skip_if_database_isolation_mode # Does not work in db isolation mode def test_serialized_dag_is_updated_if_processor_subdir_changed(self): @@ -133,12 +131,12 @@ def test_serialized_dag_is_updated_if_processor_subdir_changed(self): assert dag_updated is True with create_session() as session: - s_dag = SDM.get(example_bash_op_dag.dag_id) + s_dag = session.get(SDM, example_bash_op_dag.dag_id) # Test that if DAG is not changed, Serialized DAG is not re-written and last_updated # column is not updated dag_updated = SDM.write_dag(dag=example_bash_op_dag, processor_subdir="/tmp/test") 
- s_dag_1 = SDM.get(example_bash_op_dag.dag_id) + s_dag_1 = session.get(SDM, example_bash_op_dag.dag_id) assert s_dag_1.dag_hash == s_dag.dag_hash assert s_dag.last_updated == s_dag_1.last_updated @@ -147,7 +145,7 @@ def test_serialized_dag_is_updated_if_processor_subdir_changed(self): # Update DAG dag_updated = SDM.write_dag(dag=example_bash_op_dag, processor_subdir="/tmp/other") - s_dag_2 = SDM.get(example_bash_op_dag.dag_id) + s_dag_2 = session.get(SDM, example_bash_op_dag.dag_id) assert s_dag.processor_subdir != s_dag_2.processor_subdir assert dag_updated is True @@ -164,19 +162,6 @@ def test_read_dags(self): assert serialized_dag.dag_id == dag.dag_id assert set(serialized_dag.task_dict) == set(dag.task_dict) - @pytest.mark.skip_if_database_isolation_mode # Does not work in db isolation mode - def test_read_all_dags_only_picks_the_latest_serdags(self, session): - example_dags = self._write_example_dags() - serialized_dags = SDM.read_all_dags() - assert len(example_dags) == len(serialized_dags) - - ex_dags = make_example_dags(example_dags_module) - SDM.write_dag(ex_dags.get("example_bash_operator"), processor_subdir="/tmp/") - serialized_dags2 = SDM.read_all_dags() - sdags = session.query(SDM).all() - # assert only the latest SDM is returned - assert len(sdags) != len(serialized_dags2) - @pytest.mark.skip_if_database_isolation_mode # Does not work in db isolation mode def test_remove_dags_by_id(self): """DAGs can be removed from database.""" @@ -205,12 +190,26 @@ def test_bulk_sync_to_db(self): DAG("dag_2", schedule=None), DAG("dag_3", schedule=None), ] - DAG.bulk_write_to_db(dags) - # we also write to dag_version and dag_code tables - # in dag_version, we search for unique version_name too - with assert_queries_count(24): + with assert_queries_count(10): SDM.bulk_sync_to_db(dags) + @pytest.mark.skip_if_database_isolation_mode # Does not work in db isolation mode + @pytest.mark.parametrize("dag_dependencies_fields", [{"dag_dependencies": None}, {}]) + def test_get_dag_dependencies_default_to_empty(self, dag_dependencies_fields): + """Test a pre-2.1.0 serialized DAG can deserialize DAG dependencies.""" + example_dags = make_example_dags(example_dags_module) + + with create_session() as session: + sdms = [SDM(dag) for dag in example_dags.values()] + # Simulate pre-2.1.0 format. 
+ for sdm in sdms: + del sdm.data["dag"]["dag_dependencies"] + sdm.data["dag"].update(dag_dependencies_fields) + session.bulk_save_objects(sdms) + + expected_dependencies = {dag_id: [] for dag_id in example_dags} + assert SDM.get_dag_dependencies() == expected_dependencies + @pytest.mark.skip_if_database_isolation_mode # Does not work in db isolation mode def test_order_of_dag_params_is_stable(self): """ @@ -285,31 +284,3 @@ def get_hash_set(): first_hashes = get_hash_set() # assert that the hashes are the same assert first_hashes == get_hash_set() - - def test_get_latest_serdag_versions(self, dag_maker, session): - # first dag - with dag_maker("dag1") as dag: - EmptyOperator(task_id="task1") - dag.sync_to_db() - SDM.write_dag(dag) - with dag_maker("dag1") as dag: - EmptyOperator(task_id="task1") - EmptyOperator(task_id="task2") - dag.sync_to_db() - SDM.write_dag(dag) - # second dag - with dag_maker("dag2") as dag: - EmptyOperator(task_id="task1") - dag.sync_to_db() - SDM.write_dag(dag) - with dag_maker("dag2") as dag: - EmptyOperator(task_id="task1") - EmptyOperator(task_id="task2") - dag.sync_to_db() - SDM.write_dag(dag) - - # Total serdags should be 4 - assert session.scalar(select(func.count()).select_from(SDM)) == 4 - - latest_versions = SDM.get_latest_serialized_dags(dag_ids=["dag1", "dag2"], session=session) - assert len(latest_versions) == 2 diff --git a/tests/models/test_taskinstance.py b/tests/models/test_taskinstance.py index 36b2d22f60fdc..8a1df0594e4e9 100644 --- a/tests/models/test_taskinstance.py +++ b/tests/models/test_taskinstance.py @@ -103,7 +103,7 @@ from tests_common.test_utils import db from tests_common.test_utils.compat import AIRFLOW_V_3_0_PLUS from tests_common.test_utils.config import conf_vars -from tests_common.test_utils.db import clear_db_connections, clear_db_dags, clear_db_runs +from tests_common.test_utils.db import clear_db_connections, clear_db_runs from tests_common.test_utils.mock_operators import MockOperator if AIRFLOW_V_3_0_PLUS: @@ -2992,7 +2992,6 @@ def test_changing_of_asset_when_adrq_is_already_populated(self, dag_maker): Test that when a task that produces asset has ran, that changing the consumer dag asset will not cause primary key blank-out """ - clear_db_dags() from airflow.assets import Asset with dag_maker(schedule=None, serialized=True) as dag1: @@ -4014,7 +4013,6 @@ def test_refresh_from_db(self, create_task_instance): "next_method": None, "updated_at": None, "task_display_name": "Test Refresh from DB Task", - "dag_version_id": None, } # Make sure we aren't missing any new value in our expected_values list. 
expected_keys = {f"task_instance.{key}" for key in expected_values} diff --git a/tests/operators/test_trigger_dagrun.py b/tests/operators/test_trigger_dagrun.py index 0b40154b21b05..52a11d10e5e33 100644 --- a/tests/operators/test_trigger_dagrun.py +++ b/tests/operators/test_trigger_dagrun.py @@ -28,6 +28,8 @@ from airflow.models.dag import DagModel from airflow.models.dagbag import DagBag from airflow.models.dagrun import DagRun +from airflow.models.log import Log +from airflow.models.serialized_dag import SerializedDagModel from airflow.models.taskinstance import TaskInstance from airflow.operators.trigger_dagrun import TriggerDagRunOperator from airflow.settings import TracebackSessionForTests @@ -37,8 +39,6 @@ from airflow.utils.state import DagRunState, State, TaskInstanceState from airflow.utils.types import DagRunType -from tests_common.test_utils.db import clear_db_dags, clear_db_logs, clear_db_runs - pytestmark = pytest.mark.db_test DEFAULT_DATE = datetime(2019, 1, 1, tzinfo=timezone.utc) @@ -82,9 +82,12 @@ def re_sync_triggered_dag_to_db(self, dag, dag_maker): def teardown_method(self): """Cleanup state after testing in DB.""" - clear_db_logs() - clear_db_runs() - clear_db_dags() + with create_session() as session: + session.query(Log).filter(Log.dag_id == TEST_DAG_ID).delete(synchronize_session=False) + for dbmodel in [DagModel, DagRun, TaskInstance, SerializedDagModel]: + session.query(dbmodel).filter(dbmodel.dag_id.in_([TRIGGERED_DAG_ID, TEST_DAG_ID])).delete( + synchronize_session=False + ) # pathlib.Path(self._tmpfile).unlink() diff --git a/tests/sensors/test_external_task_sensor.py b/tests/sensors/test_external_task_sensor.py index e2246a2f75132..43911c1a41d48 100644 --- a/tests/sensors/test_external_task_sensor.py +++ b/tests/sensors/test_external_task_sensor.py @@ -124,7 +124,6 @@ def add_fake_task_group(self, target_states=None): with self.dag as dag: with TaskGroup(group_id=TEST_TASK_GROUP_ID) as task_group: _ = [EmptyOperator(task_id=f"task{i}") for i in range(len(target_states))] - dag.sync_to_db() SerializedDagModel.write_dag(dag) for idx, task in enumerate(task_group): @@ -147,7 +146,7 @@ def fake_mapped_task(x: int): fake_task() fake_mapped_task.expand(x=list(map_indexes)) - dag.sync_to_db() + SerializedDagModel.write_dag(dag) for task in task_group: diff --git a/tests/utils/test_db_cleanup.py b/tests/utils/test_db_cleanup.py index 5df2c37cddb32..47e93c1616d63 100644 --- a/tests/utils/test_db_cleanup.py +++ b/tests/utils/test_db_cleanup.py @@ -352,7 +352,6 @@ def test_no_models_missing(self): "rendered_task_instance_fields", # foreign key with TI "dag_priority_parsing_request", # Records are purged once per DAG Processing loop, not a # significant source of data. 
- "dag_version", # self-maintaining } from airflow.utils.db_cleanup import config_dict diff --git a/tests/www/views/test_views_tasks.py b/tests/www/views/test_views_tasks.py index 424fb02979cdc..19caafe55bc63 100644 --- a/tests/www/views/test_views_tasks.py +++ b/tests/www/views/test_views_tasks.py @@ -28,9 +28,9 @@ import time_machine from airflow import settings -from airflow.models.dag import DAG +from airflow.models.dag import DAG, DagModel from airflow.models.dagbag import DagBag -from airflow.models.serialized_dag import SerializedDagModel +from airflow.models.dagcode import DagCode from airflow.models.taskinstance import TaskInstance from airflow.models.taskreschedule import TaskReschedule from airflow.models.xcom import XCom @@ -500,7 +500,7 @@ def test_code(admin_client): def test_code_from_db(admin_client): dag = DagBag(include_examples=True).get_dag("example_bash_operator") - SerializedDagModel.write_dag(dag) + DagCode(dag.fileloc, DagCode._get_code_from_file(dag.fileloc)).sync_to_db() url = "code?dag_id=example_bash_operator" resp = admin_client.get(url, follow_redirects=True) check_content_not_in_response("Failed to load DAG file Code", resp) @@ -510,7 +510,7 @@ def test_code_from_db(admin_client): def test_code_from_db_all_example_dags(admin_client): dagbag = DagBag(include_examples=True) for dag in dagbag.dags.values(): - SerializedDagModel.write_dag(dag) + DagCode(dag.fileloc, DagCode._get_code_from_file(dag.fileloc)).sync_to_db() url = "code?dag_id=example_bash_operator" resp = admin_client.get(url, follow_redirects=True) check_content_not_in_response("Failed to load DAG file Code", resp) @@ -614,12 +614,23 @@ def heartbeat(self): return True -def test_delete_dag_button_for_dag_on_scheduler_only(admin_client, dag_maker): - with dag_maker() as dag: - EmptyOperator(task_id="task") - dag.sync_to_db() +@pytest.fixture +def new_id_example_bash_operator(): + dag_id = "example_bash_operator" + test_dag_id = "non_existent_dag" + with create_session() as session: + dag_query = session.query(DagModel).filter(DagModel.dag_id == dag_id) + dag_query.first().tags = [] # To avoid "FOREIGN KEY constraint" error) + with create_session() as session: + dag_query.update({"dag_id": test_dag_id}) + yield test_dag_id + with create_session() as session: + session.query(DagModel).filter(DagModel.dag_id == test_dag_id).update({"dag_id": dag_id}) + + +def test_delete_dag_button_for_dag_on_scheduler_only(admin_client, new_id_example_bash_operator): # The delete-dag URL should be generated correctly - test_dag_id = dag.dag_id + test_dag_id = new_id_example_bash_operator resp = admin_client.get("/", follow_redirects=True) check_content_in_response(f"/delete?dag_id={test_dag_id}", resp) check_content_in_response(f"return confirmDeleteDag(this, '{test_dag_id}')", resp) @@ -1122,7 +1133,6 @@ def test_task_instances(admin_client): "try_number": 0, "unixname": getuser(), "updated_at": DEFAULT_DATE.isoformat(), - "dag_version_id": None, }, "run_after_loop": { "custom_operator_name": None, @@ -1159,7 +1169,6 @@ def test_task_instances(admin_client): "try_number": 0, "unixname": getuser(), "updated_at": DEFAULT_DATE.isoformat(), - "dag_version_id": None, }, "run_this_last": { "custom_operator_name": None, @@ -1196,7 +1205,6 @@ def test_task_instances(admin_client): "try_number": 0, "unixname": getuser(), "updated_at": DEFAULT_DATE.isoformat(), - "dag_version_id": None, }, "runme_0": { "custom_operator_name": None, @@ -1233,7 +1241,6 @@ def test_task_instances(admin_client): "try_number": 0, "unixname": 
getuser(), "updated_at": DEFAULT_DATE.isoformat(), - "dag_version_id": None, }, "runme_1": { "custom_operator_name": None, @@ -1270,7 +1277,6 @@ def test_task_instances(admin_client): "try_number": 0, "unixname": getuser(), "updated_at": DEFAULT_DATE.isoformat(), - "dag_version_id": None, }, "runme_2": { "custom_operator_name": None, @@ -1307,7 +1313,6 @@ def test_task_instances(admin_client): "try_number": 0, "unixname": getuser(), "updated_at": DEFAULT_DATE.isoformat(), - "dag_version_id": None, }, "this_will_skip": { "custom_operator_name": None, @@ -1344,6 +1349,5 @@ def test_task_instances(admin_client): "try_number": 0, "unixname": getuser(), "updated_at": DEFAULT_DATE.isoformat(), - "dag_version_id": None, }, } diff --git a/tests_common/pytest_plugin.py b/tests_common/pytest_plugin.py index 2a14511ebf239..2a35f79de14f5 100644 --- a/tests_common/pytest_plugin.py +++ b/tests_common/pytest_plugin.py @@ -884,30 +884,11 @@ def __exit__(self, type, value, traceback): self.serialized_model = SerializedDagModel( dag, processor_subdir=self.dag_model.processor_subdir ) - sdm = SerializedDagModel.get(dag.dag_id, session=self.session) - from tests_common.test_utils.compat import AIRFLOW_V_3_0_PLUS - - if AIRFLOW_V_3_0_PLUS and not sdm: - from airflow.models.dag_version import DagVersion - from airflow.models.dagcode import DagCode - - dagv = DagVersion.write_dag( - dag_id=dag.dag_id, - session=self.session, - version_name=dag.version_name, - ) - dag_code = DagCode(dagv, dag.fileloc, "Source") - self.session.merge(dag_code) - self.serialized_model.dag_version = dagv - if self.want_activate_assets: - self._activate_assets() - if sdm: - self.serialized_model = sdm - else: - self.session.merge(self.serialized_model) + self.session.merge(self.serialized_model) serialized_dag = self._serialized_dag() self._bag_dag_compat(serialized_dag) - + if AIRFLOW_V_3_0_PLUS and self.want_activate_assets: + self._activate_assets() self.session.flush() else: self._bag_dag_compat(self.dag) @@ -1026,30 +1007,16 @@ def cleanup(self): return # To isolate problems here with problems from elsewhere on the session object self.session.rollback() - from tests_common.test_utils.compat import AIRFLOW_V_3_0_PLUS - - if AIRFLOW_V_3_0_PLUS: - from airflow.models.dag_version import DagVersion - - self.session.query(DagRun).filter(DagRun.dag_id.in_(dag_ids)).delete( - synchronize_session=False, - ) - self.session.query(TaskInstance).filter(TaskInstance.dag_id.in_(dag_ids)).delete( - synchronize_session=False, - ) - self.session.query(DagVersion).filter(DagVersion.dag_id.in_(dag_ids)).delete( - synchronize_session=False - ) - else: - self.session.query(SerializedDagModel).filter( - SerializedDagModel.dag_id.in_(dag_ids) - ).delete(synchronize_session=False) - self.session.query(DagRun).filter(DagRun.dag_id.in_(dag_ids)).delete( - synchronize_session=False, - ) - self.session.query(TaskInstance).filter(TaskInstance.dag_id.in_(dag_ids)).delete( - synchronize_session=False, - ) + + self.session.query(SerializedDagModel).filter( + SerializedDagModel.dag_id.in_(dag_ids) + ).delete(synchronize_session=False) + self.session.query(DagRun).filter(DagRun.dag_id.in_(dag_ids)).delete( + synchronize_session=False, + ) + self.session.query(TaskInstance).filter(TaskInstance.dag_id.in_(dag_ids)).delete( + synchronize_session=False, + ) self.session.query(XCom).filter(XCom.dag_id.in_(dag_ids)).delete( synchronize_session=False, ) diff --git a/tests_common/test_utils/db.py b/tests_common/test_utils/db.py index 14f1007af9a8b..d37a8e942e111 100644 
--- a/tests_common/test_utils/db.py +++ b/tests_common/test_utils/db.py @@ -114,7 +114,6 @@ def clear_db_dags(): session.query(DagTag).delete() session.query(DagOwnerAttributes).delete() session.query(DagModel).delete() - session.query(DagCode).delete() def drop_tables_with_prefix(prefix):