From 8c41a8f14d381e7b0f1068d2d2503408b97fc707 Mon Sep 17 00:00:00 2001 From: Daniel Standish <15932138+dstandish@users.noreply.github.com> Date: Fri, 12 Jan 2024 20:32:14 -0800 Subject: [PATCH] Remove unused index on task instance (#36737) Index is only helpful for a user's custom query -- not for airflow in general (see comment https://github.com/apache/airflow/pull/30762#issuecomment-1886658295). Noticed that this index had zero scans over a period of months. I also observed that it takes up as much space as the table itself. Since it's not generally useful, it doesn't belong in airflow OSS. Reverts #30762 (cherry picked from commit e20b400317ae4eb41181c5b0cee466eff768b521) --- ..._2_7_0_add_index_to_task_instance_table.py | 16 +++--- .../0133_2_8_1_refactor_dag_run_indexes.py | 50 +++++++++++++++++++ airflow/models/taskinstance.py | 1 - airflow/utils/db.py | 1 + docs/apache-airflow/img/airflow_erd.sha256 | 2 +- docs/apache-airflow/img/airflow_erd.svg | 4 +- docs/apache-airflow/migrations-ref.rst | 4 +- scripts/in_container/run_mypy.sh | 3 ++ 8 files changed, 69 insertions(+), 12 deletions(-) create mode 100644 airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py diff --git a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py b/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py index 225776119e4b..6730611a8d64 100644 --- a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py +++ b/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py @@ -37,14 +37,16 @@ def upgrade(): """Apply Add index to task_instance table""" - op.create_index( - "ti_state_incl_start_date", - "task_instance", - ["dag_id", "task_id", "state"], - postgresql_include=["start_date"], - ) + # We don't add this index anymore because it's not useful. 
+ pass def downgrade(): """Unapply Add index to task_instance table""" - op.drop_index("ti_state_incl_start_date", table_name="task_instance") + # At 2.8.1 we removed this index as it is not used, and changed this migration not to add it + # So we use drop if exists (cus it might not be there) + import sqlalchemy + from contextlib import suppress + + with suppress(sqlalchemy.exc.DatabaseError): # mysql does not support drop if exists index + op.drop_index("ti_state_incl_start_date", table_name="task_instance", if_exists=True) diff --git a/airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py b/airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py new file mode 100644 index 000000000000..43a24141ee19 --- /dev/null +++ b/airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Drop unused TI index + +Revision ID: 88344c1d9134 +Revises: 10b52ebd31f7 +Create Date: 2024-01-11 11:54:48.232030 + +""" + +import sqlalchemy as sa +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision = "88344c1d9134" +down_revision = "10b52ebd31f7" +branch_labels = None +depends_on = None +airflow_version = "2.8.1" + + +def upgrade(): + """Apply refactor dag run indexes""" + # This index may have been created in 2.7 but we've since removed it from migrations + import sqlalchemy + from contextlib import suppress + + with suppress(sqlalchemy.exc.DatabaseError): # mysql does not support drop if exists index + op.drop_index("ti_state_incl_start_date", table_name="task_instance", if_exists=True) + + +def downgrade(): + """Unapply refactor dag run indexes""" diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index ae6b1e35c127..b653964580c5 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -1256,7 +1256,6 @@ class TaskInstance(Base, LoggingMixin): # Existing "ti_state_lkp" is not enough for such query when this table has millions of rows, since # rows have to be fetched in order to retrieve the start_date column. With this index, INDEX ONLY SCAN # is performed and that query runs within milliseconds. 
- Index("ti_state_incl_start_date", dag_id, task_id, state, postgresql_include=["start_date"]), Index("ti_pool", pool, state, priority_weight), Index("ti_job_id", job_id), Index("ti_trigger_id", trigger_id), diff --git a/airflow/utils/db.py b/airflow/utils/db.py index 25a889f639e5..967ff3aa5cd6 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -89,6 +89,7 @@ "2.6.2": "c804e5c76e3e", "2.7.0": "405de8318b3a", "2.8.0": "10b52ebd31f7", + "2.8.1": "88344c1d9134", } diff --git a/docs/apache-airflow/img/airflow_erd.sha256 b/docs/apache-airflow/img/airflow_erd.sha256 index ded2722d373c..fcef9254b78d 100644 --- a/docs/apache-airflow/img/airflow_erd.sha256 +++ b/docs/apache-airflow/img/airflow_erd.sha256 @@ -1 +1 @@ -a5677b0b603e8835f92da4b8b061ec268ce7257ef6b446f12593743ecf90710a \ No newline at end of file +58421282236b587ccbdc2ef49cbf5599e73d82074afc6d5f3cfcd038fd731c0f \ No newline at end of file diff --git a/docs/apache-airflow/img/airflow_erd.svg b/docs/apache-airflow/img/airflow_erd.svg index 497ef76975f7..8e85b5fa0cab 100644 --- a/docs/apache-airflow/img/airflow_erd.svg +++ b/docs/apache-airflow/img/airflow_erd.svg @@ -1342,14 +1342,14 @@ task_instance--xcom -0..N +1 1 task_instance--xcom -1 +0..N 1 diff --git a/docs/apache-airflow/migrations-ref.rst b/docs/apache-airflow/migrations-ref.rst index 2d54b5d9969f..0b068e5e53cf 100644 --- a/docs/apache-airflow/migrations-ref.rst +++ b/docs/apache-airflow/migrations-ref.rst @@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are executed via when you ru +---------------------------------+-------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=================================+===================+===================+==============================================================+ -| ``10b52ebd31f7`` (head) | ``bd5dfbe21f88`` | ``2.8.0`` | Add processor_subdir to ImportError. 
| +| ``88344c1d9134`` (head) | ``10b52ebd31f7`` | ``2.8.1`` | Drop unused TI index | ++---------------------------------+-------------------+-------------------+--------------------------------------------------------------+ +| ``10b52ebd31f7`` | ``bd5dfbe21f88`` | ``2.8.0`` | Add processor_subdir to ImportError. | +---------------------------------+-------------------+-------------------+--------------------------------------------------------------+ | ``bd5dfbe21f88`` | ``f7bf2a57d0a6`` | ``2.8.0`` | Make connection login/password TEXT | +---------------------------------+-------------------+-------------------+--------------------------------------------------------------+ diff --git a/scripts/in_container/run_mypy.sh b/scripts/in_container/run_mypy.sh index cee381d8cb2c..0245825a7264 100755 --- a/scripts/in_container/run_mypy.sh +++ b/scripts/in_container/run_mypy.sh @@ -22,6 +22,9 @@ export PYTHONPATH=${AIRFLOW_SOURCES} ADDITIONAL_MYPY_OPTIONS=() +export MYPY_FORCE_COLOR=true +export TERM=ansi + if [[ ${SUSPENDED_PROVIDERS_FOLDERS=} != "" ]]; then for folder in ${SUSPENDED_PROVIDERS_FOLDERS=}