From 8c41a8f14d381e7b0f1068d2d2503408b97fc707 Mon Sep 17 00:00:00 2001
From: Daniel Standish <15932138+dstandish@users.noreply.github.com>
Date: Fri, 12 Jan 2024 20:32:14 -0800
Subject: [PATCH] Remove unused index on task instance (#36737)
The index is only helpful for a user's custom query -- not for airflow in general (see comment https://github.com/apache/airflow/pull/30762#issuecomment-1886658295). I noticed that this index had zero scans over a period of months. I also observed that it takes up as much space as the table itself. Since it's not generally useful, it doesn't belong in airflow OSS.
Reverts #30762
(cherry picked from commit e20b400317ae4eb41181c5b0cee466eff768b521)
---
..._2_7_0_add_index_to_task_instance_table.py | 16 +++---
.../0133_2_8_1_refactor_dag_run_indexes.py | 50 +++++++++++++++++++
airflow/models/taskinstance.py | 1 -
airflow/utils/db.py | 1 +
docs/apache-airflow/img/airflow_erd.sha256 | 2 +-
docs/apache-airflow/img/airflow_erd.svg | 4 +-
docs/apache-airflow/migrations-ref.rst | 4 +-
scripts/in_container/run_mypy.sh | 3 ++
8 files changed, 69 insertions(+), 12 deletions(-)
create mode 100644 airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py
diff --git a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py b/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
index 225776119e4b..6730611a8d64 100644
--- a/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
+++ b/airflow/migrations/versions/0126_2_7_0_add_index_to_task_instance_table.py
@@ -37,14 +37,16 @@
def upgrade():
"""Apply Add index to task_instance table"""
- op.create_index(
- "ti_state_incl_start_date",
- "task_instance",
- ["dag_id", "task_id", "state"],
- postgresql_include=["start_date"],
- )
+ # We don't add this index anymore because it's not useful.
+ pass
def downgrade():
"""Unapply Add index to task_instance table"""
- op.drop_index("ti_state_incl_start_date", table_name="task_instance")
+ # At 2.8.1 we removed this index as it is not used, and changed this migration not to add it
+ # So we use drop if exists (because it might not be there)
+ import sqlalchemy
+ from contextlib import suppress
+
+ with suppress(sqlalchemy.exc.DatabaseError): # mysql does not support drop if exists index
+ op.drop_index("ti_state_incl_start_date", table_name="task_instance", if_exists=True)
diff --git a/airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py b/airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py
new file mode 100644
index 000000000000..43a24141ee19
--- /dev/null
+++ b/airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Drop unused TI index
+
+Revision ID: 88344c1d9134
+Revises: 10b52ebd31f7
+Create Date: 2024-01-11 11:54:48.232030
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "88344c1d9134"
+down_revision = "10b52ebd31f7"
+branch_labels = None
+depends_on = None
+airflow_version = "2.8.1"
+
+
+def upgrade():
+ """Apply Drop unused TI index"""
+ # This index may have been created in 2.7 but we've since removed it from migrations
+ import sqlalchemy
+ from contextlib import suppress
+
+ with suppress(sqlalchemy.exc.DatabaseError): # mysql does not support drop if exists index
+ op.drop_index("ti_state_incl_start_date", table_name="task_instance", if_exists=True)
+
+
+def downgrade():
+ """Unapply Drop unused TI index"""
diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py
index ae6b1e35c127..b653964580c5 100644
--- a/airflow/models/taskinstance.py
+++ b/airflow/models/taskinstance.py
@@ -1256,7 +1256,6 @@ class TaskInstance(Base, LoggingMixin):
# Existing "ti_state_lkp" is not enough for such query when this table has millions of rows, since
# rows have to be fetched in order to retrieve the start_date column. With this index, INDEX ONLY SCAN
# is performed and that query runs within milliseconds.
- Index("ti_state_incl_start_date", dag_id, task_id, state, postgresql_include=["start_date"]),
Index("ti_pool", pool, state, priority_weight),
Index("ti_job_id", job_id),
Index("ti_trigger_id", trigger_id),
diff --git a/airflow/utils/db.py b/airflow/utils/db.py
index 25a889f639e5..967ff3aa5cd6 100644
--- a/airflow/utils/db.py
+++ b/airflow/utils/db.py
@@ -89,6 +89,7 @@
"2.6.2": "c804e5c76e3e",
"2.7.0": "405de8318b3a",
"2.8.0": "10b52ebd31f7",
+ "2.8.1": "88344c1d9134",
}
diff --git a/docs/apache-airflow/img/airflow_erd.sha256 b/docs/apache-airflow/img/airflow_erd.sha256
index ded2722d373c..fcef9254b78d 100644
--- a/docs/apache-airflow/img/airflow_erd.sha256
+++ b/docs/apache-airflow/img/airflow_erd.sha256
@@ -1 +1 @@
-a5677b0b603e8835f92da4b8b061ec268ce7257ef6b446f12593743ecf90710a
\ No newline at end of file
+58421282236b587ccbdc2ef49cbf5599e73d82074afc6d5f3cfcd038fd731c0f
\ No newline at end of file
diff --git a/docs/apache-airflow/img/airflow_erd.svg b/docs/apache-airflow/img/airflow_erd.svg
index 497ef76975f7..8e85b5fa0cab 100644
--- a/docs/apache-airflow/img/airflow_erd.svg
+++ b/docs/apache-airflow/img/airflow_erd.svg
@@ -1342,14 +1342,14 @@
task_instance--xcom
-0..N
+1
1
task_instance--xcom
-1
+0..N
1
diff --git a/docs/apache-airflow/migrations-ref.rst b/docs/apache-airflow/migrations-ref.rst
index 2d54b5d9969f..0b068e5e53cf 100644
--- a/docs/apache-airflow/migrations-ref.rst
+++ b/docs/apache-airflow/migrations-ref.rst
@@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are executed via when you ru
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
| Revision ID | Revises ID | Airflow Version | Description |
+=================================+===================+===================+==============================================================+
-| ``10b52ebd31f7`` (head) | ``bd5dfbe21f88`` | ``2.8.0`` | Add processor_subdir to ImportError. |
+| ``88344c1d9134`` (head) | ``10b52ebd31f7`` | ``2.8.1`` | Drop unused TI index |
++---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
+| ``10b52ebd31f7`` | ``bd5dfbe21f88`` | ``2.8.0`` | Add processor_subdir to ImportError. |
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
| ``bd5dfbe21f88`` | ``f7bf2a57d0a6`` | ``2.8.0`` | Make connection login/password TEXT |
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
diff --git a/scripts/in_container/run_mypy.sh b/scripts/in_container/run_mypy.sh
index cee381d8cb2c..0245825a7264 100755
--- a/scripts/in_container/run_mypy.sh
+++ b/scripts/in_container/run_mypy.sh
@@ -22,6 +22,9 @@ export PYTHONPATH=${AIRFLOW_SOURCES}
ADDITIONAL_MYPY_OPTIONS=()
+export MYPY_FORCE_COLOR=true
+export TERM=ansi
+
if [[ ${SUSPENDED_PROVIDERS_FOLDERS=} != "" ]];
then
for folder in ${SUSPENDED_PROVIDERS_FOLDERS=}