Skip to content

Commit

Permalink
Remove unused index on task instance (#36737)
Browse files Browse the repository at this point in the history
This index is only helpful for a user's custom query -- not for Airflow in general (see comment #30762 (comment)).  I noticed that this index had zero scans over a period of months, and I observed that it takes up as much space as the table itself.  Since it's not generally useful, it doesn't belong in Airflow OSS.

Reverts #30762

(cherry picked from commit e20b400)
  • Loading branch information
dstandish authored and potiuk committed Jan 13, 2024
1 parent bf7fb4b commit 9e7ae3f
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,16 @@

def upgrade():
"""Apply Add index to task_instance table"""
op.create_index(
"ti_state_incl_start_date",
"task_instance",
["dag_id", "task_id", "state"],
postgresql_include=["start_date"],
)
# We don't add this index anymore because it's not useful.
pass


def downgrade():
"""Unapply Add index to task_instance table"""
op.drop_index("ti_state_incl_start_date", table_name="task_instance")
# At 2.8.1 we removed this index as it is not used, and changed this migration not to add it.
# So we use drop-if-exists (because it might not be there).
import sqlalchemy
from contextlib import suppress

with suppress(sqlalchemy.exc.DatabaseError): # mysql does not support drop if exists index
op.drop_index("ti_state_incl_start_date", table_name="task_instance", if_exists=True)
50 changes: 50 additions & 0 deletions airflow/migrations/versions/0133_2_8_1_refactor_dag_run_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Drop unused TI index
Revision ID: 88344c1d9134
Revises: 10b52ebd31f7
Create Date: 2024-01-11 11:54:48.232030
"""

import sqlalchemy as sa
from alembic import op


# revision identifiers, used by Alembic.
revision = "88344c1d9134"
down_revision = "10b52ebd31f7"
branch_labels = None
depends_on = None
airflow_version = "2.8.1"


def upgrade():
    """Apply Drop unused TI index.

    Drops the ``ti_state_incl_start_date`` index from the ``task_instance``
    table if it is present. The drop is best-effort: any
    ``sqlalchemy.exc.DatabaseError`` raised by the statement is suppressed.
    """
    # Kept as function-scope imports, as in the original migration.
    from contextlib import suppress

    import sqlalchemy

    # This index may have been created in 2.7 but we've since removed it from migrations,
    # so it may or may not exist in the database being upgraded.
    #
    # mysql does not support drop if exists index, hence the suppressed database error.
    # NOTE(review): on a backend that rejects the IF EXISTS form, the error is swallowed
    # and an existing index is presumably left in place -- confirm this is acceptable.
    with suppress(sqlalchemy.exc.DatabaseError):
        op.drop_index("ti_state_incl_start_date", table_name="task_instance", if_exists=True)


def downgrade():
    """Unapply Drop unused TI index.

    Intentionally a no-op: this function contains no statements, so the
    ``ti_state_incl_start_date`` index is not recreated on downgrade.
    """
1 change: 0 additions & 1 deletion airflow/models/taskinstance.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,7 +1256,6 @@ class TaskInstance(Base, LoggingMixin):
# Existing "ti_state_lkp" is not enough for such query when this table has millions of rows, since
# rows have to be fetched in order to retrieve the start_date column. With this index, INDEX ONLY SCAN
# is performed and that query runs within milliseconds.
Index("ti_state_incl_start_date", dag_id, task_id, state, postgresql_include=["start_date"]),
Index("ti_pool", pool, state, priority_weight),
Index("ti_job_id", job_id),
Index("ti_trigger_id", trigger_id),
Expand Down
1 change: 1 addition & 0 deletions airflow/utils/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
"2.6.2": "c804e5c76e3e",
"2.7.0": "405de8318b3a",
"2.8.0": "10b52ebd31f7",
"2.8.1": "88344c1d9134",
}


Expand Down
2 changes: 1 addition & 1 deletion docs/apache-airflow/img/airflow_erd.sha256
Original file line number Diff line number Diff line change
@@ -1 +1 @@
a5677b0b603e8835f92da4b8b061ec268ce7257ef6b446f12593743ecf90710a
58421282236b587ccbdc2ef49cbf5599e73d82074afc6d5f3cfcd038fd731c0f
4 changes: 2 additions & 2 deletions docs/apache-airflow/img/airflow_erd.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 3 additions & 1 deletion docs/apache-airflow/migrations-ref.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are executed via when you ru
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
| Revision ID | Revises ID | Airflow Version | Description |
+=================================+===================+===================+==============================================================+
| ``10b52ebd31f7`` (head) | ``bd5dfbe21f88`` | ``2.8.0`` | Add processor_subdir to ImportError. |
| ``88344c1d9134`` (head) | ``10b52ebd31f7`` | ``2.8.1`` | Drop unused TI index |
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
| ``10b52ebd31f7`` | ``bd5dfbe21f88`` | ``2.8.0`` | Add processor_subdir to ImportError. |
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
| ``bd5dfbe21f88`` | ``f7bf2a57d0a6`` | ``2.8.0`` | Make connection login/password TEXT |
+---------------------------------+-------------------+-------------------+--------------------------------------------------------------+
Expand Down
3 changes: 3 additions & 0 deletions scripts/in_container/run_mypy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ export PYTHONPATH=${AIRFLOW_SOURCES}

ADDITIONAL_MYPY_OPTIONS=()

export MYPY_FORCE_COLOR=true
export TERM=ansi

if [[ ${SUSPENDED_PROVIDERS_FOLDERS=} != "" ]];
then
for folder in ${SUSPENDED_PROVIDERS_FOLDERS=}
Expand Down

0 comments on commit 9e7ae3f

Please sign in to comment.