Skip to content

Commit

Permalink
Implement per-provider tests with lowest-direct dependency resolution (
Browse files Browse the repository at this point in the history
…apache#39946)

With this change we are running tests of downgrading Airflow
dependencies to "lowest-direct" ones - separately for "core" tests
and for each provider (and run corresponding tests with it). This
should allow us to determine what the lowest bounds are for the
dependencies - for Airflow and for individual providers and continue
doing it while Airflow evolves in the future.

Fixes: apache#35549
Related: apache#39100
  • Loading branch information
potiuk authored and romsharon98 committed Jul 26, 2024
1 parent 07afd54 commit 9797095
Show file tree
Hide file tree
Showing 65 changed files with 787 additions and 336 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ jobs:
${{ steps.selective-checks.outputs.parallel-test-types-list-as-string }}
providers-test-types-list-as-string: >-
${{ steps.selective-checks.outputs.providers-test-types-list-as-string }}
separate-test-types-list-as-string: >-
${{ steps.selective-checks.outputs.separate-test-types-list-as-string }}
include-success-outputs: ${{ steps.selective-checks.outputs.include-success-outputs }}
postgres-exclude: ${{ steps.selective-checks.outputs.postgres-exclude }}
mysql-exclude: ${{ steps.selective-checks.outputs.mysql-exclude }}
Expand Down Expand Up @@ -455,8 +457,11 @@ jobs:
runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }}
image-tag: ${{ needs.build-info.outputs.image-tag }}
parallel-test-types-list-as-string: ${{ needs.build-info.outputs.parallel-test-types-list-as-string }}
separate-test-types-list-as-string: >-
${{ needs.build-info.outputs.separate-test-types-list-as-string }}
run-coverage: ${{ needs.build-info.outputs.run-coverage }}
default-python-version: ${{ needs.build-info.outputs.default-python-version }}
python-versions: ${{ needs.build-info.outputs.python-versions }}
default-postgres-version: ${{ needs.build-info.outputs.default-postgres-version }}
canary-run: ${{ needs.build-info.outputs.canary-run }}
upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }}
Expand Down
12 changes: 12 additions & 0 deletions .github/workflows/run-unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,16 @@ on: # yamllint disable-line rule:truthy
required: false
default: "true"
type: string
force-lowest-dependencies:
description: "Whether to force lowest dependencies for the tests or not (true/false)"
required: false
default: "false"
type: string
monitor-delay-time-in-seconds:
description: "How much time to wait between printing parallel monitor summary"
required: false
default: 20
type: number
jobs:
tests:
timeout-minutes: 120
Expand All @@ -128,6 +138,7 @@ jobs:
DOWNGRADE_SQLALCHEMY: "${{ inputs.downgrade-sqlalchemy }}"
DOWNGRADE_PENDULUM: "${{ inputs.downgrade-pendulum }}"
ENABLE_COVERAGE: "${{ inputs.run-coverage }}"
FORCE_LOWEST_DEPENDENCIES: "${{ inputs.force-lowest-dependencies }}"
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_USERNAME: ${{ github.actor }}
Expand All @@ -140,6 +151,7 @@ jobs:
PYDANTIC: "${{ inputs.pydantic }}"
PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python-version }}"
UPGRADE_BOTO: "${{ inputs.upgrade-boto }}"
AIRFLOW_MONITOR_DELAY_TIME_IN_SECONDS: "${{inputs.monitor-delay-time-in-seconds}}"
VERBOSE: "true"
steps:
- name: "Cleanup repo"
Expand Down
31 changes: 31 additions & 0 deletions .github/workflows/special-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ on: # yamllint disable-line rule:truthy
description: "The list of parallel test types to run separated by spaces"
required: true
type: string
separate-test-types-list-as-string:
description: "The list of separate provider test types to run separated by spaces"
required: true
type: string
run-coverage:
description: "Whether to run coverage or not (true/false)"
required: true
Expand All @@ -40,6 +44,10 @@ on: # yamllint disable-line rule:truthy
description: "Which version of python should be used by default"
required: true
type: string
python-versions:
description: "The list of python versions (stringified JSON array) to run the tests on."
required: true
type: string
default-postgres-version:
description: "The default version of the postgres to use"
required: true
Expand Down Expand Up @@ -189,6 +197,29 @@ jobs:
run-coverage: ${{ inputs.run-coverage }}
debug-resources: ${{ inputs.debug-resources }}

tests-with-lowest-direct-resolution:
name: "Lowest direct dependency resolution tests"
uses: ./.github/workflows/run-unit-tests.yml
permissions:
contents: read
packages: read
secrets: inherit
with:
runs-on-as-json-default: ${{ inputs.runs-on-as-json-default }}
test-name: "LowestDeps-Postgres"
force-lowest-dependencies: "true"
test-scope: "All"
backend: "postgres"
image-tag: ${{ inputs.image-tag }}
python-versions: ${{ inputs.python-versions }}
backend-versions: "['${{ inputs.default-postgres-version }}']"
excludes: "[]"
parallel-test-types-list-as-string: ${{ inputs.separate-test-types-list-as-string }}
include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }}
run-coverage: ${{ inputs.run-coverage }}
debug-resources: ${{ inputs.debug-resources }}
monitor-delay-time-in-seconds: 120

tests-quarantined:
name: "Quarantined test"
uses: ./.github/workflows/run-unit-tests.yml
Expand Down
37 changes: 33 additions & 4 deletions Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -1072,15 +1072,15 @@ function check_downgrade_sqlalchemy() {
}

function check_downgrade_pendulum() {
if [[ ${DOWNGRADE_PENDULUM=} != "true" ]]; then
if [[ ${DOWNGRADE_PENDULUM=} != "true" || ${PYTHON_MAJOR_MINOR_VERSION} == "3.12" ]]; then
return
fi
min_pendulum_version=$(grep "\"pendulum>=" hatch_build.py | sed "s/.*>=\([0-9\.]*\).*/\1/" | xargs)
local MIN_PENDULUM_VERSION="2.1.2"
echo
echo "${COLOR_BLUE}Downgrading pendulum to minimum supported version: ${min_pendulum_version}${COLOR_RESET}"
echo "${COLOR_BLUE}Downgrading pendulum to minimum supported version: ${MIN_PENDULUM_VERSION}${COLOR_RESET}"
echo
# shellcheck disable=SC2086
${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "pendulum==${min_pendulum_version}"
${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "pendulum==${MIN_PENDULUM_VERSION}"
pip check
}

Expand Down Expand Up @@ -1109,12 +1109,41 @@ function check_run_tests() {
fi
}

# Optionally re-installs the editable project with uv's `lowest-direct` resolution strategy.
#
# Reads:
#   FORCE_LOWEST_DEPENDENCIES   - the function is a no-op unless this is exactly "true".
#   TEST_TYPE                   - when it matches Providers[<name>], the install targets ".[<name>]".
#   PYTHON_MAJOR_MINOR_VERSION  - used only for the papermill / Python 3.12 special case below.
# Writes:
#   EXTRA - not declared `local`, so it remains set in the calling shell after the function runs.
# Exit behaviour:
#   For the papermill extra on Python 3.12 the function calls `exit 0`, which ends the whole
#   script with a success status rather than returning to the caller.
function check_force_lowest_dependencies() {
# `${VAR=}` assigns an empty default when the variable is unset, so the test is safe either way.
if [[ ${FORCE_LOWEST_DEPENDENCIES=} != "true" ]]; then
return
fi
EXTRA=""
if [[ ${TEST_TYPE=} =~ Providers\[.*\] ]]; then
# TEST_TYPE is wrapped in an extra pair of brackets before sed strips the `Providers[...]`
# wrapper, so the result keeps one bracket level, e.g. Providers[amazon] -> [amazon].
# shellcheck disable=SC2001
EXTRA=$(echo "[${TEST_TYPE}]" | sed 's/Providers\[\(.*\)\]/\1/')
echo
echo "${COLOR_BLUE}Forcing dependencies to lowest versions for provider: ${EXTRA}${COLOR_RESET}"
echo
else
echo
echo "${COLOR_BLUE}Forcing dependencies to lowest versions for Airflow.${COLOR_RESET}"
echo
fi
# Trace the commands that follow; tracing is switched off again by `set +x` at the end.
set -x
# TODO: hard-code explicitly papermill on 3.12 but we should automate it
if [[ ${EXTRA} == "[papermill]" && ${PYTHON_MAJOR_MINOR_VERSION} == "3.12" ]]; then
echo
echo "Skipping papermill check on Python 3.12!"
echo
# `exit` (not `return`) stops the script here, so nothing after this function call runs.
exit 0
fi
# EXTRA is either empty (installs ".") or a bracketed extra (installs e.g. ".[amazon]").
uv pip install --python "$(which python)" --resolution lowest-direct --upgrade --editable ".${EXTRA}"
set +x
}

determine_airflow_to_use
environment_initialization
check_boto_upgrade
check_pydantic
check_downgrade_sqlalchemy
check_downgrade_pendulum
check_force_lowest_dependencies
check_run_tests "${@}"

exec /bin/bash "${@}"
Expand Down
16 changes: 9 additions & 7 deletions airflow/providers/amazon/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,17 +92,17 @@ dependencies:
# We should update minimum version of boto3 and here regularly to avoid `pip` backtracking with the number
# of candidates to consider. Make sure to configure boto3 version here as well as in all the tools below
# in the `devel-dependencies` section to be the same minimum version.
- boto3>=1.33.0
- botocore>=1.33.0
- boto3>=1.34.0
- botocore>=1.34.0
- inflection>=0.5.1
# Allow a wider range of watchtower versions for flexibility among users
- watchtower>=2.0.1,<4
- watchtower>=3.0.0,<4
- jsonpath_ng>=1.5.3
- redshift_connector>=2.0.918
- sqlalchemy_redshift>=0.8.6
- asgiref
- asgiref>=2.3.0
- PyAthena>=3.0.10
- jmespath
- jmespath>=0.7.0

additional-extras:
- name: pandas
Expand All @@ -111,13 +111,15 @@ additional-extras:
# https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies
# However, Airflow does not fully support it yet: https://github.com/apache/airflow/issues/28723
# In addition, FAB also limits sqlalchemy to < 2.0
- pandas>=1.2.5,<2.2
- pandas>=1.5.3,<2.2;python_version<"3.12"
- pandas>=2.1.1,<2.2;python_version>="3.12"

# There is conflict between boto3 and aiobotocore dependency botocore.
# TODO: We can remove it once boto3 and aiobotocore both have a compatible botocore version or
# boto3 has native async support and we move away from aiobotocore
- name: aiobotocore
dependencies:
- aiobotocore[boto3]>=2.5.3
- aiobotocore[boto3]>=2.10.0
- name: cncf.kubernetes
dependencies:
- apache-airflow-providers-cncf-kubernetes>=7.2.0
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/apache/flink/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ versions:

dependencies:
- apache-airflow>=2.7.0
- cryptography>=2.0.0
- cryptography>=41.0.0
- apache-airflow-providers-cncf-kubernetes>=5.1.0

integrations:
Expand Down
6 changes: 5 additions & 1 deletion airflow/providers/apache/hdfs/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ versions:

dependencies:
- apache-airflow>=2.7.0
- hdfs[avro,dataframe,kerberos]>=2.0.4
- hdfs[avro,dataframe,kerberos]>=2.5.4;python_version<"3.12"
- hdfs[avro,dataframe,kerberos]>=2.7.3;python_version>="3.12"
- pandas>=1.5.3,<2.2;python_version<"3.12"
- pandas>=2.1.1,<2.2;python_version>="3.12"


integrations:
- integration-name: Hadoop Distributed File System (HDFS)
Expand Down
7 changes: 5 additions & 2 deletions airflow/providers/apache/hive/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,12 @@ dependencies:
# https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies
# However, Airflow does not fully support it yet: https://github.com/apache/airflow/issues/28723
# In addition, FAB also limits sqlalchemy to < 2.0
- pandas>=1.2.5,<2.2
- pandas>=1.5.3,<2.2;python_version<"3.12"
- pandas>=2.1.1,<2.2;python_version>="3.12"

- pyhive[hive_pure_sasl]>=0.7.0
- thrift>=0.9.2
- thrift>=0.11.0
- jmespath>=0.7.0

integrations:
- integration-name: Apache Hive
Expand Down
4 changes: 2 additions & 2 deletions airflow/providers/apache/kafka/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ versions:

dependencies:
- apache-airflow>=2.7.0
- asgiref
- confluent-kafka>=1.8.2
- asgiref>=2.3.0
- confluent-kafka>=2.3.0

integrations:
- integration-name: Apache Kafka
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/apache/kylin/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ versions:

dependencies:
- apache-airflow>=2.7.0
- kylinpy>=2.6
- kylinpy>=2.7.0

integrations:
- integration-name: Apache Kylin
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/apache/spark/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ versions:

dependencies:
- apache-airflow>=2.7.0
- pyspark
- pyspark>=3.1.3
- grpcio-status>=1.59.0

additional-extras:
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/cloudant/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ versions:

dependencies:
- apache-airflow>=2.7.0
- cloudant>=2.0
- cloudant>=2.13.0

integrations:
- integration-name: IBM Cloudant
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/cncf/kubernetes/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ dependencies:
- aiofiles>=23.2.0
- apache-airflow>=2.7.0
- asgiref>=3.5.2
- cryptography>=2.0.0
- cryptography>=41.0.0
# The Kubernetes API is known to introduce problems when upgraded to a MAJOR version. Airflow Core
# Uses Kubernetes for Kubernetes executor, and we also know that Kubernetes Python client follows SemVer
# (https://github.com/kubernetes-client/python#compatibility). This is a crucial component of Airflow
Expand Down
4 changes: 3 additions & 1 deletion airflow/providers/common/sql/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ additional-extras:
# https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies
# However, Airflow does not fully support it yet: https://github.com/apache/airflow/issues/28723
# In addition, FAB also limits sqlalchemy to < 2.0
- pandas>=1.2.5,<2.2
- pandas>=1.5.3,<2.2;python_version<"3.12"
- pandas>=2.1.1,<2.2;python_version>="3.12"


integrations:
- integration-name: Common SQL
Expand Down
3 changes: 3 additions & 0 deletions airflow/providers/databricks/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ dependencies:
- databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0
- aiohttp>=3.9.2, <4
- mergedeep>=1.3.4
- pandas>=1.5.3,<2.2;python_version<"3.12"
- pandas>=2.1.1,<2.2;python_version>="3.12"
- pyarrow>=14.0.1

additional-extras:
# pip install apache-airflow-providers-databricks[sdk]
Expand Down
4 changes: 3 additions & 1 deletion airflow/providers/exasol/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ dependencies:
# https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies
# However, Airflow does not fully support it yet: https://github.com/apache/airflow/issues/28723
# In addition, FAB also limits sqlalchemy to < 2.0
- pandas>=1.2.5,<2.2
- pandas>=1.5.3,<2.2;python_version<"3.12"
- pandas>=2.1.1,<2.2;python_version>="3.12"


integrations:
- integration-name: Exasol
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/fab/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ dependencies:
- flask-appbuilder==4.5.0
- flask-login>=0.6.2
- google-re2>=1.0
- jmespath
- jmespath>=0.7.0

config:
fab:
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/facebook/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ versions:

dependencies:
- apache-airflow>=2.7.0
- facebook-business>=6.0.2
- facebook-business>=15.0.2

integrations:
- integration-name: Facebook Ads
Expand Down
4 changes: 1 addition & 3 deletions airflow/providers/github/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ description: |
dependencies:
- apache-airflow>=2.7.0
# There was a change introduced in version 1.58 which breaks `pickle` serialization out of the box.
# See https://github.com/PyGithub/PyGithub/issues/2436.
- PyGithub!=1.58
- PyGithub>=2.1.1

state: ready
source-date-epoch: 1716287833
Expand Down
Loading

0 comments on commit 9797095

Please sign in to comment.