From 10976ee59f5804bdf29310aae8eb6bf15b89f6e8 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Fri, 13 Sep 2024 12:02:50 +0200 Subject: [PATCH 1/8] Skip musllinux_aarch64 wheels --- .github/workflows/wheel.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 489fd23cc..8847bed1a 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -38,8 +38,13 @@ jobs: - id: set-matrix env: - # skipping pypy for now, cp38-win was segfaulting on CI, numpy has no wheels for cp38-musllinux_aarch64 -> build from source -> CI timeouts - CIBW_SKIP: pp* cp38-win* cp38-musllinux_aarch64 + # skipping pypy for now + # cp38-win was segfaulting on CI -> skipping for now + # oldest-supported-numpy has no wheels for musllinux_aarch64 -> build numpy from source on QEMU -> CI timeouts -> skipping for now + CIBW_SKIP: > + pp* + cp38-win* + *musllinux_aarch64 run: | MATRIX_INCLUDE=$( { From be5fcf2ff75934b3e58ac1369e126cee33415b52 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:27:52 +0300 Subject: [PATCH 2/8] Build using backward-compatible numpy API (#5) * Build using backward-compatible numpy API * Add deprecated oldest-supported-numpy --- packages/vaex-core/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/vaex-core/pyproject.toml b/packages/vaex-core/pyproject.toml index df5666945..e5b2b5efc 100644 --- a/packages/vaex-core/pyproject.toml +++ b/packages/vaex-core/pyproject.toml @@ -1,7 +1,8 @@ [build-system] # Minimum requirements for the build system to execute. 
requires = [ - "oldest-supported-numpy", + "oldest-supported-numpy; python_version=='3.8'", # deprecated ref https://github.com/scipy/oldest-supported-numpy + "numpy~=1.25; python_version>'3.8'", # backward compatible build-system as of v1.25 ref https://numpy.org/doc/2.1/dev/depending_on_numpy.html#build-time-dependency# "scikit-build", "cmake", "ninja" From 7e1c1280f73e9f386654672c66359927e162c500 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:30:44 +0300 Subject: [PATCH 3/8] Bring back cp39+ musllinux_aarch64 wheels --- .github/workflows/wheel.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 8847bed1a..07f43ad00 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -40,11 +40,11 @@ jobs: env: # skipping pypy for now # cp38-win was segfaulting on CI -> skipping for now - # oldest-supported-numpy has no wheels for musllinux_aarch64 -> build numpy from source on QEMU -> CI timeouts -> skipping for now + # oldest-supported-numpy has no wheels for cp38-musllinux_aarch64 -> build numpy from source on QEMU -> CI timeouts -> skipping for now CIBW_SKIP: > pp* cp38-win* - *musllinux_aarch64 + cp38-musllinux_aarch64 run: | MATRIX_INCLUDE=$( { From 75f3279e9bb7463ab190666ca14ddaa0eed6eb59 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Fri, 13 Sep 2024 19:17:49 +0300 Subject: [PATCH 4/8] Note that numpy~=2.0 fails currently --- packages/vaex-core/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/vaex-core/pyproject.toml b/packages/vaex-core/pyproject.toml index e5b2b5efc..fa4386c41 100644 --- a/packages/vaex-core/pyproject.toml +++ b/packages/vaex-core/pyproject.toml @@ -2,7 +2,7 @@ # Minimum requirements for the build system to execute. 
requires = [ "oldest-supported-numpy; python_version=='3.8'", # deprecated ref https://github.com/scipy/oldest-supported-numpy - "numpy~=1.25; python_version>'3.8'", # backward compatible build-system as of v1.25 ref https://numpy.org/doc/2.1/dev/depending_on_numpy.html#build-time-dependency# + "numpy~=1.25; python_version>'3.8'", # numpy~=2.0 fails, backward compatible build-system as of v1.25 ref https://numpy.org/doc/2.1/dev/depending_on_numpy.html#build-time-dependency "scikit-build", "cmake", "ninja" From 9509a380675feb5efdb72b307701f500bb2a748c Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Wed, 18 Sep 2024 08:36:00 +0200 Subject: [PATCH 5/8] Add explicit upper bound on python 3.12 --- packages/vaex-core/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/vaex-core/setup.py b/packages/vaex-core/setup.py index 669875ffa..c486c4886 100644 --- a/packages/vaex-core/setup.py +++ b/packages/vaex-core/setup.py @@ -263,7 +263,7 @@ def __str__(self): if not use_skbuild else [], zip_safe=False, - python_requires=">=3.8", + python_requires=">=3.8,<3.13", # 3.13 needs numpy 2.1 support ref https://github.com/vaexio/vaex/pull/2434 classifiers=[ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", From c966a6ea94f99ca1ef24dffa0f0b53771551d5c6 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:44:19 +0200 Subject: [PATCH 6/8] Quick fix for fingerprints --- tests/fingerprint_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/fingerprint_test.py b/tests/fingerprint_test.py index 769ac7132..136da3a7d 100644 --- a/tests/fingerprint_test.py +++ b/tests/fingerprint_test.py @@ -96,10 +96,10 @@ def test_dataset_arrays(): 'y': '4d48c88e587db8f3855eed9f5d5f51eea769451b7371ecf7bdee4e0258238631', 'z': 'a4cead13bef1fd1ec5974d1a2f5ceffd243a7aa6c6b08b80e09a7454b7d04293' } - assert 
ds.fingerprint in ['dataset-arrays-hashed-88244cf38fe91c6bf435caa6160b089b', 'dataset-arrays-hashed-148c30472b155430f46bfb94d5509cf4'] + assert ds.fingerprint in ['dataset-arrays-hashed-88244cf38fe91c6bf435caa6160b089b', 'dataset-arrays-hashed-148c30472b155430f46bfb94d5509cf4', 'dataset-arrays-hashed-ed88ca5523bad737bbdbee53eaba3ca1'] -df_fingerprints_xy = ['dataframe-943761acaa2ff2060d21ef519c77e1b9', 'dataframe-1c2f7e9c53dbd30220792e425418e343'] +df_fingerprints_xy = ['dataframe-943761acaa2ff2060d21ef519c77e1b9', 'dataframe-1c2f7e9c53dbd30220792e425418e343', 'dataframe-9f81a4cd8df8f65d5d1ee3368ec8e4ba'] def test_df(): @@ -114,7 +114,7 @@ def test_df_different_virtual_columns(): y = x**2 df1 = vaex.from_arrays(x=x, y=y, z=x+y) df1['z'] = df1.x + df1.z - assert df1.fingerprint() in ['dataframe-8f2202e2b4e7845c8ace767db5a49bc4', 'dataframe-b72cf197307aa4b9806e6ce3199b2960'] + assert df1.fingerprint() in ['dataframe-8f2202e2b4e7845c8ace767db5a49bc4', 'dataframe-b72cf197307aa4b9806e6ce3199b2960', 'dataframe-3dccec28d5db6bc592576116ef05d805'] df2 = vaex.from_arrays(x=x, y=y, z=x+y) df2['z'] = df2.x - df2.z assert df2.fingerprint() in ['dataframe-81043a3c5b32eaa4b18bf4a915492e23', 'dataframe-0e9a4e2753715ff592527dcee1f1e8c2'] @@ -137,7 +137,7 @@ def test_df_project(): df_a = df[['x', 'y']] df_b = df[['x', 'y']] assert df_a.fingerprint() == df_b.fingerprint() - assert df_a.fingerprint() in ['dataframe-c13a4ab588272f03855ae5627731f7e5', 'dataframe-d4565ca8187231a051a9ff888ba16e7c'] + assert df_a.fingerprint() in ['dataframe-c13a4ab588272f03855ae5627731f7e5', 'dataframe-d4565ca8187231a051a9ff888ba16e7c', 'dataframe-2029c62052149fa7a811f4bb6170a2b6'] def test_df_selection_references_virtual_column(): From 61f16e3a9605155861cd8b1c111eb6c48748995c Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:33:08 +0200 Subject: [PATCH 7/8] Add more fingerprints --- tests/fingerprint_test.py | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/tests/fingerprint_test.py b/tests/fingerprint_test.py index 136da3a7d..b05e221bd 100644 --- a/tests/fingerprint_test.py +++ b/tests/fingerprint_test.py @@ -99,7 +99,7 @@ def test_dataset_arrays(): assert ds.fingerprint in ['dataset-arrays-hashed-88244cf38fe91c6bf435caa6160b089b', 'dataset-arrays-hashed-148c30472b155430f46bfb94d5509cf4', 'dataset-arrays-hashed-ed88ca5523bad737bbdbee53eaba3ca1'] -df_fingerprints_xy = ['dataframe-943761acaa2ff2060d21ef519c77e1b9', 'dataframe-1c2f7e9c53dbd30220792e425418e343', 'dataframe-9f81a4cd8df8f65d5d1ee3368ec8e4ba'] +df_fingerprints_xy = ['dataframe-943761acaa2ff2060d21ef519c77e1b9', 'dataframe-1c2f7e9c53dbd30220792e425418e343', 'dataframe-9f81a4cd8df8f65d5d1ee3368ec8e4ba', 'dataframe-5c6b97012243d0f534be85ed88d4da43', 'dataframe-5c6b97012243d0f534be85ed88d4da43'] def test_df(): @@ -114,10 +114,10 @@ def test_df_different_virtual_columns(): y = x**2 df1 = vaex.from_arrays(x=x, y=y, z=x+y) df1['z'] = df1.x + df1.z - assert df1.fingerprint() in ['dataframe-8f2202e2b4e7845c8ace767db5a49bc4', 'dataframe-b72cf197307aa4b9806e6ce3199b2960', 'dataframe-3dccec28d5db6bc592576116ef05d805'] + assert df1.fingerprint() in ['dataframe-8f2202e2b4e7845c8ace767db5a49bc4', 'dataframe-b72cf197307aa4b9806e6ce3199b2960', 'dataframe-3dccec28d5db6bc592576116ef05d805', 'dataframe-c48e8490b3ddf553a0184967376334c1'] df2 = vaex.from_arrays(x=x, y=y, z=x+y) df2['z'] = df2.x - df2.z - assert df2.fingerprint() in ['dataframe-81043a3c5b32eaa4b18bf4a915492e23', 'dataframe-0e9a4e2753715ff592527dcee1f1e8c2'] + assert df2.fingerprint() in ['dataframe-81043a3c5b32eaa4b18bf4a915492e23', 'dataframe-0e9a4e2753715ff592527dcee1f1e8c2', ''] def test_df_with_dependencies(): @@ -137,7 +137,7 @@ def test_df_project(): df_a = df[['x', 'y']] df_b = df[['x', 'y']] assert df_a.fingerprint() == df_b.fingerprint() - assert df_a.fingerprint() in ['dataframe-c13a4ab588272f03855ae5627731f7e5', 'dataframe-d4565ca8187231a051a9ff888ba16e7c', 
'dataframe-2029c62052149fa7a811f4bb6170a2b6'] + assert df_a.fingerprint() in ['dataframe-c13a4ab588272f03855ae5627731f7e5', 'dataframe-d4565ca8187231a051a9ff888ba16e7c', 'dataframe-2029c62052149fa7a811f4bb6170a2b6', 'dataframe-3eff215820906d4a751eddda1a58a1d7'] def test_df_selection_references_virtual_column(): From a275339803c91672c3d52499c9f014ba5f8147b7 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:55:32 +0200 Subject: [PATCH 8/8] Pin dask instead of adding fingerprints --- packages/vaex-core/setup.py | 2 +- tests/fingerprint_test.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/vaex-core/setup.py b/packages/vaex-core/setup.py index c486c4886..83855c33a 100644 --- a/packages/vaex-core/setup.py +++ b/packages/vaex-core/setup.py @@ -29,7 +29,7 @@ "numpy~=1.17", "aplus", "tabulate>=0.8.3", - "dask!=2022.4.0", + "dask!=2022.4.0,<2024.9", # fingerprinting is no longer deterministic as of 2024.9.0 "future>=0.15.2", "pyyaml", "six", diff --git a/tests/fingerprint_test.py b/tests/fingerprint_test.py index b05e221bd..769ac7132 100644 --- a/tests/fingerprint_test.py +++ b/tests/fingerprint_test.py @@ -96,10 +96,10 @@ def test_dataset_arrays(): 'y': '4d48c88e587db8f3855eed9f5d5f51eea769451b7371ecf7bdee4e0258238631', 'z': 'a4cead13bef1fd1ec5974d1a2f5ceffd243a7aa6c6b08b80e09a7454b7d04293' } - assert ds.fingerprint in ['dataset-arrays-hashed-88244cf38fe91c6bf435caa6160b089b', 'dataset-arrays-hashed-148c30472b155430f46bfb94d5509cf4', 'dataset-arrays-hashed-ed88ca5523bad737bbdbee53eaba3ca1'] + assert ds.fingerprint in ['dataset-arrays-hashed-88244cf38fe91c6bf435caa6160b089b', 'dataset-arrays-hashed-148c30472b155430f46bfb94d5509cf4'] -df_fingerprints_xy = ['dataframe-943761acaa2ff2060d21ef519c77e1b9', 'dataframe-1c2f7e9c53dbd30220792e425418e343', 'dataframe-9f81a4cd8df8f65d5d1ee3368ec8e4ba', 'dataframe-5c6b97012243d0f534be85ed88d4da43', 
'dataframe-5c6b97012243d0f534be85ed88d4da43'] +df_fingerprints_xy = ['dataframe-943761acaa2ff2060d21ef519c77e1b9', 'dataframe-1c2f7e9c53dbd30220792e425418e343'] def test_df(): @@ -114,10 +114,10 @@ def test_df_different_virtual_columns(): y = x**2 df1 = vaex.from_arrays(x=x, y=y, z=x+y) df1['z'] = df1.x + df1.z - assert df1.fingerprint() in ['dataframe-8f2202e2b4e7845c8ace767db5a49bc4', 'dataframe-b72cf197307aa4b9806e6ce3199b2960', 'dataframe-3dccec28d5db6bc592576116ef05d805', 'dataframe-c48e8490b3ddf553a0184967376334c1'] + assert df1.fingerprint() in ['dataframe-8f2202e2b4e7845c8ace767db5a49bc4', 'dataframe-b72cf197307aa4b9806e6ce3199b2960'] df2 = vaex.from_arrays(x=x, y=y, z=x+y) df2['z'] = df2.x - df2.z - assert df2.fingerprint() in ['dataframe-81043a3c5b32eaa4b18bf4a915492e23', 'dataframe-0e9a4e2753715ff592527dcee1f1e8c2', ''] + assert df2.fingerprint() in ['dataframe-81043a3c5b32eaa4b18bf4a915492e23', 'dataframe-0e9a4e2753715ff592527dcee1f1e8c2'] def test_df_with_dependencies(): @@ -137,7 +137,7 @@ def test_df_project(): df_a = df[['x', 'y']] df_b = df[['x', 'y']] assert df_a.fingerprint() == df_b.fingerprint() - assert df_a.fingerprint() in ['dataframe-c13a4ab588272f03855ae5627731f7e5', 'dataframe-d4565ca8187231a051a9ff888ba16e7c', 'dataframe-2029c62052149fa7a811f4bb6170a2b6', 'dataframe-3eff215820906d4a751eddda1a58a1d7'] + assert df_a.fingerprint() in ['dataframe-c13a4ab588272f03855ae5627731f7e5', 'dataframe-d4565ca8187231a051a9ff888ba16e7c'] def test_df_selection_references_virtual_column():