
Commit

fix(deps): bump dependencies' lower bounds to reflect tested minimum version (#8977)

Bump the lower bound of pandas to 1.5.3 to reflect the actual lower bound that we test against in CI.
The lower bound of pyarrow is now 10.0.1 (required by pandas, and in practice a version greater than 7 is needed for much of Ibis's functionality).
Also bump the pandas upper bound in the conda environment YAMLs to reflect recent fixes that add support for pandas 2.2.

Closes #8795.
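As a sanity check, a minimal sketch of verifying that an installed environment satisfies the new floors (the bounds mirror the pyproject.toml change in this PR; the script itself is illustrative and not part of the commit):

# Illustrative only: check installed versions against the new lower bounds.
from importlib.metadata import version

from packaging.version import Version

MINIMUMS = {"numpy": "1.23.2", "pandas": "1.5.3", "pyarrow": "10.0.1"}

for name, floor in MINIMUMS.items():
    installed = Version(version(name))
    status = "ok" if installed >= Version(floor) else "too old"
    print(f"{name}: installed {installed}, minimum {floor} -> {status}")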

---------

Co-authored-by: Gil Forsyth <gil@forsyth.dev>
cpcloud and gforsyth authored Apr 16, 2024
1 parent 88d4b7b commit 9c29f28
Showing 14 changed files with 79 additions and 44 deletions.
68 changes: 50 additions & 18 deletions .github/workflows/ibis-backends.yml
@@ -469,21 +469,29 @@ jobs:
- windows-latest
python-version:
- "3.9"
- "3.11.8" # https://github.com/dask/dask/issues/11038
- "3.11"
backend:
- name: dask
title: Dask
deps:
- "dask[array,dataframe]@2022.9.1"
- "pandas@1.5.3"
required:
- "numpy@1.23.2"
- "pyarrow@10.0.1"
optional:
- "dask[array,dataframe]@2022.9.1"
- "pandas@1.5.3"
extras:
- dask
- name: postgres
title: PostgreSQL
deps:
- "psycopg2@2.8.4"
- "geopandas@0.6"
- "Shapely@2"
required:
- "numpy@1.23.2"
- "pyarrow@10.0.1"
optional:
- "psycopg2@2.8.4"
- "geopandas@0.6"
- "Shapely@2"
services:
- postgres
extras:
@@ -495,27 +503,48 @@ jobs:
name: postgres
title: PostgreSQL
deps:
- "psycopg2@2.8.4"
- "geopandas@0.6"
- "Shapely@2"
required:
- "numpy@1.23.2"
- "pyarrow@10.0.1"
optional:
- "psycopg2@2.8.4"
- "geopandas@0.6"
- "Shapely@2"
services:
- postgres
extras:
- postgres
- geospatial
- python-version: "3.11.8"
- python-version: "3.11"
backend:
name: postgres
title: PostgreSQL
deps:
- "psycopg2@2.8.4"
- "geopandas@0.6"
- "Shapely@2"
required:
- "numpy@1.23.2"
- "pyarrow@10.0.1"
optional:
- "psycopg2@2.8.4"
- "geopandas@0.6"
- "Shapely@2"
services:
- postgres
extras:
- postgres
- geospatial
- python-version: "3.11"
backend:
name: dask
title: Dask
deps:
required:
- "numpy@1.23.2"
- "pyarrow@10.0.1"
optional:
- "dask[array,dataframe]@2022.9.1"
- "pandas@1.5.3"
extras:
- dask
steps:
- name: checkout
uses: actions/checkout@v4
@@ -546,12 +575,15 @@ jobs:
- name: install poetry
run: python -m pip install --upgrade pip 'poetry==1.8.2'

- name: remove lonboard
- name: remove incompatible deps
# it requires a version of pandas that min versions are not compatible with
run: poetry remove lonboard
run: poetry remove lonboard deltalake

- name: install minimum versions of required deps
run: poetry add --lock ${{ join(matrix.backend.deps.required, ' ') }} --python="==${{ steps.install_python.outputs.python-version }}"

- name: install minimum versions
run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }}
- name: install minimum versions of optional deps
run: poetry add --lock --optional ${{ join(matrix.backend.deps.optional, ' ') }} --python="==${{ steps.install_python.outputs.python-version }}"

- name: checkout the lock file
run: git checkout poetry.lock
@@ -674,7 +706,7 @@ jobs:
# this job exists so that we can use a single job from this workflow to gate merging
runs-on: ubuntu-latest
needs:
# - test_backends_min_version
- test_backends_min_version
- test_backends
- test_pyspark
steps:
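For concreteness, the two new "install minimum versions" steps in the workflow above expand, for the dask job, to roughly the following commands (a sketch only; the matrix values are copied from the workflow, and the Python patch version is hypothetical since it is produced by the setup-python step at runtime):

# Illustrative expansion of the workflow expressions; not part of the commit.
import shlex

required = ["numpy@1.23.2", "pyarrow@10.0.1"]
optional = ["dask[array,dataframe]@2022.9.1", "pandas@1.5.3"]
python_version = "3.9.19"  # hypothetical output of steps.install_python

print(shlex.join(["poetry", "add", "--lock", *required, "--python", f"=={python_version}"]))
print(shlex.join(["poetry", "add", "--lock", "--optional", *optional, "--python", f"=={python_version}"]))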
4 changes: 2 additions & 2 deletions conda/environment-arm64-flink.yml
@@ -19,10 +19,10 @@ dependencies:
- google-cloud-bigquery >=3,<4
- google-cloud-bigquery-storage >=2,<3
- impyla >=0.17
- numpy >=1.15,<2
- numpy >=1.23.2,<2
- oracledb >=1.3.1
- packaging >=21.3
- pandas >=1.2.5,<2.2
- pandas >=1.5.3,<3
- parsy >=2
- pins >=0.8.2
- poetry-core >=1.0.0
6 changes: 3 additions & 3 deletions conda/environment-arm64.yml
@@ -19,17 +19,17 @@ dependencies:
- google-cloud-bigquery >=3,<4
- google-cloud-bigquery-storage >=2,<3
- impyla >=0.17
- numpy >=1.15,<2
- numpy >=1.23.2,<2
- oracledb >=1.3.1
- packaging >=21.3
- pandas >=1.2.5,<2.2
- pandas >=1.5.3,<3
- parsy >=2
- pins >=0.8.2
- poetry-core >=1.0.0
- poetry-dynamic-versioning >=0.18.0
- polars >=0.20.17
- psycopg2 >=2.8.4
- pyarrow >=2
- pyarrow >=10.0.1
- pyarrow-tests
- pyarrow-hotfix >=0.4
- pydata-google-auth
6 changes: 3 additions & 3 deletions conda/environment.yml
@@ -19,18 +19,18 @@ dependencies:
- google-cloud-bigquery >=3,<4
- google-cloud-bigquery-storage >=2,<3
- impyla >=0.17
- numpy >=1.15,<2
- numpy >=1.23.2,<2
- oracledb >=1.3.1
- packaging >=21.3
- pandas >=1.2.5,<2.2
- pandas >=1.5.3,<3
- parsy >=2
- pins >=0.8.2
- pip
- poetry-core >=1.0.0
- poetry-dynamic-versioning >=0.18.0
- polars >=0.20.17
- psycopg2 >=2.8.4
- pyarrow >=2
- pyarrow >=10.0.1
- pyarrow-hotfix >=0.4
- pydata-google-auth
- pydruid >=0.6.5
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/__init__.py
@@ -831,7 +831,7 @@ def to_pyarrow_batches(
),
chunk_size=chunk_size,
)
return pa.RecordBatchReader.from_batches(schema.to_pyarrow(), batch_iter)
return pa.ipc.RecordBatchReader.from_batches(schema.to_pyarrow(), batch_iter)

def _gen_udf_name(self, name: str, schema: Optional[str]) -> str:
func = ".".join(filter(None, (schema, name)))
4 changes: 3 additions & 1 deletion ibis/backends/clickhouse/__init__.py
@@ -363,7 +363,9 @@ def batcher(sql: str, *, schema: pa.Schema) -> Iterator[pa.RecordBatch]:

self._log(sql)
schema = table.schema().to_pyarrow()
return pa.RecordBatchReader.from_batches(schema, batcher(sql, schema=schema))
return pa.ipc.RecordBatchReader.from_batches(
schema, batcher(sql, schema=schema)
)

def execute(
self,
8 changes: 4 additions & 4 deletions ibis/backends/duckdb/__init__.py
@@ -828,7 +828,7 @@ def _read_parquet_pyarrow_dataset(

def read_in_memory(
self,
source: pd.DataFrame | pa.Table | pa.RecordBatchReader,
source: pd.DataFrame | pa.Table | pa.ipc.RecordBatchReader,
table_name: str | None = None,
) -> ir.Table:
"""Register a Pandas DataFrame or pyarrow object as a table in the current database.
@@ -850,7 +850,7 @@ def read_in_memory(
table_name = table_name or util.gen_name("read_in_memory")
self.con.register(table_name, source)

if isinstance(source, pa.RecordBatchReader):
if isinstance(source, pa.ipc.RecordBatchReader):
# Ensure the reader isn't marked as started, in case the name is
# being overwritten.
self._record_batch_readers_consumed[table_name] = False
@@ -1288,7 +1288,7 @@ def to_pyarrow_batches(
limit: int | str | None = None,
chunk_size: int = 1_000_000,
**_: Any,
) -> pa.RecordBatchReader:
) -> pa.ipc.RecordBatchReader:
"""Return a stream of record batches.
The returned `RecordBatchReader` contains a cursor with an unbounded lifetime.
@@ -1318,7 +1318,7 @@ def batch_producer(cur):
yield from cur.fetch_record_batch(rows_per_batch=chunk_size)

result = self.raw_sql(sql)
return pa.RecordBatchReader.from_batches(
return pa.ipc.RecordBatchReader.from_batches(
expr.as_table().schema().to_pyarrow(), batch_producer(result)
)

2 changes: 1 addition & 1 deletion ibis/backends/flink/__init__.py
@@ -1028,6 +1028,6 @@ def _from_pyflink_table_to_pyarrow_batches(
arrow_schema, pyflink_schema.to_row_data_type(), timezone
)

return pa.RecordBatchReader.from_batches(
return pa.ipc.RecordBatchReader.from_batches(
arrow_schema, serializer.load_from_iterator(batches_iterator)
)
2 changes: 1 addition & 1 deletion ibis/backends/impala/__init__.py
@@ -1174,7 +1174,7 @@ def to_pyarrow_batches(
pa_table = self.to_pyarrow(
expr.as_table(), params=params, limit=limit, **kwargs
)
return pa.RecordBatchReader.from_batches(
return pa.ipc.RecordBatchReader.from_batches(
pa_table.schema, pa_table.to_batches(max_chunksize=chunk_size)
)

2 changes: 1 addition & 1 deletion ibis/backends/pandas/__init__.py
@@ -310,7 +310,7 @@ def to_pyarrow_batches(
pa_table = self.to_pyarrow(
expr.as_table(), params=params, limit=limit, **kwargs
)
return pa.RecordBatchReader.from_batches(
return pa.ipc.RecordBatchReader.from_batches(
pa_table.schema, pa_table.to_batches(max_chunksize=chunk_size)
)

2 changes: 1 addition & 1 deletion ibis/backends/pyspark/__init__.py
@@ -809,6 +809,6 @@ def to_pyarrow_batches(
pa_table = self.to_pyarrow(
expr.as_table(), params=params, limit=limit, **kwargs
)
return pa.RecordBatchReader.from_batches(
return pa.ipc.RecordBatchReader.from_batches(
pa_table.schema, pa_table.to_batches(max_chunksize=chunk_size)
)
2 changes: 1 addition & 1 deletion ibis/backends/snowflake/__init__.py
@@ -499,7 +499,7 @@ def to_pyarrow_batches(
sql = self.compile(expr, limit=limit, params=params, **kwargs)
target_schema = expr.as_table().schema().to_pyarrow()

return pa.RecordBatchReader.from_batches(
return pa.ipc.RecordBatchReader.from_batches(
target_schema,
self._make_batch_iter(
sql, target_schema=target_schema, chunk_size=chunk_size
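The pa.ipc.RecordBatchReader.from_batches pattern adopted across the backend changes above wraps a lazy iterator of record batches in a streaming reader. A standalone sketch (the schema and batch generator here are invented purely for illustration):

# Illustrative only: wrap a batch iterator in a streaming RecordBatchReader.
import pyarrow as pa

schema = pa.schema([("x", pa.int64())])

def batch_iter():
    # lazily yield small batches that match the schema
    for start in range(0, 6, 2):
        yield pa.record_batch([pa.array([start, start + 1])], schema=schema)

reader = pa.ipc.RecordBatchReader.from_batches(schema, batch_iter())
print(reader.read_all())  # drains the stream into a single pyarrow Table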
9 changes: 5 additions & 4 deletions poetry.lock

Some generated files are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
@@ -39,10 +39,10 @@ classifiers = [
python = "^3.9"
atpublic = ">=2.3,<5"
bidict = ">=0.22.1,<1"
numpy = ">=1,<2"
pandas = ">=1.2.5,<3"
numpy = ">=1.23.2,<2"
pandas = ">=1.5.3,<3"
parsy = ">=2,<3"
pyarrow = ">=2,<16"
pyarrow = ">=10.0.1,<16"
pyarrow-hotfix = ">=0.4,<1"
python-dateutil = ">=2.8.2,<3"
pytz = ">=2022.7"
