From 00c487cae095e1ada86e3069ae59bbbae63fa76c Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 1 Oct 2023 09:40:29 +0200 Subject: [PATCH 01/15] Add support for fids filter with use_arrow=True --- pyogrio/_io.pyx | 17 +++++++++++++++-- pyogrio/tests/test_geopandas_io.py | 8 +++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 71e389b5..cd789f80 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1253,7 +1253,11 @@ def ogr_open_arrow( raise ValueError("forcing 2D is not supported for Arrow") if fids is not None: - raise ValueError("reading by FID is not supported for Arrow") + if where is not None or bbox is not None or mask is not None or sql is not None or skip_features or max_features: + raise ValueError( + "cannot set both 'fids' and any of 'where', 'bbox', 'mask', " + "'sql', 'skip_features' or 'max_features'" + ) IF CTE_GDAL_VERSION < (3, 8, 0): if skip_features: @@ -1315,10 +1319,19 @@ def ogr_open_arrow( geometry_name = get_string(OGR_L_GetGeometryColumn(ogr_layer)) fid_column = get_string(OGR_L_GetFIDColumn(ogr_layer)) + fid_column_where = fid_column # OGR_L_GetFIDColumn returns the column name if it is a custom column, - # or "" if not. For arrow, the default column name is "OGC_FID". + # or "" if not. For arrow, the default column name used to return the FID data + # read is "OGC_FID". When accessing the underlying datasource like when using a + # where clause, the default column name is "FID". if fid_column == "": fid_column = "OGC_FID" + fid_column_where = "FID" + + # Use fids list to create a where clause, as arrow doesn't support direct fid + # filtering. + if fids is not None: + where = f"{fid_column_where} IN ({','.join(fids.astype(str))})" # Apply the attribute filter if where is not None and where != "": diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index fc7cf16e..a1378b71 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -408,10 +408,12 @@ def test_read_mask_where(naturalearth_lowres_all_ext, use_arrow): assert np.array_equal(df.iso_a3, ["CAN"]) -def test_read_fids(naturalearth_lowres_all_ext): +def test_read_fids(naturalearth_lowres_all_ext, use_arrow): # ensure keyword is properly passed through - fids = np.array([1, 10, 5], dtype=np.int64) - df = read_dataframe(naturalearth_lowres_all_ext, fids=fids, fid_as_index=True) + fids = np.array([1, 5, 10], dtype=np.int64) + df = read_dataframe( + naturalearth_lowres_all_ext, fids=fids, fid_as_index=True, use_arrow=use_arrow + ) assert len(df) == 3 assert np.array_equal(fids, df.index.values) From 9315845ef6f4f22fa5235135e688d065b16543f9 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 1 Oct 2023 09:56:04 +0200 Subject: [PATCH 02/15] Update CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 409db072..6957bbc5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -23,6 +23,7 @@ intersect the bounding box of the mask when using the Arrow interface for some drivers; this has been fixed in GDAL 3.8.0. - Removed warning when no features are read from the data source (#299). +- Add support for `fids` filter with `use_arrow=True` (#304) ### Other changes From d9cfebd8b58fc4a38fb5e7c1807f86298069ec61 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Tue, 3 Oct 2023 15:10:50 +0200 Subject: [PATCH 03/15] Add to docstring that the order is undefined --- pyogrio/geopandas.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index 522d574f..18861e25 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -115,7 +115,9 @@ def read_dataframe( the starting index is driver and file specific (e.g. typically 0 for Shapefile and 1 for GeoPackage, but can still depend on the specific file). The performance of reading a large number of features usings FIDs - is also driver specific. + is also driver specific. The order of the rows returned is undefined. If you + would like to sort based on FID, use ``fid_as_index=True`` to have the index of + the GeoDataFrame returned set to the FIDs of the features read. sql : str, optional (default: None) The SQL statement to execute. Look at the sql_dialect parameter for more information on the syntax to use for the query. When combined with other From db573d308307119cd28715855a49f613385df6e2 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Fri, 13 Oct 2023 01:38:16 +0200 Subject: [PATCH 04/15] use_arrow can also impact performance --- pyogrio/geopandas.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index 5047046a..86eede18 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -116,10 +116,11 @@ def read_dataframe( ``max_features``, ``where``, ``bbox``, ``mask``, or ``sql``). Note that the starting index is driver and file specific (e.g. typically 0 for Shapefile and 1 for GeoPackage, but can still depend on the specific - file). The performance of reading a large number of features usings FIDs - is also driver specific. The order of the rows returned is undefined. If you - would like to sort based on FID, use ``fid_as_index=True`` to have the index of - the GeoDataFrame returned set to the FIDs of the features read. + file). The performance of reading (a large number of) features usings FIDs + is also driver specific and dependent on the value of ``use_arrow``. The order + of the rows returned is undefined. If you would like to sort based on FID, use + ``fid_as_index=True`` to have the index of the GeoDataFrame returned set to the + FIDs of the features read. sql : str, optional (default: None) The SQL statement to execute. Look at the sql_dialect parameter for more information on the syntax to use for the query. When combined with other From 6f782e18cfc93cc3276ce05d7b7731974b506e83 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 2 Dec 2023 23:36:27 +0100 Subject: [PATCH 05/15] Make sure fids can be arraylike --- pyogrio/_io.pyx | 3 ++- pyogrio/tests/test_geopandas_io.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index fb717f7a..25bd6f44 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1362,7 +1362,8 @@ def ogr_open_arrow( # Use fids list to create a where clause, as arrow doesn't support direct fid # filtering. if fids is not None: - where = f"{fid_column_where} IN ({','.join(fids.astype(str))})" + fids_str = ",".join([str(fid) for fid in fids]) + where = f"{fid_column_where} IN ({fids_str})" # Apply the attribute filter if where is not None and where != "": diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 2d1b839a..7bf77506 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -473,9 +473,9 @@ def test_read_mask_where(naturalearth_lowres_all_ext, use_arrow): assert np.array_equal(df.iso_a3, ["CAN"]) -def test_read_fids(naturalearth_lowres_all_ext, use_arrow): +@pytest.mark.parametrize("fids", [[1, 5, 10], np.array([1, 5, 10], dtype=np.int64)]) +def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow): # ensure keyword is properly passed through - fids = np.array([1, 5, 10], dtype=np.int64) df = read_dataframe( naturalearth_lowres_all_ext, fids=fids, fid_as_index=True, use_arrow=use_arrow ) From 4a2cc94d25eab45429f4ec0620e72a150bcb69fd Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 3 Dec 2023 01:32:00 +0100 Subject: [PATCH 06/15] Raise explicit error if too many fids asked --- pyogrio/_io.pyx | 11 ++++++++++- pyogrio/tests/test_geopandas_io.py | 9 +++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 25bd6f44..f404df49 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1367,7 +1367,16 @@ def ogr_open_arrow( # Apply the attribute filter if where is not None and where != "": - apply_where_filter(ogr_layer, where) + try: + apply_where_filter(ogr_layer, where) + except ValueError as ex: + if fids is not None and str(ex).startswith("Invalid SQL query"): + raise ValueError( + f"error applying filter for {len(fids)} fids. Max. number for " + f"drivers with default SQL dialect 'OGRSQL' is 4997: {ex}" + ) + + raise # Apply the spatial filter if bbox is not None: diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 7bf77506..db279ddb 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -483,6 +483,15 @@ def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow): assert np.array_equal(fids, df.index.values) +def test_read_fids_arrow_many_error(naturalearth_lowres): + # Maximum number at time of writing is 4997 for "OGRSQL". For e.g. for SQLite based + # formats like Geopackage, there is no limit. + nb_fids = 4998 + fids = range(nb_fids) + with pytest.raises(ValueError, match=f"error applying filter for {nb_fids} fids"): + _ = read_dataframe(naturalearth_lowres, fids=fids, use_arrow=True) + + def test_read_fids_force_2d(test_fgdb_vsi): with pytest.warns( UserWarning, match=r"Measured \(M\) geometry types are not supported" From 49e478b04f75228e309e1033f262e015565fbd1d Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 3 Dec 2023 01:38:23 +0100 Subject: [PATCH 07/15] Add @requires_arrow_api to test --- pyogrio/tests/test_geopandas_io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index db279ddb..8125b833 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -483,6 +483,7 @@ def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow): assert np.array_equal(fids, df.index.values) +@requires_arrow_api def test_read_fids_arrow_many_error(naturalearth_lowres): # Maximum number at time of writing is 4997 for "OGRSQL". For e.g. for SQLite based # formats like Geopackage, there is no limit. From a28ac9cbbd7afdd3a3ea275149e4ea1a143ac864 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sun, 3 Dec 2023 02:12:17 +0100 Subject: [PATCH 08/15] Improve doc --- pyogrio/geopandas.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index 42b50e9e..7d3bb436 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -155,7 +155,8 @@ def read_dataframe( is also driver specific and dependent on the value of ``use_arrow``. The order of the rows returned is undefined. If you would like to sort based on FID, use ``fid_as_index=True`` to have the index of the GeoDataFrame returned set to the - FIDs of the features read. + FIDs of the features read. If ``use_arrow=True``, the number of FIDs is limited + to 4997 for drivers with 'OGRSQL' as default SQL dialect. sql : str, optional (default: None) The SQL statement to execute. Look at the sql_dialect parameter for more information on the syntax to use for the query. When combined with other @@ -340,7 +341,7 @@ def write_dataframe( in the output file. path : str path to file - layer :str, optional (default: None) + layer : str, optional (default: None) layer name driver : string, optional (default: None) The OGR format driver used to write the vector file. By default write_dataframe @@ -540,7 +541,7 @@ def write_dataframe( # if possible use EPSG codes instead epsg = geometry.crs.to_epsg() if epsg: - crs = f"EPSG:{epsg}" + crs = f"EPSG:{epsg}" # noqa: E231 else: crs = geometry.crs.to_wkt(WktVersion.WKT1_GDAL) From 34250e2c644ace71a784a542dd73d8083c989ab6 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 27 Jan 2024 10:15:47 +0100 Subject: [PATCH 09/15] Cleanup the exception being raised. --- pyogrio/_io.pyx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index f404df49..9e6726c7 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1371,10 +1371,12 @@ def ogr_open_arrow( apply_where_filter(ogr_layer, where) except ValueError as ex: if fids is not None and str(ex).startswith("Invalid SQL query"): + # If fids is not None, the where being applied is the one formatted + # above. raise ValueError( - f"error applying filter for {len(fids)} fids. Max. number for " - f"drivers with default SQL dialect 'OGRSQL' is 4997: {ex}" - ) + f"error applying filter for {len(fids)} fids; max. number for " + f"drivers with default SQL dialect 'OGRSQL' is 4997" + ) from ex raise From 70da3f9d6ed03d689b8bba7314c8c0c5486d8b01 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Fri, 5 Apr 2024 10:12:32 +0200 Subject: [PATCH 10/15] Move change description in changelog from 0.7 to 0.8 --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index d232f193..25999f7a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,7 @@ ### Improvements +- Add support for `fids` filter with `use_arrow=True` (#304). - `read_arrow` and `open_arrow` now provide [GeoArrow-compliant extension metadata](https://geoarrow.org/extension-types.html), including the CRS, when using GDAL 3.8 or higher (#366). @@ -62,7 +63,6 @@ intersect the bounding box of the mask when using the Arrow interface for some drivers; this has been fixed in GDAL 3.8.0. - Removed warning when no features are read from the data source (#299). -- Add support for `fids` filter with `use_arrow=True` (#304) - Add support for `force_2d=True` with `use_arrow=True` in `read_dataframe` (#300). ### Other changes From 9ce488ae7ccf5187bb82450f543b04ab42bf3d54 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Wed, 10 Apr 2024 20:22:56 +0200 Subject: [PATCH 11/15] Apply feedback on PR --- CHANGES.md | 3 ++- pyogrio/_io.pyx | 2 +- pyogrio/geopandas.py | 7 ++++--- pyogrio/tests/test_geopandas_io.py | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ccec0fb9..ec998670 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,7 +4,8 @@ ### Improvements -- Add support for `fids` filter with `use_arrow=True` (#304). +- Add support for `fids` filter to `read_arrow` and `open_arrow`, and to + `read_dataframe` with `use_arrow=True` (#304). - Add some missing properties to `read_info`, including layer name, geometry name and FID column name (#365). - `read_arrow` and `open_arrow` now provide diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index fd20ef15..fd333132 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1302,7 +1302,7 @@ def ogr_open_arrow( if where is not None or bbox is not None or mask is not None or sql is not None or skip_features or max_features: raise ValueError( "cannot set both 'fids' and any of 'where', 'bbox', 'mask', " - "'sql', 'skip_features' or 'max_features'" + "'sql', 'skip_features', or 'max_features'" ) IF CTE_GDAL_VERSION < (3, 8, 0): diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index bdbd6977..f1342abe 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -156,12 +156,13 @@ def read_dataframe( ``max_features``, ``where``, ``bbox``, ``mask``, or ``sql``). Note that the starting index is driver and file specific (e.g. typically 0 for Shapefile and 1 for GeoPackage, but can still depend on the specific - file). The performance of reading (a large number of) features usings FIDs - is also driver specific and dependent on the value of ``use_arrow``. The order + file). The performance of reading a large number of features usings FIDs + is also driver specific and depends on the value of ``use_arrow``. The order of the rows returned is undefined. If you would like to sort based on FID, use ``fid_as_index=True`` to have the index of the GeoDataFrame returned set to the FIDs of the features read. If ``use_arrow=True``, the number of FIDs is limited - to 4997 for drivers with 'OGRSQL' as default SQL dialect. + to 4997 for drivers with 'OGRSQL' as default SQL dialect. To read a larger + number of FIDs, set ``user_arrow=False``. sql : str, optional (default: None) The SQL statement to execute. Look at the sql_dialect parameter for more information on the syntax to use for the query. When combined with other diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 1897cdc8..fdaac14e 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -514,7 +514,7 @@ def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow): @requires_arrow_api -def test_read_fids_arrow_many_error(naturalearth_lowres): +def test_read_fids_arrow_max_exception(naturalearth_lowres): # Maximum number at time of writing is 4997 for "OGRSQL". For e.g. for SQLite based # formats like Geopackage, there is no limit. nb_fids = 4998 From 1e0c24964207041dfa5454a839a6ace3f218cb0e Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Fri, 12 Apr 2024 00:43:23 +0200 Subject: [PATCH 12/15] Add warning for old GDAL versions + test --- pyogrio/_io.pyx | 10 ++++++++++ pyogrio/tests/test_geopandas_io.py | 15 +++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index fd333132..b4ba6e52 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1389,6 +1389,16 @@ def ogr_open_arrow( # Use fids list to create a where clause, as arrow doesn't support direct fid # filtering. if fids is not None: + IF CTE_GDAL_VERSION < (3, 8, 0): + driver = get_driver(ogr_dataset) + if driver not in {"GPKG", "GeoJSON"}: + warnings.warn( + "Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow " + "for some drivers. Upgrading GDAL or using 'use_arrow=False' " + "can avoid this.", + stacklevel=2, + ) + fids_str = ",".join([str(fid) for fid in fids]) where = f"{fid_column_where} IN ({fids_str})" diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index fdaac14e..c897dbab 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -536,6 +536,21 @@ def test_read_fids_force_2d(test_fgdb_vsi): assert not df.iloc[0].geometry.has_z +@pytest.mark.parametrize("old_gdal", [__gdal_version__ < (3, 8, 0)]) +def test_read_fids_warning_old_gdal(naturalearth_lowres_all_ext, old_gdal): + if old_gdal: + handler = pytest.warns( + UserWarning, + match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow", + ) + else: + handler = contextlib.nullcontext() + + with handler: + df = read_dataframe(naturalearth_lowres_all_ext, fids=[22], use_arrow=True) + assert len(df) == 1 + + @pytest.mark.parametrize("skip_features", [10, 200]) def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_features): ext = naturalearth_lowres_all_ext.suffix From 602e30f17031efba47efae8f8b054a36ccb284a0 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Fri, 12 Apr 2024 01:24:53 +0200 Subject: [PATCH 13/15] Fix + improve test --- pyogrio/tests/test_geopandas_io.py | 32 ++++++++++++++++-------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index c897dbab..29614322 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -523,22 +523,11 @@ def test_read_fids_arrow_max_exception(naturalearth_lowres): _ = read_dataframe(naturalearth_lowres, fids=fids, use_arrow=True) -def test_read_fids_force_2d(test_fgdb_vsi): - with pytest.warns( - UserWarning, match=r"Measured \(M\) geometry types are not supported" - ): - df = read_dataframe(test_fgdb_vsi, layer="test_lines", fids=[22]) - assert len(df) == 1 - assert df.iloc[0].geometry.has_z - - df = read_dataframe(test_fgdb_vsi, layer="test_lines", force_2d=True, fids=[22]) - assert len(df) == 1 - assert not df.iloc[0].geometry.has_z - - +@requires_arrow_api @pytest.mark.parametrize("old_gdal", [__gdal_version__ < (3, 8, 0)]) -def test_read_fids_warning_old_gdal(naturalearth_lowres_all_ext, old_gdal): - if old_gdal: +def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext, old_gdal): + # A warning should be given for old GDAL versions, except for some file formats. + if old_gdal and naturalearth_lowres_all_ext not in [".gpkg", ".geojson"]: handler = pytest.warns( UserWarning, match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow", @@ -551,6 +540,19 @@ def test_read_fids_warning_old_gdal(naturalearth_lowres_all_ext, old_gdal): assert len(df) == 1 +def test_read_fids_force_2d(test_fgdb_vsi): + with pytest.warns( + UserWarning, match=r"Measured \(M\) geometry types are not supported" + ): + df = read_dataframe(test_fgdb_vsi, layer="test_lines", fids=[22]) + assert len(df) == 1 + assert df.iloc[0].geometry.has_z + + df = read_dataframe(test_fgdb_vsi, layer="test_lines", force_2d=True, fids=[22]) + assert len(df) == 1 + assert not df.iloc[0].geometry.has_z + + @pytest.mark.parametrize("skip_features", [10, 200]) def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_features): ext = naturalearth_lowres_all_ext.suffix From b437caff9d1a03332b44ce038f09bd6560b86e4b Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Fri, 12 Apr 2024 01:48:56 +0200 Subject: [PATCH 14/15] Fix test --- pyogrio/tests/test_geopandas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 29614322..229287ce 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -527,7 +527,7 @@ def test_read_fids_arrow_max_exception(naturalearth_lowres): @pytest.mark.parametrize("old_gdal", [__gdal_version__ < (3, 8, 0)]) def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext, old_gdal): # A warning should be given for old GDAL versions, except for some file formats. - if old_gdal and naturalearth_lowres_all_ext not in [".gpkg", ".geojson"]: + if old_gdal and naturalearth_lowres_all_ext.suffix not in [".gpkg", ".geojson"]: handler = pytest.warns( UserWarning, match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow", From 599b0684b333076926492e97f1eb719d9e029236 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Wed, 17 Apr 2024 21:21:40 +0200 Subject: [PATCH 15/15] Applied feedback --- pyogrio/tests/test_geopandas_io.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index ae7be32f..4bc90dd9 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -528,7 +528,7 @@ def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow): assert np.array_equal(fids, df.index.values) -@requires_arrow_api +@requires_pyarrow_api def test_read_fids_arrow_max_exception(naturalearth_lowres): # Maximum number at time of writing is 4997 for "OGRSQL". For e.g. for SQLite based # formats like Geopackage, there is no limit. @@ -538,11 +538,13 @@ def test_read_fids_arrow_max_exception(naturalearth_lowres): _ = read_dataframe(naturalearth_lowres, fids=fids, use_arrow=True) -@requires_arrow_api -@pytest.mark.parametrize("old_gdal", [__gdal_version__ < (3, 8, 0)]) -def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext, old_gdal): +@requires_pyarrow_api +@pytest.mark.skipif( + __gdal_version__ >= (3, 8, 0), reason="GDAL >= 3.8.0 does not need to warn" +) +def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext): # A warning should be given for old GDAL versions, except for some file formats. - if old_gdal and naturalearth_lowres_all_ext.suffix not in [".gpkg", ".geojson"]: + if naturalearth_lowres_all_ext.suffix not in [".gpkg", ".geojson"]: handler = pytest.warns( UserWarning, match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow",