Skip to content

Commit

Permalink
Fixes an error when reading GPKG with bbox filter + that no error is …
Browse files Browse the repository at this point in the history
…raised for an invalid where clause for GPKG (#150)
  • Loading branch information
theroggy authored Sep 21, 2022
1 parent acf500c commit b60dbd5
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 24 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

- register GDAL drivers during initial import of pyogrio (#145)
- support writing "not a time" (NaT) values in a datetime column (#146)
- fixes an error when reading GPKG with bbox filter (#150)
- properly raises error when invalid where clause is used on a GPKG (#150)
- avoid duplicate count of available features (#151)

## 0.4.1
Expand Down
58 changes: 53 additions & 5 deletions pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -638,17 +638,33 @@ cdef get_features(
]

field_data_view = [field_data[field_index][:] for field_index in range(n_fields)]
i = 0
while True:
if num_features > 0 and i == num_features:
break

for i in range(num_features):
try:
ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))

except NullPointerError:
raise FeatureError(f"Could not read feature at index {i}") from None
# No more rows available, so stop reading
break

except CPLE_BaseError as exc:
if "failed to prepare SQL" in str(exc):
raise ValueError(f"Invalid SQL query") from exc

raise FeatureError(str(exc))

if i >= num_features:
raise FeatureError(
"GDAL returned more records than expected based on the count of "
"records that may meet your combination of filters against this "
"dataset. Please open an issue on Github "
"(https://github.com/geopandas/pyogrio/issues) to report encountering "
"this error."
) from None

if return_fids:
fid_view[i] = OGR_F_GetFID(ogr_feature)

Expand All @@ -659,6 +675,18 @@ cdef get_features(
ogr_feature, i, n_fields, field_data, field_data_view,
field_indexes, field_ogr_types, encoding
)
i += 1

# There may be fewer rows available than expected from OGR_L_GetFeatureCount,
# such as features with bounding boxes that intersect the bbox
# but do not themselves intersect the bbox.
# Empty rows are dropped.
if i < num_features:
if return_fids:
fid_data = fid_data[:i]
if read_geometry:
geometries = geometries[:i]
field_data = [data_field[:i] for data_field in field_data]

return fid_data, geometries, field_data

Expand Down Expand Up @@ -749,15 +777,28 @@ cdef get_bounds(
bounds_data = np.empty(shape=(4, num_features), dtype='float64')
bounds_view = bounds_data[:]

for i in range(num_features):
i = 0
while True:
if num_features > 0 and i == num_features:
break

try:
ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))

except NullPointerError:
raise FeatureError(f"Could not read feature at index {i}") from None
# No more rows available, so stop reading
break

except CPLE_BaseError as exc:
raise FeatureError(str(exc))
if "failed to prepare SQL" in str(exc):
raise ValueError(f"Invalid SQL query") from exc
else:
raise FeatureError(str(exc))

if i >= num_features:
raise FeatureError(
"Reading more features than indicated by OGR_L_GetFeatureCount is not supported"
) from None

fid_view[i] = OGR_F_GetFID(ogr_feature)

Expand All @@ -773,6 +814,13 @@ cdef get_bounds(
bounds_view[2, i] = ogr_envelope.MaxX
bounds_view[3, i] = ogr_envelope.MaxY

i += 1

# Less rows read than anticipated, so drop empty rows
if i < num_features:
fid_data = fid_data[:i]
bounds_data = bounds_data[:, :i]

return fid_data, bounds_data


Expand Down
19 changes: 13 additions & 6 deletions pyogrio/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ def test_read_bounds_skip_features(naturalearth_lowres):
assert fids[0] == 10


def test_read_bounds_where_invalid(naturalearth_lowres):
def test_read_bounds_where_invalid(naturalearth_lowres_all_ext):
with pytest.raises(ValueError, match="Invalid SQL"):
read_bounds(naturalearth_lowres, where="invalid")
read_bounds(naturalearth_lowres_all_ext, where="invalid")


def test_read_bounds_where(naturalearth_lowres):
Expand All @@ -128,18 +128,25 @@ def test_read_bounds_bbox_invalid(naturalearth_lowres, bbox):
read_bounds(naturalearth_lowres, bbox=bbox)


def test_read_bounds_bbox(naturalearth_lowres):
def test_read_bounds_bbox(naturalearth_lowres_all_ext):
# should return no features
with pytest.warns(UserWarning, match="does not have any features to read"):
fids, bounds = read_bounds(naturalearth_lowres, bbox=(0, 0, 0.00001, 0.00001))
fids, bounds = read_bounds(
naturalearth_lowres_all_ext, bbox=(0, 0, 0.00001, 0.00001)
)

assert fids.shape == (0,)
assert bounds.shape == (4, 0)

fids, bounds = read_bounds(naturalearth_lowres, bbox=(-140, 20, -100, 40))
fids, bounds = read_bounds(naturalearth_lowres_all_ext, bbox=(-140, 20, -100, 45))

assert fids.shape == (2,)
assert array_equal(fids, [4, 27]) # USA, MEX
if naturalearth_lowres_all_ext.suffix == ".gpkg":
# fid in gpkg is 1-based
assert array_equal(fids, [5, 28]) # USA, MEX
else:
# fid in other formats is 0-based
assert array_equal(fids, [4, 27]) # USA, MEX

assert bounds.shape == (4, 2)
assert allclose(
Expand Down
13 changes: 4 additions & 9 deletions pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,13 +179,8 @@ def test_read_where(naturalearth_lowres_all_ext):

@pytest.mark.filterwarnings("ignore:.*Layer .* does not have any features to read")
def test_read_where_invalid(naturalearth_lowres_all_ext):
if naturalearth_lowres_all_ext.suffix in [".gpkg"]:
# Geopackage doesn't raise, but returns empty df?
gdf = read_dataframe(naturalearth_lowres_all_ext, where="invalid")
assert len(gdf) == 0
else:
with pytest.raises(ValueError, match="Invalid SQL"):
read_dataframe(naturalearth_lowres_all_ext, where="invalid")
with pytest.raises(ValueError, match="Invalid SQL"):
read_dataframe(naturalearth_lowres_all_ext, where="invalid")


@pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
Expand All @@ -200,7 +195,7 @@ def test_read_bbox(naturalearth_lowres_all_ext):
df = read_dataframe(naturalearth_lowres_all_ext, bbox=(0, 0, 0.00001, 0.00001))
assert len(df) == 0

df = read_dataframe(naturalearth_lowres_all_ext, bbox=(-140, 20, -100, 40))
df = read_dataframe(naturalearth_lowres_all_ext, bbox=(-140, 20, -100, 45))
assert len(df) == 2
assert np.array_equal(df.iso_a3, ["USA", "MEX"])

Expand Down Expand Up @@ -323,7 +318,7 @@ def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext):
sql_dialect="OGRSQL",
columns=["iso_a3_renamed", "name"],
where="iso_a3_renamed IN ('CAN', 'USA', 'MEX')",
bbox=(-140, 20, -100, 40),
bbox=(-140, 20, -100, 45),
)
assert len(df.columns) == 3
assert len(df) == 2
Expand Down
10 changes: 6 additions & 4 deletions pyogrio/tests/test_raw_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,16 @@ def test_read_bbox_invalid(naturalearth_lowres, bbox):
read(naturalearth_lowres, bbox=bbox)


def test_read_bbox(naturalearth_lowres):
def test_read_bbox(naturalearth_lowres_all_ext):
# should return no features
with pytest.warns(UserWarning, match="does not have any features to read"):
geometry, fields = read(naturalearth_lowres, bbox=(0, 0, 0.00001, 0.00001))[2:]
geometry, fields = read(
naturalearth_lowres_all_ext, bbox=(0, 0, 0.00001, 0.00001)
)[2:]

assert len(geometry) == 0

geometry, fields = read(naturalearth_lowres, bbox=(-140, 20, -100, 40))[2:]
geometry, fields = read(naturalearth_lowres_all_ext, bbox=(-140, 20, -100, 45))[2:]

assert len(geometry) == 2
assert np.array_equal(fields[3], ["USA", "MEX"])
Expand Down Expand Up @@ -243,7 +245,7 @@ def test_read_fids_unsupported_keywords(naturalearth_lowres):
read(naturalearth_lowres, fids=[1], where="iso_a3 = 'CAN'")

with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
read(naturalearth_lowres, fids=[1], bbox=(-140, 20, -100, 40))
read(naturalearth_lowres, fids=[1], bbox=(-140, 20, -100, 45))

with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
read(naturalearth_lowres, fids=[1], skip_features=5)
Expand Down

0 comments on commit b60dbd5

Please sign in to comment.