Skip to content

Commit

Permalink
Fix missing data handling (geopandas#582)
Browse files Browse the repository at this point in the history
Follow-up to pandas-dev/pandas#17728

* Use None for missing values

Previously we used an empty polygon (`Empty Polygon`) to represent missing
values. Now we revert to using NULL in GeometryArray (as before) and
Python None when converting to shapely objects.

As a result, only None and NaN values are considered missing.
  • Loading branch information
mrocklin authored and jorisvandenbossche committed Sep 18, 2019
1 parent 2b0f5b6 commit 7beb2db
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 24 deletions.
21 changes: 15 additions & 6 deletions geopandas/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,14 +800,23 @@ def _fill(self, idx, value):
""" Fill index locations with value
Value should be a BaseGeometry
Returns a copy
"""
if not (isinstance(value, BaseGeometry) or value is None):
base = [self]
if isinstance(value, BaseGeometry):
base.append(value)
value = value.__geom__
elif value is None:
value = 0
else:
raise TypeError(
"Value should be either a BaseGeometry or None, got %s" % str(value)
"Value should be either a BaseGeometry or None, " "got %s" % str(value)
)
# self.data[idx] = value
self.data[idx] = np.array([value], dtype=object)
return self

new = GeometryArray(self.data.copy(), base=base)
new.data[idx] = value
return new

def fillna(self, value=None, method=None, limit=None):
""" Fill NA/NaN values using the specified method.
Expand Down Expand Up @@ -879,7 +888,7 @@ def isna(self):
"""
Boolean NumPy array indicating if each value is missing
"""
return np.array([g is None for g in self], dtype="bool")
return self.data == 0

def unique(self):
"""Compute the ExtensionArray of unique values.
Expand Down
23 changes: 16 additions & 7 deletions geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,15 +345,24 @@ def notnull(self):
"""Alias for `notna` method. See `notna` for more detail."""
return self.notna()

def fillna(self, value=None, method=None, inplace=False, **kwargs):
"""Fill NA values with a geometry (empty polygon by default).
"method" is currently not implemented for pandas <= 0.12.
"""
def fillna(self, value=None):
"""Fill NA/NaN values with a geometry (empty polygon by default)"""
if value is None:
value = BaseGeometry()
return super(GeoSeries, self).fillna(
value=value, method=method, inplace=inplace, **kwargs
return GeoSeries(
self.array.fillna(value), index=self.index, crs=self.crs, name=self.name
)

def dropna(self):
""" Drop NA/NaN values
Note: the inplace keyword is not currently supported.
"""
return GeoSeries(
self.array[~self.isna()],
index=self.index[~self.isna()],
crs=self.crs,
name=self.name,
)

def __contains__(self, other):
Expand Down
2 changes: 1 addition & 1 deletion geopandas/tests/test_geocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def test_prepare_result_none():
assert "address" in df

row = df.loc["b"]
assert len(row["geometry"].coords) == 0
assert not row["geometry"]
assert np.isnan(row["address"])


Expand Down
1 change: 0 additions & 1 deletion geopandas/tests/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ def test_transform(self):
def test_fillna(self):
# default is to fill with empty geometry
na = self.na_none.fillna()
assert isinstance(na[2], BaseGeometry)
assert na[2].is_empty
assert geom_equals(self.na_none[:2], na[:2])
# XXX: method works inconsistently for different pandas versions
Expand Down
5 changes: 3 additions & 2 deletions geopandas/tests/test_pandas_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,11 @@ def test_select_dtypes(df):
# Missing values


def test_fillna(s):
def test_fillna():
s2 = GeoSeries([Point(0, 0), None, Point(2, 2)])
res = s2.fillna(Point(1, 1))
assert_geoseries_equal(res, s)
expected = GeoSeries([Point(0, 0), Point(1, 1), Point(2, 2)])
assert_geoseries_equal(res, expected)


def test_dropna():
Expand Down
11 changes: 11 additions & 0 deletions geopandas/tests/test_vectorized.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,3 +424,14 @@ def test_unary_union():
def test_coords():
L = T.exterior.coords
assert L == [tuple(t.exterior.coords) for t in triangles]


def test_fill():
p = shapely.geometry.Point(1, 2)
P2 = P._fill([0, 3], p)
assert P2[0].equals(p)
assert P2[3].equals(p)
with pytest.raises(TypeError) as info:
P._fill([1, 2], 123)

assert "123" in str(info.value)
12 changes: 9 additions & 3 deletions geopandas/tools/tests/test_sjoin.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,13 @@ def test_sjoin(op, lsuffix, rsuffix, how, missing):
right_touched = set()
for left_index, left_row in left.iterrows():
for right_index, right_row in right.iterrows():
if getattr(left_row["geometry"], op)(right_row["geometry"]):
left_geom = left_row["geometry"]
right_geom = right_row["geometry"]
if (
left_geom
and right_geom
and getattr(left_row["geometry"], op)(right_row["geometry"])
):
left_out.append(left_index)
right_out.append(right_index)

Expand All @@ -81,15 +87,15 @@ def test_sjoin(op, lsuffix, rsuffix, how, missing):
L = list(result.geometry)
for t in triangles2:
if t:
assert any(t.equals(t2) for t2 in L)
assert any(t2 and t.equals(t2) for t2 in L)

if how == "right":
assert len(result) >= len(right_out)
assert set(result.columns) == set(columns + ["index_left"])
L = list(result.geometry)
for p in points2:
if p:
assert any(p.equals(p2) for p2 in L)
assert any(p2 and p.equals(p2) for p2 in L)


def test_crs_mismatch():
Expand Down
8 changes: 4 additions & 4 deletions geopandas/vectorized.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ cpdef get_element(np.ndarray[np.uintp_t, ndim=1, cast=True] geoms, int idx):

handle = get_geos_context_handle()

if not geom:
geom = GEOSGeom_createEmptyPolygon_r(handle)
if geom is NULL:
return None
else:
geom = GEOSGeom_clone_r(handle, geom) # create a copy rather than deal with gc

Expand All @@ -80,11 +80,11 @@ cpdef to_shapely(np.ndarray[np.uintp_t, ndim=1, cast=True] geoms):
geom = <GEOSGeometry *> geoms[i]

if not geom:
geom = GEOSGeom_createEmptyPolygon_r(handle)
out[i] = None
else:
geom = GEOSGeom_clone_r(handle, geom) # create a copy rather than deal with gc
out[i] = geom_factory(<np.uintp_t> geom)

out[i] = geom_factory(<np.uintp_t> geom)

return out

Expand Down

0 comments on commit 7beb2db

Please sign in to comment.