Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Raster.from_pointcloud_regular to reconstruct a raster from a point cloud #547

Merged
merged 5 commits into from
May 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 90 additions & 5 deletions geoutils/raster/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -3978,22 +3978,107 @@ def to_pointcloud(
np.array(a) for a in self.ij2xy(indices[0], indices[1], force_offset=force_pixel_offset)
)

# Merge the coordinates and pixel data into a point cloud.
points_arr = np.vstack((x_coords_2.reshape(1, -1), y_coords_2.reshape(1, -1), pixel_data)).T

if not as_array:
points = Vector(
gpd.GeoDataFrame(
points_arr[:, 2:],
pixel_data.T,
columns=all_column_names,
geometry=gpd.points_from_xy(points_arr[:, 0], points_arr[:, 1]),
geometry=gpd.points_from_xy(x_coords_2, y_coords_2),
crs=self.crs,
)
)
return points
else:
# Merge the coordinates and pixel data an array of N x K
# This has the downside of converting all the data to the same data type
points_arr = np.vstack((x_coords_2.reshape(1, -1), y_coords_2.reshape(1, -1), pixel_data)).T
return points_arr

@classmethod
def from_pointcloud_regular(
cls: type[RasterType],
pointcloud: gpd.GeoDataFrame,
grid_coords: tuple[NDArrayNum, NDArrayNum] = None,
transform: rio.transform.Affine = None,
shape: tuple[int, int] = None,
nodata: int | float | None = None,
data_column_name: str = "b1",
area_or_point: Literal["Area", "Point"] = "Point",
) -> RasterType:
"""
Create a raster from a point cloud with coordinates on a regular grid.

To inform on what grid to create the raster, either pass a tuple of X/Y grid coordinates, or the expected
transform and shape. All point cloud coordinates must fall exactly at one the coordinates of this grid.

:param pointcloud: Point cloud.
:param grid_coords: Regular coordinate vectors for the raster, from which the geotransform and shape are
deduced.
:param transform: Geotransform of the raster.
:param shape: Shape of the raster.
:param nodata: Nodata value of the raster.
:param data_column_name: Name to use for point cloud data column, defaults to "bX" where X is the data band
number.
:param area_or_point: Whether to set the pixel interpretation of the raster to "Area" or "Point".
"""

# Get transform and shape from input
if grid_coords is not None:

# Input checks
if (
not isinstance(grid_coords, tuple)
or not (isinstance(grid_coords[0], np.ndarray) and grid_coords[0].ndim == 1)
or not (isinstance(grid_coords[1], np.ndarray) and grid_coords[1].ndim == 1)
):
raise TypeError("Input grid coordinates must be 1D arrays.")

diff_x = np.diff(grid_coords[0])
diff_y = np.diff(grid_coords[1])

if not all(diff_x == diff_x[0]) and all(diff_y == diff_y[0]):
raise ValueError("Grid coordinates must be regular (equally spaced, independently along X and Y).")

# Build transform from min X, max Y and step in both
out_transform = rio.transform.from_origin(
np.min(grid_coords[0]), np.max(grid_coords[1]), diff_x[0], diff_y[0]
)
# Y is first axis, X is second axis
out_shape = (len(grid_coords[1]), len(grid_coords[0]))

elif transform is not None and shape is not None:

out_transform = transform
out_shape = shape

else:
raise ValueError("Either grid coordinates or both geotransform and shape must be provided.")

# Create raster from inputs, with placeholder data for now
dtype = pointcloud[data_column_name].dtype
out_nodata = nodata if not None else _default_nodata(dtype)
arr = np.ones(out_shape, dtype=dtype)
raster_arr = cls.from_array(
data=arr, transform=out_transform, crs=pointcloud.crs, nodata=out_nodata, area_or_point=area_or_point
)

# Get indexes of point cloud coordinates in the raster, forcing no shift
i, j = raster_arr.xy2ij(
x=pointcloud.geometry.x.values, y=pointcloud.geometry.y.values, shift_area_or_point=False
)

# If coordinates are not integer type (forced in xy2ij), then some points are not falling on exact coordinates
if not np.issubdtype(i.dtype, np.integer) or not np.issubdtype(i.dtype, np.integer):
raise ValueError("Some point cloud coordinates differ from the grid coordinates.")

# Set values
mask = np.ones(np.shape(arr), dtype=bool)
mask[i, j] = False
arr[i, j] = pointcloud[data_column_name].values
raster_arr.data = np.ma.masked_array(data=arr, mask=mask)

return raster_arr

def polygonize(
self, target_values: Number | tuple[Number, Number] | list[Number] | NDArrayNum | Literal["all"] = "all"
) -> Vector:
Expand Down
61 changes: 61 additions & 0 deletions tests/test_raster/test_raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -3381,6 +3381,67 @@ def test_to_pointcloud(self) -> None:
):
img2.to_pointcloud(auxiliary_data_bands=[2, 3], auxiliary_column_names=["lol", "lol2", "lol3"])

def test_from_pointcloud(self) -> None:
"""Test from_pointcloud method."""

# 1/ Create a small raster to test point sampling on
shape = (5, 5)
nodata = 100
img_arr = np.arange(np.prod(shape), dtype="int32").reshape(shape)
transform = rio.transform.from_origin(0, 5, 1, 1)
img1 = gu.Raster.from_array(img_arr, transform=transform, crs=4326, nodata=nodata)

# Check both inputs work (grid coords or transform+shape) on a subsample
pc1 = img1.to_pointcloud(subsample=10)
img1_sub = gu.Raster.from_pointcloud_regular(pc1, transform=transform, shape=shape)

grid_coords1 = img1.coords(grid=False)
img1_sub2 = gu.Raster.from_pointcloud_regular(pc1, grid_coords=grid_coords1)

assert img1_sub.raster_equal(img1_sub2)

# Check that number of valid values are equal to point cloud size
assert np.count_nonzero(~img1_sub.data.mask) == 10

# With no subsampling, should get the exact same raster back
pc1_full = img1.to_pointcloud()
img1_full = gu.Raster.from_pointcloud_regular(pc1_full, transform=transform, shape=shape, nodata=nodata)
assert img1.raster_equal(img1_full, warn_failure_reason=True)

# 2/ Single-band real raster with nodata values
img2 = gu.Raster(self.aster_dem_path)
nodata = img2.nodata
transform = img2.transform
shape = img2.shape

# Check both inputs work (grid coords or transform+shape) on a subsample
pc2 = img2.to_pointcloud(subsample=10000, random_state=42)
img2_sub = gu.Raster.from_pointcloud_regular(pc2, transform=transform, shape=shape, nodata=nodata)

grid_coords2 = img2.coords(grid=False)
img2_sub2 = gu.Raster.from_pointcloud_regular(pc2, grid_coords=grid_coords2, nodata=nodata)

assert img2_sub.raster_equal(img2_sub2, warn_failure_reason=True)

# Check that number of valid values are equal to point cloud size
assert np.count_nonzero(~img2_sub.data.mask) == 10000

# With no subsampling, should get the exact same raster back
pc2_full = img2.to_pointcloud()
img2_full = gu.Raster.from_pointcloud_regular(pc2_full, transform=transform, shape=shape, nodata=nodata)
assert img2.raster_equal(img2_full, warn_failure_reason=True, strict_masked=False)

# 3/ Error raising
with pytest.raises(TypeError, match="Input grid coordinates must be 1D arrays.*"):
gu.Raster.from_pointcloud_regular(pc1, grid_coords=(1, "lol")) # type: ignore
with pytest.raises(ValueError, match="Grid coordinates must be regular*"):
grid_coords1[0][0] += 1
gu.Raster.from_pointcloud_regular(pc1, grid_coords=grid_coords1) # type: ignore
with pytest.raises(
ValueError, match="Either grid coordinates or both geotransform and shape must be provided."
):
gu.Raster.from_pointcloud_regular(pc1)


class TestMask:
# Paths to example data
Expand Down
Loading