feat: make city optional when bbox is specified (#4)
HideBa committed Mar 5, 2024
1 parent 2dfae17 commit df38dd2
Showing 9 changed files with 206 additions and 97 deletions.
2 changes: 0 additions & 2 deletions README.md
@@ -10,8 +10,6 @@ AHN CLI is a command-line interface tool designed for the effortless downloading

## Installation

> **NOTE:** AHN CLI requires PDAL to be installed on your system. Follow the installation instructions in the [PDAL documentation](https://pdal.io/download.html) before proceeding.
Install AHN CLI using pip:

```
21 changes: 19 additions & 2 deletions ahn_cli/fetcher/geotiles.py
@@ -1,6 +1,7 @@
import os

import geopandas as gpd
from pyproj import Transformer

from ahn_cli.fetcher.municipality import city_polygon

@@ -14,13 +15,29 @@ def geotiles() -> gpd.GeoDataFrame:
return ahn_tile_gdf


def ahn_subunit_indicies_of_city(city_name: str) -> list[int]:
def ahn_subunit_indicies_of_city(city_name: str) -> list[str]:
"""Return a list of AHN tile indicies that intersect with the city's boundary.""" # noqa
city_poly = city_polygon(city_name)
geotiles_tile_gdf = geotiles()

# Filter the DataFrame based on lowercase column values
filtered_df = geotiles_tile_gdf.overlay(city_poly)
tile_indices: list[int] = filtered_df["AHN_subuni"].tolist() # noqa
tile_indices: list[str] = filtered_df["AHN_subuni"].tolist() # noqa

return tile_indices


def ahn_subunit_indicies_of_bbox(bbox: list[float]) -> list[str]:
"""Return a list of AHN tile indicies that intersect with the bbox.""" # noqa
geotiles_tile_gdf = geotiles()

transformer = Transformer.from_crs(
"EPSG:28992", "EPSG:4326", always_xy=True
)
minx, miny = transformer.transform(bbox[0], bbox[1])
maxx, maxy = transformer.transform(bbox[2], bbox[3])
# Filter the DataFrame based on lowercase column values
filtered_df = geotiles_tile_gdf.cx[minx:maxx, miny:maxy]
tile_indices: list[str] = filtered_df["AHN_subuni"].tolist() # noqa

return tile_indices
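
For readers unfamiliar with the pattern, here is a minimal standalone sketch of the reproject-then-slice approach used by `ahn_subunit_indicies_of_bbox` above. The tile file path, the column assumptions, and the example bbox are placeholders, not values from this repository; the sketch assumes the tile index is stored in EPSG:4326 while the bbox is given in Dutch RD New (EPSG:28992), as in the diff.

```python
import geopandas as gpd
from pyproj import Transformer

# Hypothetical tile index in WGS84 (EPSG:4326); stands in for the AHN geotiles layer.
tiles = gpd.read_file("tiles.gpkg")  # placeholder path

# Bounding box in Dutch RD New coordinates (EPSG:28992): [minx, miny, maxx, maxy].
bbox_rd = [120000.0, 485000.0, 125000.0, 490000.0]

# Reproject the two bbox corners to the CRS of the tile index before slicing.
transformer = Transformer.from_crs("EPSG:28992", "EPSG:4326", always_xy=True)
minx, miny = transformer.transform(bbox_rd[0], bbox_rd[1])
maxx, maxy = transformer.transform(bbox_rd[2], bbox_rd[3])

# .cx selects rows whose geometry intersects the coordinate window.
subset = tiles.cx[minx:maxx, miny:maxy]
print(subset.head())
```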
62 changes: 58 additions & 4 deletions ahn_cli/fetcher/request.py
@@ -9,18 +9,54 @@
import requests
from tqdm import tqdm

from ahn_cli.fetcher.geotiles import ahn_subunit_indicies_of_city
from ahn_cli.fetcher.geotiles import (ahn_subunit_indicies_of_bbox,
ahn_subunit_indicies_of_city)


class Fetcher:
def __init__(self, base_url: str, city_name: str):
"""
Fetcher class for fetching AHN data.
Args:
base_url (str): The base URL for fetching AHN data.
city_name (str): The name of the city for which to fetch AHN data.
bbox (list[float] | None, optional): The bounding box coordinates [minx, miny, maxx, maxy]
for a specific area of interest. Defaults to None.
Raises:
ValueError: If the base URL is invalid.
Attributes:
base_url (str): The base URL for fetching AHN data.
city_name (str): The name of the city for which to fetch AHN data.
bbox (list[float] | None): The bounding box coordinates [minx, miny, maxx, maxy]
for a specific area of interest.
urls (list[str]): The constructed URLs for fetching AHN data.
Methods:
fetch: Fetches AHN data.
_check_valid_url: Checks if the base URL is valid.
_construct_urls: Constructs the URLs for fetching AHN data.
"""

def __init__(
self, base_url: str, city_name: str, bbox: list[float] | None = None
):
if not self._check_valid_url(base_url):
raise ValueError("Invalid URL")
self.base_url = base_url
self.city_name = city_name
self.bbox = bbox
self.urls = self._construct_urls()

def fetch(self) -> dict:
"""
Fetches AHN data.
Returns:
dict: A dictionary containing the fetched AHN data, where the keys are the URLs
and the values are the temporary file names where the data is stored.
"""
logging.info("Start fetching AHN data")
logging.info(f"Fetching {len(self.urls)} tiles")

@@ -50,16 +86,34 @@ def req(
return results

def _check_valid_url(self, url: str) -> bool:
"""
Checks if the base URL is valid.
Args:
url (str): The base URL to check.
Returns:
bool: True if the URL is valid, False otherwise.
"""
try:
result = urlparse(url)
return all([result.scheme, result.netloc, result.path])
except ValueError:
return False

def _construct_urls(self) -> list[str]:
tiles_indices = ahn_subunit_indicies_of_city(self.city_name)
"""
Constructs the URLs for fetching AHN data.
Returns:
list[str]: A list of URLs for fetching AHN data.
"""
tiles_indices = (
ahn_subunit_indicies_of_bbox(self.bbox)
if self.bbox
else ahn_subunit_indicies_of_city(self.city_name)
)
urls = []
for tile_index in tiles_indices:
urls.append(os.path.join(self.base_url + f"{tile_index}.LAZ"))

return urls
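
A hedged usage sketch of the updated `Fetcher` constructor: once `bbox` is supplied, `_construct_urls` routes tile selection through the bbox, so the city name no longer drives it. The base URL, city name, coordinates, and the empty-string city for the bbox case are illustrative assumptions, not values taken from this repository.

```python
from ahn_cli.fetcher.request import Fetcher

BASE_URL = "https://example.com/ahn/"  # placeholder, not the real AHN endpoint

# City-based fetch, as before this commit.
city_fetcher = Fetcher(BASE_URL, "Delft")

# New in this commit: a bounding box (EPSG:28992) selects tiles directly,
# so the city name is only nominal here (assumption: an empty string is fine).
bbox = [120000.0, 485000.0, 125000.0, 490000.0]
bbox_fetcher = Fetcher(BASE_URL, "", bbox=bbox)

# files = bbox_fetcher.fetch()  # would download each tile; returns {url: temp file path}
```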
@@ -10,20 +10,26 @@
from ahn_cli.manipulator.transformer import tranform_polygon


class PntCPipeline:
class PntCHandler:
"""
A class representing a data processing pipeline.
Args:
input_path (str): The path to the input data.
output_path (str): The path to save the output data.
city_filepath (str): The path to the city data file.
city_name (str): The name of the city.
epsg (int): The EPSG code for the coordinate reference system (CRS).
A class for handling point clouds.
Attributes:
pipeline_setting (list): The configuration settings for the pipeline.
las (laspy.LasData): The point cloud data.
city_df (gpd.GeoDataFrame): The city data.
city_name (str): The name of the city.
raster_res (float): The raster resolution.
epsg (str | None): The EPSG code.
Methods:
__init__: Initializes the PntCHandler object.
decimate: Decimates the point cloud by selecting every `step`-th point.
include: Filters the point cloud to include only the specified classes.
exclude: Exclude points with specific classification values from the pipeline.
clip: Clip the point cloud by a polygon.
clip_by_arbitrary_polygon: Clip the point cloud by an arbitrary polygon.
clip_by_bbox: Clips the point cloud by a bounding box.
points: Execute the pipeline and return the processed point cloud.
"""

las = laspy.LasData
@@ -46,49 +52,53 @@ def __init__(

def decimate(self, step: int) -> Self:
"""
Decimate the point cloud by a given step.
Decimates the point cloud by selecting every `step`-th point.
Args:
step (int): The step to decimate by.
step (int): The decimation step size.
Returns:
Pipeline: The updated pipeline object.
Self: The modified pipeline object.
"""
self.las.points = self.las.points[::step]
valid_point_masks = np.arange(0, len(self.las.points), step)
self.las.points = self.las.points[valid_point_masks]
return self

def include(self, include_classes: list[int]) -> Self:
"""
Filters the point cloud to include only the specified classes.
Filters the point cloud by including only the specified classes.
Args:
include_classes (list[int]): List of class labels to include.
include_classes (list[int]): A list of class IDs to include.
Returns:
Self: The modified pipeline object.
Self: The updated instance of the pipeline.
"""
mask = np.isin(self.las.classification, include_classes)
self.las.points = self.las.points[mask]
return self

def exclude(self, exclude_classes: list[int]) -> Self:
"""
Exclude points with specific classification values from the pipeline.
Exclude points from the point cloud based on their classification.
Args:
exclude_classes (list[int]): List of classification values to exclude.
exclude_classes (list[int]): List of classification codes to exclude.
Returns:
Self: The modified pipeline object.
"""
mask = np.isin(self.las.classification, exclude_classes, invert=True)
self.las.points = self.las.points[mask]
return self

def clip(self) -> Self:
"""
Clip the point cloud by a polygon.
Clips the point cloud to the extent of the city polygon.
Returns:
Self: The modified pipeline object.
"""
rasterized_polygon, transform = rasterizer.polygon_to_raster(
self._city_polygon(), self.raster_res
@@ -118,15 +128,15 @@ def clip(self) -> Self:

def clip_by_arbitrary_polygon(self, clip_file: str) -> Self:
"""
Clip the point cloud by a polygon.
Clips the point cloud by an arbitrary polygon defined in a clip file.
Args:
clip_file (str): The path to the polygon file.
clip_file (str): The path to the clip file containing the polygon.
Returns:
Pipeline: The updated pipeline object.
Self: The modified instance of the pipeline.
"""

polygon = self._arbitrary_polygon(clip_file)
rasterized_polygon, transform = rasterizer.polygon_to_raster(
polygon, self.raster_res
@@ -156,28 +166,28 @@ def clip_by_arbitrary_polygon(self, clip_file: str) -> Self:

def clip_by_bbox(self, bbox: list[float]) -> Self:
"""
Clips the point cloud by a bounding box.
Clips the point cloud by a given bounding box.
Args:
bbox (list[float]): The bounding box to clip the point cloud. [xmin, ymin, xmax, ymax]
bbox (list[float]): The bounding box coordinates in the format [xmin, ymin, xmax, ymax].
Returns:
Self: The updated pipeline object.
Self: The modified instance of the pipeline.
"""
xyz = self.las.xyz
x_valid = (xyz[:, 0] >= bbox[0]) & (xyz[:, 0] <= bbox[2])
y_valid = (xyz[:, 1] >= bbox[1]) & (xyz[:, 1] <= bbox[3])
valid_points_mask = x_valid & y_valid

x_valid = (self.las.x >= bbox[0]) & (self.las.x <= bbox[2])
y_valid = (self.las.y >= bbox[1]) & (self.las.y <= bbox[3])
valid_points_mask = np.where(x_valid & y_valid)[0]
self.las.points = self.las.points[valid_points_mask]

return self

def points(self) -> laspy.LasData:
"""
Execute the pipeline.
Returns the point cloud data.
Returns:
laspy.LasData: The processed point cloud.
laspy.LasData: The point cloud data.
"""
return self.las

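The handler methods in this file share one NumPy boolean-masking idiom over a `laspy` point record. A minimal, self-contained sketch of that idiom follows; the input path and class codes are illustrative only, not part of this repository.

```python
import laspy
import numpy as np

# Placeholder path; any LAS/LAZ tile with classification codes will do.
las = laspy.read("tile.laz")

# Keep only ground (2) and building (6) points -- the include() pattern.
keep = np.isin(las.classification, [2, 6])
las.points = las.points[keep]

# Clip to a bounding box [xmin, ymin, xmax, ymax] -- the clip_by_bbox() pattern.
bbox = [120000.0, 485000.0, 125000.0, 490000.0]
inside = (
    (las.x >= bbox[0]) & (las.x <= bbox[2])
    & (las.y >= bbox[1]) & (las.y <= bbox[3])
)
las.points = las.points[np.where(inside)[0]]

# Thin to every 10th remaining point -- the decimate() pattern.
las.points = las.points[np.arange(0, len(las.points), 10)]
```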
16 changes: 9 additions & 7 deletions ahn_cli/manipulator/rasterizer.py
@@ -1,7 +1,7 @@
from typing import Tuple

import numpy as np
from rasterio import features
from rasterio.features import rasterize
from rasterio.transform import Affine, from_origin
from shapely import Polygon

@@ -11,23 +11,25 @@ def polygon_to_raster(
resolution: float,
) -> Tuple[np.ndarray, Affine]:
"""
Convert a polygon to a raster file.
Converts a polygon into a rasterized numpy array.
Args:
polygon (Polygon): The polygon to convert.
resolution (float): The resolution of the raster.
polygon (Polygon): The input polygon to be rasterized.
resolution (float): The desired resolution of the rasterized array.
Returns:
None
Tuple[np.ndarray, Affine]: A tuple containing the rasterized numpy array and the affine transformation matrix.
"""
# Implementation code here
pass

bbox = polygon.bounds
height = int((bbox[3] - bbox[1]) / resolution)
width = int((bbox[2] - bbox[0]) / resolution)

transform = from_origin(bbox[0], bbox[3], resolution, resolution)
shape = (height, width)
rasterized = features.rasterize(
rasterized = rasterize(
shapes=[polygon],
out_shape=shape,
transform=transform,
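
For illustration, a minimal sketch of the same shapely/rasterio rasterization approach as `polygon_to_raster`; the polygon coordinates and resolution are arbitrary, and the grid derivation mirrors the bounds-based approach shown above rather than reproducing the function verbatim.

```python
import numpy as np
from rasterio.features import rasterize
from rasterio.transform import from_origin
from shapely import Polygon

# A right triangle in projected coordinates (metres assumed), 100 m on a side.
polygon = Polygon([(0, 0), (100, 0), (0, 100)])
resolution = 10.0

# Derive the grid shape and affine transform from the polygon bounds.
minx, miny, maxx, maxy = polygon.bounds
height = int((maxy - miny) / resolution)
width = int((maxx - minx) / resolution)
transform = from_origin(minx, maxy, resolution, resolution)

mask = rasterize(shapes=[polygon], out_shape=(height, width), transform=transform)
print(mask.shape)       # (10, 10)
print(np.unique(mask))  # [0 1]: 1 inside the polygon, 0 outside
```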