Skip to content

Commit

Permalink
feat: geojson reader
Browse files Browse the repository at this point in the history
  • Loading branch information
raphaelvignes committed Jun 3, 2022
1 parent 4573c8b commit 6a12fdd
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 5 deletions.
5 changes: 2 additions & 3 deletions peakina/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class TypeInfos(NamedTuple):


# For files without MIME types, we make fake MIME types based on detected extension
CUSTOM_MIMETYPES = {".parquet": "peakina/parquet"}
CUSTOM_MIMETYPES = {".parquet": "peakina/parquet", ".geojson": "application/geo+json"}

EXTRA_PEAKINA_READER_KWARGS = ["preview_offset", "preview_nrows"]

Expand All @@ -64,7 +64,7 @@ class TypeInfos(NamedTuple):
excel_meta,
),
"geojson": TypeInfos(
["application/json"],
["application/geo+json"],
read_file,
),
"json": TypeInfos(
Expand Down Expand Up @@ -97,7 +97,6 @@ def detect_type(filepath: str, is_regex: bool = False) -> Optional[TypeEnum]:
if is_regex:
filepath = filepath.rstrip("$")
mimetype, _ = mimetypes.guess_type(filepath)

# Fallback on custom MIME types
if mimetype is None:
_, fileext = os.path.splitext(filepath)
Expand Down
8 changes: 7 additions & 1 deletion peakina/readers/geojson.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
from functools import wraps
from typing import Any, Optional

import geopandas as gpd


@wraps(gpd.read_file)
def read_file(
    path: str, preview_offset: int = 0, preview_nrows: Optional[int] = None, **kwargs: Any
) -> gpd.GeoDataFrame:
    # NOTE: documented with comments rather than a docstring because
    # `functools.wraps` replaces this function's `__doc__` with the one
    # from `gpd.read_file`.
    #
    # Reads `path` with `gpd.read_file`, restricted to a preview window:
    #   - `preview_offset`: number of leading features to skip (default 0).
    #   - `preview_nrows`: maximum number of features to return after the
    #     offset; `None` means "up to the end of the file".
    # Any other keyword argument (e.g. `bbox`, `mask`) is forwarded untouched.
    #
    # The window end is `preview_offset + preview_nrows`. It used to be
    # `preview_nrows + 1`, which is only correct when the offset is exactly 1
    # (offset=2, nrows=1 produced the empty slice(2, 2)).
    # `preview_nrows` is compared against None so that an explicit 0 asks for
    # an empty window instead of silently reading the whole file.
    if preview_nrows is None:
        window_end = None
    else:
        window_end = preview_offset + preview_nrows
    return gpd.read_file(path, rows=slice(preview_offset, window_end), **kwargs)
34 changes: 34 additions & 0 deletions tests/fixtures/sample.geojson
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{ "type": "FeatureCollection",
"features": [
{ "type": "Feature",
"geometry": {"type": "Point", "coordinates": [102.0, 0.5]},
"properties": {"prop0": "value0", "prop1": 2.0}
},
{ "type": "Feature",
"geometry": {
"type": "LineString",
"coordinates": [
[102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]
]
},
"properties": {
"prop0": "value0",
"prop1": 0.0
}
},
{ "type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[ [100.0, 0.0], [101.0, 0.0], [101.0, 1.0],
[100.0, 1.0], [100.0, 0.0] ]
]

},
"properties": {
"prop0": "value0",
"prop1": 3.0
}
}
]
}
79 changes: 79 additions & 0 deletions tests/readers/test_geojson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import geopandas as gpd
from peakina import DataSource

# In-memory copy of the feature collection stored in
# tests/fixtures/sample.geojson: one Point, one LineString and one Polygon,
# each carrying the properties "prop0" and "prop1". The tests below build
# their expected GeoDataFrames from this dict.
sample_geojson = {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {"type": "Point", "coordinates": [102.0, 0.5]},
"properties": {"prop0": "value0", "prop1": 2.0},
},
{
"type": "Feature",
"geometry": {
"type": "LineString",
"coordinates": [[102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]],
},
"properties": {"prop0": "value0", "prop1": 0.0},
},
{
"type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[[100.0, 0.0], [101.0, 0.0], [101.0, 1.0], [100.0, 1.0], [100.0, 0.0]]
],
},
"properties": {"prop0": "value0", "prop1": 3.0},
},
],
}


def _assert_same_features(result, expected):
    """Assert that two feature frames hold the same rows.

    The length is checked explicitly so that an empty ``result`` cannot
    satisfy the element-wise comparisons vacuously (``.all()`` on an empty
    boolean series is True).
    """
    assert len(result) == len(expected)
    for column in ("prop0", "prop1", "geometry"):
        assert (result[column] == expected[column]).all()


def test_simple_geojson_preview(path):
    """It should be able to get a preview of a geojson file"""
    all_features = gpd.GeoDataFrame.from_features(sample_geojson)

    # No preview option: every feature of the file is returned.
    ds = DataSource(path("sample.geojson"))
    _assert_same_features(ds.get_df(), all_features)

    # Offset only: the first feature is skipped. The expected index is
    # reset so it lines up label-by-label with the 0-based result.
    ds = DataSource(path("sample.geojson"), reader_kwargs={"preview_offset": 1})
    _assert_same_features(ds.get_df(), all_features.iloc[1:].reset_index(drop=True))

    # Row limit only: the first two features are returned.
    ds = DataSource(path("sample.geojson"), reader_kwargs={"preview_nrows": 2})
    _assert_same_features(ds.get_df(), all_features.iloc[:2])

    # Offset and row limit together: exactly the second feature. A one-row
    # frame (not a single row Series) is used so the comparison is
    # frame-to-frame instead of broadcasting against scalars.
    ds = DataSource(path("sample.geojson"), reader_kwargs={"preview_offset": 1, "preview_nrows": 1})
    _assert_same_features(ds.get_df(), all_features.iloc[1:2].reset_index(drop=True))


def test_geojson_bbox(path):
    """A ``bbox`` reader kwarg should keep only the first two sample features."""
    reader_kwargs = {"bbox": (102, 0.5, 102.5, 0.5)}
    result = DataSource(path("sample.geojson"), reader_kwargs=reader_kwargs).get_df()
    # The test expects the Point and the LineString to be selected by this box.
    first_two = gpd.GeoDataFrame.from_features(sample_geojson).iloc[:2]
    geometries_match = result["geometry"] == first_two["geometry"]
    assert geometries_match.all()


def test_geojson_mask(path):
    """A ``mask`` reader kwarg built from the first feature should select that feature."""
    first_feature = gpd.GeoDataFrame.from_features(sample_geojson).iloc[0]
    reader_kwargs = {"mask": first_feature["geometry"]}
    result = DataSource(path("sample.geojson"), reader_kwargs=reader_kwargs).get_df()
    # NOTE(review): the right-hand side is a single geometry, so the comparison
    # broadcasts over every returned row; an empty result would also pass.
    geometries_match = result["geometry"] == first_feature["geometry"]
    assert geometries_match.all()
2 changes: 1 addition & 1 deletion tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_detect_type_no_regex():
detect_type("file.doc")
assert (
str(e.value) == "Unsupported mimetype 'application/msword'. "
"Supported types are: 'csv', 'excel', 'json', 'parquet', 'xml'."
"Supported types are: 'csv', 'geojson', 'excel', 'json', 'parquet', 'xml'."
)
with pytest.raises(ValueError):
detect_type("file*.csv$")
Expand Down

0 comments on commit 6a12fdd

Please sign in to comment.