Add GeoJSON support: register the "application/geo+json" MIME type for
".geojson" files, forward extra reader kwargs (e.g. bbox, mask) to
geopandas, and make preview_offset / preview_nrows select the row window
[preview_offset, preview_offset + preview_nrows).

diff --git a/peakina/helpers.py b/peakina/helpers.py
index 6f9b212f..8ef1fa3c 100644
--- a/peakina/helpers.py
+++ b/peakina/helpers.py
@@ -43,7 +43,7 @@ class TypeInfos(NamedTuple):
 
 
 # For files without MIME types, we make fake MIME types based on detected extension
-CUSTOM_MIMETYPES = {".parquet": "peakina/parquet"}
+CUSTOM_MIMETYPES = {".parquet": "peakina/parquet", ".geojson": "application/geo+json"}
 
 EXTRA_PEAKINA_READER_KWARGS = ["preview_offset", "preview_nrows"]
 
@@ -64,7 +64,7 @@ class TypeInfos(NamedTuple):
         excel_meta,
     ),
     "geojson": TypeInfos(
-        ["application/json"],
+        ["application/geo+json"],
         read_file,
     ),
     "json": TypeInfos(
@@ -97,7 +97,6 @@ def detect_type(filepath: str, is_regex: bool = False) -> Optional[TypeEnum]:
     if is_regex:
         filepath = filepath.rstrip("$")
     mimetype, _ = mimetypes.guess_type(filepath)
-
     # Fallback on custom MIME types
     if mimetype is None:
         _, fileext = os.path.splitext(filepath)
diff --git a/peakina/readers/geojson.py b/peakina/readers/geojson.py
index 9908cba3..5a666165 100644
--- a/peakina/readers/geojson.py
+++ b/peakina/readers/geojson.py
@@ -1,5 +1,6 @@
 from functools import wraps
 from typing import Any, Optional
+
 import geopandas as gpd
 
 
@@ -7,4 +8,7 @@
 def read_file(
     path: str, preview_offset: int = 0, preview_nrows: Optional[int] = None, **kwargs: Any
 ) -> gpd.GeoDataFrame:
-    return gpd.read_file(path, rows=slice(preview_offset, preview_nrows))
+    # The slice bounds are absolute row positions, so the window must end at
+    # offset + nrows. `is not None` keeps an explicit preview_nrows=0 distinct from "no limit".
+    stop = preview_offset + preview_nrows if preview_nrows is not None else None
+    return gpd.read_file(path, rows=slice(preview_offset, stop), **kwargs)
diff --git a/tests/fixtures/sample.geojson b/tests/fixtures/sample.geojson
new file mode 100644
index 00000000..803980a8
--- /dev/null
+++ b/tests/fixtures/sample.geojson
@@ -0,0 +1,34 @@
+{ "type": "FeatureCollection",
+  "features": [
+    { "type": "Feature",
+      "geometry": {"type": "Point", "coordinates": [102.0, 0.5]},
+      "properties": {"prop0": "value0", "prop1": 2.0}
+    },
+    { "type": "Feature",
+      "geometry": {
+        "type": "LineString",
+        "coordinates": [
+          [102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]
+        ]
+      },
+      "properties": {
+        "prop0": "value0",
+        "prop1": 0.0
+      }
+    },
+    { "type": "Feature",
+      "geometry": {
+        "type": "Polygon",
+        "coordinates": [
+          [ [100.0, 0.0], [101.0, 0.0], [101.0, 1.0],
+            [100.0, 1.0], [100.0, 0.0] ]
+        ]
+
+      },
+      "properties": {
+        "prop0": "value0",
+        "prop1": 3.0
+      }
+    }
+  ]
+}
diff --git a/tests/readers/test_geojson.py b/tests/readers/test_geojson.py
new file mode 100644
index 00000000..4ec5477e
--- /dev/null
+++ b/tests/readers/test_geojson.py
@@ -0,0 +1,89 @@
+import geopandas as gpd
+from peakina import DataSource
+
+sample_geojson = {
+    "type": "FeatureCollection",
+    "features": [
+        {
+            "type": "Feature",
+            "geometry": {"type": "Point", "coordinates": [102.0, 0.5]},
+            "properties": {"prop0": "value0", "prop1": 2.0},
+        },
+        {
+            "type": "Feature",
+            "geometry": {
+                "type": "LineString",
+                "coordinates": [[102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]],
+            },
+            "properties": {"prop0": "value0", "prop1": 0.0},
+        },
+        {
+            "type": "Feature",
+            "geometry": {
+                "type": "Polygon",
+                "coordinates": [
+                    [[100.0, 0.0], [101.0, 0.0], [101.0, 1.0], [100.0, 1.0], [100.0, 0.0]]
+                ],
+            },
+            "properties": {"prop0": "value0", "prop1": 3.0},
+        },
+    ],
+}
+
+
+def test_simple_geojson_preview(path):
+    """It should be able to get a preview of a geojson file"""
+    ds = DataSource(path("sample.geojson"))
+    expected = gpd.GeoDataFrame.from_features(sample_geojson)
+    result = ds.get_df()
+    assert (result["prop0"] == expected["prop0"]).all()
+    assert (result["prop1"] == expected["prop1"]).all()
+    assert (result["geometry"] == expected["geometry"]).all()
+
+    ds = DataSource(path("sample.geojson"), reader_kwargs={"preview_offset": 1})
+    result = ds.get_df()
+
+    expected = gpd.GeoDataFrame.from_features(sample_geojson).iloc[1:]
+    expected.index = [0, 1]
+    assert (result["prop0"] == expected["prop0"]).all()
+    assert (result["prop1"] == expected["prop1"]).all()
+    assert (result["geometry"] == expected["geometry"]).all()
+
+    ds = DataSource(path("sample.geojson"), reader_kwargs={"preview_nrows": 2})
+    result = ds.get_df()
+
+    expected = gpd.GeoDataFrame.from_features(sample_geojson).iloc[:2]
+    assert (result["prop0"] == expected["prop0"]).all()
+    assert (result["prop1"] == expected["prop1"]).all()
+    assert (result["geometry"] == expected["geometry"]).all()
+
+    ds = DataSource(path("sample.geojson"), reader_kwargs={"preview_offset": 1, "preview_nrows": 1})
+    result = ds.get_df()
+    expected = gpd.GeoDataFrame.from_features(sample_geojson).iloc[1]
+    assert (result["prop0"] == expected["prop0"]).all()
+    assert (result["prop1"] == expected["prop1"]).all()
+    assert (result["geometry"] == expected["geometry"]).all()
+
+    # Regression: the row window must end at offset + nrows, whatever the offset
+    ds = DataSource(path("sample.geojson"), reader_kwargs={"preview_offset": 2, "preview_nrows": 1})
+    result = ds.get_df()
+    assert len(result) == 1
+    expected = gpd.GeoDataFrame.from_features(sample_geojson).iloc[2:]
+    expected.index = [0]
+    assert (result["prop0"] == expected["prop0"]).all()
+    assert (result["prop1"] == expected["prop1"]).all()
+    assert (result["geometry"] == expected["geometry"]).all()
+
+
+def test_geojson_bbox(path):
+    bbox = (102, 0.5, 102.5, 0.5)
+    ds = DataSource(path("sample.geojson"), reader_kwargs={"bbox": bbox})
+    expected = gpd.GeoDataFrame.from_features(sample_geojson).iloc[:2]
+    assert (ds.get_df()["geometry"] == expected["geometry"]).all()
+
+
+def test_geojson_mask(path):
+    mask = gpd.GeoDataFrame.from_features(sample_geojson).iloc[0]["geometry"]
+    ds = DataSource(path("sample.geojson"), reader_kwargs={"mask": mask})
+    expected = gpd.GeoDataFrame.from_features(sample_geojson).iloc[0]
+    assert (ds.get_df()["geometry"] == expected["geometry"]).all()
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 24728c9c..3d1b36f8 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -26,7 +26,7 @@ def test_detect_type_no_regex():
         detect_type("file.doc")
     assert (
         str(e.value) == "Unsupported mimetype 'application/msword'. "
-        "Supported types are: 'csv', 'excel', 'json', 'parquet', 'xml'."
+        "Supported types are: 'csv', 'geojson', 'excel', 'json', 'parquet', 'xml'."
     )
     with pytest.raises(ValueError):
         detect_type("file*.csv$")