Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix saving geodataframes with empty geometry #226

Merged
merged 3 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
## [Unreleased]

### Fixed
* Fixed problem snapping and routing PT services with stops whose names started with 'x' [#225](https://github.com/arup-group/genet/pull/225)
* Fixed summary report:
* Intermodal Access/Egress reporting is more general (not expecting just car and bike mode access to PT) [#204](https://github.com/arup-group/genet/pull/204)
* Node/Links numbers were reported incorrectly (switched) [#207](https://github.com/arup-group/genet/pull/207)
* Fixed reading `java.lang.Array` types in MATSim xml files [#216](https://github.com/arup-group/genet/pull/216)
* Fixed problem snapping and routing PT services with stops whose names started with 'x' [#225](https://github.com/arup-group/genet/pull/225)
* Fixed issues saving geodataframes with missing geometry values, and refined how geodataframes with empty values are saved so that they stay truer to the originals [#226](https://github.com/arup-group/genet/pull/226)

### Changed
* GeNet's pre-baked python scripts have been retired in favour of CLI [#194](https://github.com/arup-group/genet/pull/194)
Expand Down
2 changes: 1 addition & 1 deletion genet/output/geojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def generate_standard_outputs_for_schedule(
logging.info("Saving vehicles per hour for all PT modes for selected hour slices")
for h in [7, 8, 9, 13, 16, 17, 18]:
save_geodataframe(
df_all_modes_vph[df_all_modes_vph["hour"].dt.hour == h],
df_all_modes_vph[pd.to_datetime(df_all_modes_vph["hour"]).dt.hour == h],
filename=f"vph_all_modes_within_{h - 1}_30-{h}_30",
output_dir=vph_dir,
include_shp_files=include_shp_files,
Expand Down
8 changes: 5 additions & 3 deletions genet/output/sanitiser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ def sanitise_list(x):
def sanitise_geodataframe(gdf):
if isinstance(gdf, GeoSeries):
gdf = GeoDataFrame(gdf)
gdf = gdf.fillna("None")
object_columns = gdf.select_dtypes(["object"]).columns
for col in object_columns:
not_missing_mask = gdf[col].notna()
if gdf[col].apply(lambda x: isinstance(x, (set, list))).any():
gdf[col] = gdf[col].apply(lambda x: ",".join(x))
gdf.loc[not_missing_mask, col] = gdf.loc[not_missing_mask, col].apply(
lambda x: ",".join(x)
)
elif gdf[col].apply(lambda x: isinstance(x, dict)).any():
gdf[col] = gdf[col].apply(lambda x: str(x))
gdf.loc[not_missing_mask, col] = gdf.loc[not_missing_mask, col].apply(lambda x: str(x))
for col in gdf.select_dtypes(include=number).columns.tolist():
if (gdf[col] > sys.maxsize).any():
gdf[col] = gdf[col].apply(lambda x: str(x))
Expand Down
29 changes: 29 additions & 0 deletions tests/test_output_sanitiser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import os

from geopandas import GeoDataFrame
from shapely.geometry import Point

from genet import Network
from genet.output import geojson as gngeojson
from genet.output import sanitiser
Expand Down Expand Up @@ -51,3 +56,27 @@ def test_sanitising_geodataframes_with_ids_list(assert_semantically_equal):
assert_semantically_equal(
links[["length", "from", "to", "id", "ids", "u", "v", "modes"]].to_dict(), correct_links
)


def test_saving_geodataframe_with_missing_geometry_produces_file(tmpdir):
    """A GeoDataFrame with a missing value in its geometry column still saves to geojson."""
    file_stem = "tmp"
    geojson_path = tmpdir / file_stem + ".geojson"
    # Check the file is absent up front so the final assertion cannot pass vacuously.
    assert not os.path.exists(geojson_path)

    # First row has NaN in place of a geometry; second row is a valid point.
    gdf = GeoDataFrame(
        {"id": ["1", "2"], "geometry": [float("nan"), Point(2, 1)]},
        crs="EPSG:4326",
    )
    gngeojson.save_geodataframe(gdf, filename=file_stem, output_dir=tmpdir)

    assert os.path.exists(geojson_path)


def test_saving_geodataframe_with_missing_data_in_string_column_produces_file(tmpdir):
    """A GeoDataFrame with a missing value in a string column still saves to geojson."""
    file_stem = "tmp"
    geojson_path = tmpdir / file_stem + ".geojson"
    # Check the file is absent up front so the final assertion cannot pass vacuously.
    assert not os.path.exists(geojson_path)

    # The "id" column mixes a string with NaN; both geometries are valid points.
    gdf = GeoDataFrame(
        {"id": ["1", float("nan")], "geometry": [Point(2, 1), Point(2, 1)]},
        crs="EPSG:4326",
    )
    gngeojson.save_geodataframe(gdf, filename=file_stem, output_dir=tmpdir)

    assert os.path.exists(geojson_path)
Loading