Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(geopandas io): fixes dependency on gpd installation #93

Merged
merged 1 commit into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 24 additions & 43 deletions nesta_ds_utils/loading_saving/S3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import boto3
from fnmatch import fnmatch
import pandas as pd
import geopandas as gpd
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
Expand All @@ -12,6 +11,14 @@
import warnings
from nesta_ds_utils.loading_saving import file_ops

from nesta_ds_utils.loading_saving.gis_interface import _gis_enabled

if _gis_enabled:
from nesta_ds_utils.loading_saving.gis_interface import (
_gdf_to_fileobj,
_fileobj_to_gdf,
)


def get_bucket_filenames_s3(bucket_name: str, dir_name: str = "") -> List[str]:
"""Get a list of all files in bucket directory.
Expand Down Expand Up @@ -58,27 +65,6 @@ def _df_to_fileobj(df_data: pd.DataFrame, path_to: str, **kwargs) -> io.BytesIO:
return buffer


def _gdf_to_fileobj(df_data: gpd.GeoDataFrame, path_to: str, **kwargs) -> io.BytesIO:
    """Serialise a GeoDataFrame into an in-memory bytes buffer.

    The output format is chosen from ``path_to``; only names matching
    ``*.geojson`` are supported.

    Args:
        df_data (gpd.GeoDataFrame): Geodataframe to serialise.
        path_to (str): Saving file name, used only to pick the format.
        **kwargs: Extra keyword arguments forwarded to ``df_data.to_file``.

    Returns:
        io.BytesIO: Buffer holding the serialised data, rewound to the start.

    Raises:
        NotImplementedError: If ``path_to`` does not match ``*.geojson``.
    """
    if not fnmatch(path_to, "*.geojson"):
        raise NotImplementedError(
            "Uploading geodataframe currently supported only for 'geojson'."
        )

    file_obj = io.BytesIO()
    df_data.to_file(file_obj, driver="GeoJSON", **kwargs)
    # Rewind so that whoever consumes the buffer reads it from the beginning.
    file_obj.seek(0)
    return file_obj


def _dict_to_fileobj(dict_data: dict, path_to: str, **kwargs) -> io.BytesIO:
"""Convert dictionary into bytes file object.

Expand Down Expand Up @@ -229,10 +215,16 @@ def upload_obj(
kwargs_writing (dict, optional): Dictionary of kwargs for writing data.

"""
if isinstance(obj, gpd.base.GeoPandasBase):
obj = _gdf_to_fileobj(obj, path_to, **kwargs_writing)
elif isinstance(obj, pd.DataFrame):
obj = _df_to_fileobj(obj, path_to, **kwargs_writing)
if isinstance(obj, pd.DataFrame):
if type(obj).__name__ == "GeoDataFrame":
if _gis_enabled:
obj = _gdf_to_fileobj(obj, path_to, **kwargs_writing)
else:
raise ModuleNotFoundError(
"Please install 'gis' extra from nesta_ds_utils or 'geopandas' to upload geodataframes."
)
else:
obj = _df_to_fileobj(obj, path_to, **kwargs_writing)
elif isinstance(obj, dict):
obj = _dict_to_fileobj(obj, path_to, **kwargs_writing)
elif isinstance(obj, list):
Expand Down Expand Up @@ -272,22 +264,6 @@ def _fileobj_to_df(fileobj: io.BytesIO, path_from: str, **kwargs) -> pd.DataFram
return pd.read_excel(fileobj, **kwargs)


def _fileobj_to_gdf(fileobj: io.BytesIO, path_from: str, **kwargs) -> pd.DataFrame:
    """Convert bytes file object into geodataframe.

    Only GeoJSON input is supported. The payload is decoded, parsed as JSON
    and its ``"features"`` member is handed to
    ``gpd.GeoDataFrame.from_features``.

    Args:
        fileobj (io.BytesIO): Bytes file object holding the raw file content.
        path_from (str): Path of loaded data, used only to pick the format.
        **kwargs: Accepted for signature parity with the other ``_fileobj_to_*``
            helpers; currently not used.

    Returns:
        gpd.GeoDataFrame: Data as geodataframe.

    Raises:
        NotImplementedError: If ``path_from`` does not match ``*.geojson``.
    """
    # Fail loudly on unsupported formats instead of silently returning None,
    # mirroring the behaviour of ``_gdf_to_fileobj`` on the upload side.
    if not fnmatch(path_from, "*.geojson"):
        raise NotImplementedError(
            "Download as geodataframe currently supported only for 'geojson'."
        )

    feature_collection = json.loads(fileobj.getvalue().decode())
    return gpd.GeoDataFrame.from_features(feature_collection["features"])


def _fileobj_to_dict(fileobj: io.BytesIO, path_from: str, **kwargs) -> dict:
"""Convert bytes file object into dictionary.

Expand Down Expand Up @@ -399,7 +375,12 @@ def download_obj(
)
elif download_as == "geodf":
if path_from.endswith(tuple([".geojson"])):
return _fileobj_to_gdf(fileobj, path_from, **kwargs_reading)
if _gis_enabled:
return _fileobj_to_gdf(fileobj, path_from, **kwargs_reading)
else:
raise ModuleNotFoundError(
"Please install 'gis' extra from nesta_ds_utils or 'geopandas' to download geodataframes."
)
else:
raise NotImplementedError(
"Download as geodataframe currently supported only " "for 'geojson'."
Expand Down
45 changes: 45 additions & 0 deletions nesta_ds_utils/loading_saving/gis_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Standard-library imports stay outside the ``try`` so that only a missing
# geopandas installation can switch ``_gis_enabled`` off.
from fnmatch import fnmatch
from io import BytesIO
from json import loads as load_json

try:
    from geopandas import GeoDataFrame
except ImportError:
    # geopandas is not installed: expose only the flag so that callers can
    # raise a helpful error instead of failing at import time.
    _gis_enabled = False
else:
    _gis_enabled = True

    def _gdf_to_fileobj(df_data: GeoDataFrame, path_to: str, **kwargs) -> BytesIO:
        """Convert GeoDataFrame into bytes file object.

        Args:
            df_data (GeoDataFrame): Geodataframe to convert.
            path_to (str): Saving file name, used only to pick the format.
            **kwargs: Extra keyword arguments forwarded to ``df_data.to_file``.

        Returns:
            io.BytesIO: Bytes file object, rewound to the start.

        Raises:
            NotImplementedError: If ``path_to`` does not match ``*.geojson``.
        """
        if not fnmatch(path_to, "*.geojson"):
            raise NotImplementedError(
                "Uploading geodataframe currently supported only for 'geojson'."
            )
        buffer = BytesIO()
        df_data.to_file(buffer, driver="GeoJSON", **kwargs)
        # Rewind so the consumer reads the buffer from the beginning.
        buffer.seek(0)
        return buffer

    def _fileobj_to_gdf(fileobj: BytesIO, path_from: str, **kwargs) -> GeoDataFrame:
        """Convert bytes file object into geodataframe.

        Args:
            fileobj (io.BytesIO): Bytes file object holding the raw content.
            path_from (str): Path of loaded data, used only to pick the format.
            **kwargs: Accepted for signature parity with the other readers;
                currently not used.

        Returns:
            GeoDataFrame: Data as geodataframe.

        Raises:
            NotImplementedError: If ``path_from`` does not match ``*.geojson``.
        """
        # Fail loudly on unsupported formats instead of returning None, to
        # match the error handling of ``_gdf_to_fileobj``.
        if not fnmatch(path_from, "*.geojson"):
            raise NotImplementedError(
                "Download as geodataframe currently supported only for 'geojson'."
            )
        feature_collection = load_json(fileobj.getvalue().decode())
        return GeoDataFrame.from_features(feature_collection["features"])