Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: support writing dataframes without geometry column #267

Merged
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## 0.6.1 (???)
theroggy marked this conversation as resolved.
Show resolved Hide resolved

### Improvements

- Support writing dataframes without geometry (#267)

### Bug fixes

- Fix int32 overflow when reading int64 columns (#260)
Expand Down
2 changes: 1 addition & 1 deletion pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1611,7 +1611,7 @@ def ogr_write(
### Get geometry type
# TODO: this is brittle for 3D / ZM / M types
# TODO: fail on M / ZM types
geometry_code = get_geometry_type_code(geometry_type or "Unknown")
geometry_code = get_geometry_type_code(geometry_type)

try:
if create_layer:
Expand Down
6 changes: 0 additions & 6 deletions pyogrio/_ogr.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,6 @@ def get_gdal_config_option(str name):


def ogr_driver_supports_write(driver):
# exclude drivers known to be unsupported by pyogrio even though they are
# supported for write by GDAL
if driver in {"XLSX"}:
return False


# check metadata for driver to see if it supports write
if _get_driver_metadata_item(driver, "DCAP_CREATE") == 'YES':
return True
Expand Down
34 changes: 22 additions & 12 deletions pyogrio/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def write_dataframe(

Parameters
----------
df : GeoDataFrame
df : GeoDataFrame or DataFrame
The data to write. For attribute columns of the "object" dtype,
all values will be converted to strings to be written to the
output file, except None and np.nan, which will be set to NULL
Expand Down Expand Up @@ -306,25 +306,27 @@ def write_dataframe(

path = str(path)

if not isinstance(df, gp.GeoDataFrame):
raise ValueError("'df' must be a GeoDataFrame")
if not isinstance(df, (gp.GeoDataFrame, pd.DataFrame)):
theroggy marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError("'df' must be a DataFrame or GeoDataFrame")

if driver is None:
driver = detect_driver(path)

geometry_columns = df.columns[df.dtypes == "geometry"]
if len(geometry_columns) == 0:
raise ValueError("'df' does not have a geometry column")

if len(geometry_columns) > 1:
raise ValueError(
"'df' must have only one geometry column. "
"Multiple geometry columns are not supported for output using OGR."
)

geometry_column = geometry_columns[0]
geometry = df[geometry_column]
fields = [c for c in df.columns if not c == geometry_column]
if len(geometry_columns) > 0:
geometry_column = geometry_columns[0]
geometry = df[geometry_column]
fields = [c for c in df.columns if not c == geometry_column]
else:
geometry_column = None
geometry = None
fields = list(df.columns)

# TODO: may need to fill in pd.NA, etc
field_data = []
Expand All @@ -345,7 +347,9 @@ def write_dataframe(
field_mask.append(None)

# Determine geometry_type and/or promote_to_multi
if geometry_type is None or promote_to_multi is None:
if geometry_column is not None and (
geometry_type is None or promote_to_multi is None
):
tmp_geometry_type = "Unknown"
has_z = False

Expand Down Expand Up @@ -402,7 +406,7 @@ def write_dataframe(
geometry_type = f"{geometry_type} Z"

crs = None
if geometry.crs:
if geometry_column is not None and geometry.crs:
# TODO: this may need to be WKT1, due to issues
# if possible use EPSG codes instead
epsg = geometry.crs.to_epsg()
Expand All @@ -411,11 +415,17 @@ def write_dataframe(
else:
crs = geometry.crs.to_wkt(WktVersion.WKT1_GDAL)

# Prepare geometry array to write
if geometry_column is not None:
geometry = to_wkb(geometry.values)
else:
geometry = np.repeat(None, len(df))
theroggy marked this conversation as resolved.
Show resolved Hide resolved

write(
path,
layer=layer,
driver=driver,
geometry=to_wkb(geometry.values),
geometry=geometry,
field_data=field_data,
field_mask=field_mask,
fields=fields,
Expand Down
7 changes: 2 additions & 5 deletions pyogrio/raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,9 +387,6 @@ def write(
layer_options=None,
**kwargs,
):
if geometry_type is None:
raise ValueError("geometry_type must be provided")

if driver is None:
driver = detect_driver(path)

Expand Down Expand Up @@ -421,13 +418,13 @@ def write(
if not isinstance(v, str):
raise ValueError(f"metadata value {v} must be a string")

if promote_to_multi is None:
if geometry_type is not None and promote_to_multi is None:
theroggy marked this conversation as resolved.
Show resolved Hide resolved
promote_to_multi = (
geometry_type.startswith("Multi")
and driver in DRIVERS_NO_MIXED_SINGLE_MULTI
)

if crs is None:
if geometry_type is not None and crs is None:
theroggy marked this conversation as resolved.
Show resolved Hide resolved
warnings.warn(
"'crs' was not provided. The output dataset will not have "
"projection information defined and may not be usable in other "
Expand Down
2 changes: 0 additions & 2 deletions pyogrio/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ def test_gdal_geos_version():
# drivers not supported for write by GDAL
("HTTP", False),
("OAPIF", False),
# drivers currently unsupported for write even though GDAL can write them
("XLSX", False),
],
)
def test_ogr_driver_supports_write(driver, expected):
Expand Down
45 changes: 45 additions & 0 deletions pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,51 @@ def test_write_dataframe(tmp_path, naturalearth_lowres, ext):
)


@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".fgb"])
theroggy marked this conversation as resolved.
Show resolved Hide resolved
def test_write_dataframe_nogeom(tmp_path, naturalearth_lowres, ext):
"""Test writing a plain DataFrame, i.e. one without a geometry column.
theroggy marked this conversation as resolved.
Show resolved Hide resolved

FlatGeobuf (.fgb) doesn't seem to support this, and just writes an empty file.
"""
# Prepare test data
input_gdf = read_dataframe(naturalearth_lowres)
theroggy marked this conversation as resolved.
Show resolved Hide resolved
input_df = input_gdf.drop(columns=["geometry"])
assert isinstance(input_df, pd.DataFrame)
assert not isinstance(input_df, gp.GeoDataFrame)

output_path = tmp_path / f"test{ext}"
write_dataframe(input_df, output_path)

if ext == ".shp":
# A shapefile without a geometry column results in only a .dbf file.
output_path = output_path.with_suffix(".dbf")
theroggy marked this conversation as resolved.
Show resolved Hide resolved
assert output_path.exists()
result_df = read_dataframe(output_path)

assert isinstance(result_df, pd.DataFrame)

# some dtypes do not round-trip precisely through these file types
check_dtype = ext not in [".json", ".geojson", ".geojsonl", ".xlsx"]
theroggy marked this conversation as resolved.
Show resolved Hide resolved

if ext in [".gpkg", ".shp", ".xlsx"]:
# These file types return a DataFrame when read.
assert not isinstance(result_df, gp.GeoDataFrame)
pd.testing.assert_frame_equal(
result_df, input_df, check_index_type=False, check_dtype=check_dtype
)
else:
# These file types return a GeoDataFrame whose geometries are all None when read.
input_none_geom_gdf = gp.GeoDataFrame(
input_df, geometry=np.repeat(None, len(input_df)), crs="epsg:4326"
)
assert_geodataframe_equal(
result_df,
input_none_geom_gdf,
check_index_type=False,
check_dtype=check_dtype,
)


@pytest.mark.filterwarnings("ignore:.*Layer .* does not have any features to read")
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
def test_write_empty_dataframe(tmp_path, ext):
Expand Down