Skip to content

Commit

Permalink
fix(duckdb): ensure that duckdb columns argument to read_csv accepts …
Browse files Browse the repository at this point in the history
…duckdb syntax not ibis syntax (#10696)

BREAKING CHANGE: The duckdb backend's `read_csv` method accepts only DuckDB types for the values components of the `columns` and `types` arguments. You may need to adjust existing code. For example, the string `"float64"` should be replaced with the string `"double"`.
  • Loading branch information
cpcloud authored Jan 22, 2025
1 parent b9bd2a8 commit 83bed74
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
14 changes: 9 additions & 5 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,14 +667,18 @@ def read_csv(
def make_struct_argument(obj: Mapping[str, str | dt.DataType]) -> sge.Struct:
expressions = []
geospatial = False
type_mapper = self.compiler.type_mapper
dialect = self.compiler.dialect
possible_geospatial_types = (
sge.DataType.Type.GEOGRAPHY,
sge.DataType.Type.GEOMETRY,
)

for name, typ in obj.items():
typ = dt.dtype(typ)
geospatial |= typ.is_geospatial()
sgtype = type_mapper.from_ibis(typ)
sgtype = sg.parse_one(typ, read=dialect, into=sge.DataType)
geospatial |= sgtype.this in possible_geospatial_types
prop = sge.PropertyEQ(
this=sge.to_identifier(name), expression=sge.convert(sgtype)
this=sge.to_identifier(name),
expression=sge.convert(sgtype.sql(dialect)),
)
expressions.append(prop)

Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/duckdb/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import sys

import duckdb
import numpy as np
import pandas as pd
import pyarrow as pa
import pytest
Expand Down Expand Up @@ -391,11 +392,12 @@ def test_multiple_tables_with_the_same_name(tmp_path):
@pytest.mark.parametrize(
"input",
[
{"columns": {"lat": "float64", "lon": "float64", "geom": "geometry"}},
{"types": {"geom": "geometry"}},
{"columns": {"lat": "double", "lon": "float", "geom": "geometry"}},
{"types": {"geom": "geometry", "lon": "float"}},
],
ids=["columns", "types"],
)
@pytest.mark.parametrize("all_varchar", [True, False])
@pytest.mark.parametrize("all_varchar", [True, False], ids=["varchar", "not_varchar"])
@pytest.mark.xfail(
LINUX and SANDBOXED,
reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
Expand Down Expand Up @@ -423,8 +425,6 @@ def test_memtable_doesnt_leak(con):


def test_pyarrow_batches_chunk_size(con): # 10443
import numpy as np

t = ibis.memtable(
{
"id": np.arange(10_000),
Expand Down
8 changes: 8 additions & 0 deletions ibis/backends/duckdb/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import ibis
import ibis.expr.datatypes as dt
from ibis.conftest import ARM64, LINUX, MACOS, SANDBOXED
from ibis.util import gen_name


def test_read_csv(con, data_dir):
Expand Down Expand Up @@ -461,3 +462,10 @@ def test_read_json_no_auto_detection(con, tmp_path):

t = con.read_json(path, auto_detect=False, columns={"year": "varchar"})
assert t.year.type() == dt.string


def test_read_csv_with_duckdb_specific_types(con):
    """`read_csv` must accept raw DuckDB type strings (e.g. STRUCT(...)) in `columns`.

    The path is a generated name that does not exist on disk, so DuckDB is
    expected to fail with "No files found" — reaching that error proves the
    DuckDB-syntax type string was parsed without raising first.
    """
    nonexistent_path = gen_name("duckdb")
    duckdb_typed_columns = {"a": "STRUCT(a INTEGER)"}
    with pytest.raises(duckdb.IOException, match="No files found"):
        con.read_csv(nonexistent_path, columns=duckdb_typed_columns)

0 comments on commit 83bed74

Please sign in to comment.