Skip to content

Commit

Permalink
feat(duckdb): support arbitrary url prefixes (#9691)
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored Jul 25, 2024
1 parent 6cd3eee commit 11af489
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
23 changes: 23 additions & 0 deletions ibis/backends/duckdb/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,3 +322,26 @@ def test_connect_named_in_memory_db():

default_memory_db = ibis.duckdb.connect()
assert "ork" not in default_memory_db.list_tables()


@pytest.mark.parametrize(
    ("url", "method_name"),
    [
        pytest.param(
            "hf://datasets/datasets-examples/doc-formats-csv-1/data.csv",
            "read_csv",
            id="csv",
        ),
        pytest.param(
            "hf://datasets/datasets-examples/doc-formats-jsonl-1/data.jsonl",
            "read_json",
            id="jsonl",
        ),
        pytest.param(
            "hf://datasets/datasets-examples/doc-formats-parquet-1/data/train-00000-of-00001.parquet",
            "read_parquet",
            id="parquet",
        ),
    ],
)
@pytest.mark.xfail(
    LINUX and SANDBOXED,
    reason="nix on linux is not allowed to access the network and cannot download the httpfs extension",
    raises=duckdb.Error,
)
def test_hugging_face(con, url, method_name):
    """Read an ``hf://`` URL with the named reader and expect a non-empty table.

    Each case pairs a Hugging Face dataset URL with the connection method
    (``read_csv``, ``read_json`` or ``read_parquet``) used to load it.
    """
    # Look the reader up by name so one test body covers every file format.
    reader = getattr(con, method_name)
    table = reader(url)
    row_count = table.count().execute()
    assert row_count > 0
5 changes: 2 additions & 3 deletions ibis/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import itertools
import operator
import os
import re
import sys
import textwrap
import types
Expand Down Expand Up @@ -499,9 +500,7 @@ def normalize_filename(source: str | Path) -> str:
source = source.removeprefix(f"{prefix}://")

def _absolufy_paths(name):
    """Return ``name`` unchanged if it is a URL, else as an absolute path.

    A name counts as a URL when it begins with a ``<prefix>://`` marker.
    Any prefix is accepted (``s3``, ``hf``, ``simplecache::s3`` ...), so no
    list of known schemes has to be maintained.

    Parameters
    ----------
    name
        A file name or URL string.

    Returns
    -------
    str
        ``name`` itself for URLs, otherwise ``os.path.abspath(name)``.
    """
    # The prefix must not contain a path separator: a scheme never does,
    # while a local path such as ``./dir/odd://name`` would otherwise be
    # mistaken for a URL and left relative.
    if re.match(r"[^/\\]+://", name) is None:
        return os.path.abspath(name)
    return name

Expand Down

0 comments on commit 11af489

Please sign in to comment.