ibis-project · cpcloud · Jul 25, 2024 · Jul 24, 2024
diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py
@@ -322,3 +322,34 @@ def test_connect_named_in_memory_db():
 
     default_memory_db = ibis.duckdb.connect()
     assert "ork" not in default_memory_db.list_tables()
+
+
+@pytest.mark.parametrize(
+    ("url", "method_name"),
+    [
+        pytest.param(
+            "hf://datasets/datasets-examples/doc-formats-csv-1/data.csv",
+            "read_csv",
+            id="csv",
+        ),
+        pytest.param(
+            "hf://datasets/datasets-examples/doc-formats-jsonl-1/data.jsonl",
+            "read_json",
+            id="jsonl",
+        ),
+        pytest.param(
+            "hf://datasets/datasets-examples/doc-formats-parquet-1/data/train-00000-of-00001.parquet",
+            "read_parquet",
+            id="parquet",
+        ),
+    ],
+)
+@pytest.mark.xfail(
+    condition=LINUX and SANDBOXED,
+    raises=duckdb.Error,
+    reason="nix on linux is not allowed to access the network and cannot download the httpfs extension",
+)
+def test_hugging_face(con, url, method_name):
+    """Each `read_*` method loads a non-empty table from an `hf://` dataset URL."""
+    table = getattr(con, method_name)(url)
+    assert table.count().execute() > 0
diff --git a/ibis/util.py b/ibis/util.py
@@ -10,6 +10,7 @@
 import itertools
 import operator
 import os
+import re
 import sys
 import textwrap
 import types
@@ -499,9 +500,10 @@ def normalize_filename(source: str | Path) -> str:
         source = source.removeprefix(f"{prefix}://")
 
     def _absolufy_paths(name):
-        if not name.startswith(
-            ("http", "s3", "az", "abfs", "abfss", "adl", "gs", "gcs", "azure")
-        ):
+        """Return `name` unchanged if it is a URL, otherwise make it absolute."""
+        # A URL begins with a scheme (optionally `::`-chained, as in
+        # `simplecache::s3://...`) followed by `://`; anything else is a local path.
+        if re.match(r"(?:[\w+.-]+::)*[\w+.-]+://", name) is None:
             return os.path.abspath(name)
         return name