feat(pandas): add read_csv and read_parquet

test(pandas, dask): remove `notyet` markers for read_*
ibis-project · Oct 31, 2023 · 34eeca6 · 34eeca6
1 parent 2d75955
commit 34eeca6
Show file tree

Hide file tree

Showing 2 changed files with 58 additions and 4 deletions.
diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py
@@ -12,11 +12,13 @@
 import ibis.expr.operations as ops
 import ibis.expr.schema as sch
 import ibis.expr.types as ir
+from ibis import util
 from ibis.backends.base import BaseBackend
 from ibis.formats.pandas import PandasData, PandasSchema
 from ibis.formats.pyarrow import PyArrowData
 
 if TYPE_CHECKING:
+    import pathlib
     from collections.abc import Mapping, MutableMapping
 
 
@@ -80,6 +82,62 @@ def from_dataframe(
         client.dictionary[name] = df
         return client.table(name)
 
+    def read_csv(
+        self, source: str | pathlib.Path, table_name: str | None = None, **kwargs: Any
+    ):
+        """Load a CSV file with pandas and register it as a table in this session.
+
+        Parameters
+        ----------
+        source
+            The data source. Can be a local or remote file; path-like objects
+            are also accepted. It is passed to `pd.read_csv` unchanged.
+        table_name
+            Name under which the table is registered. If omitted or empty, a
+            name is generated with `util.gen_name("read_csv")`.
+        **kwargs
+            Additional keyword arguments forwarded to `pd.read_csv`.
+            See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
+            for more information.
+
+        Returns
+        -------
+        ir.Table
+            The newly registered table, as returned by `self.table(table_name)`.
+        """
+        table_name = table_name or util.gen_name("read_csv")
+        df = pd.read_csv(source, **kwargs)
+        self.dictionary[table_name] = df
+        return self.table(table_name)
+
+    def read_parquet(
+        self, source: str | pathlib.Path, table_name: str | None = None, **kwargs: Any
+    ):
+        """Load parquet data with pandas and register it as a table in this session.
+
+        Parameters
+        ----------
+        source
+            The data source(s). May be a path to a file, an iterable of files,
+            or a directory of parquet files; passed to `pd.read_parquet` as is.
+        table_name
+            Name under which the table is registered. If omitted or empty, a
+            name is generated with `util.gen_name("read_parquet")`.
+        **kwargs
+            Additional keyword arguments forwarded to `pd.read_parquet`.
+            See https://pandas.pydata.org/docs/reference/api/pandas.read_parquet.html
+            for more information.
+
+        Returns
+        -------
+        ir.Table
+            The newly registered table, as returned by `self.table(table_name)`.
+        """
+        table_name = table_name or util.gen_name("read_parquet")
+        df = pd.read_parquet(source, **kwargs)
+        self.dictionary[table_name] = df
+        return self.table(table_name)
+
     @property
     def version(self) -> str:
         return pd.__version__

diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py
@@ -393,11 +393,9 @@ def test_register_garbage(con, monkeypatch):
 @pytest.mark.notyet(
     [
         "bigquery",
-        "dask",
         "impala",
         "mssql",
         "mysql",
-        "pandas",
         "postgres",
         "sqlite",
         "trino",
@@ -554,11 +552,9 @@ def num_diamonds(data_dir):
 @pytest.mark.notyet(
     [
         "bigquery",
-        "dask",
         "impala",
         "mssql",
         "mysql",
-        "pandas",
         "postgres",
         "sqlite",
         "trino",