Skip to content

Commit

Permalink
feat(pandas): add read_csv and read_parquet
Browse files Browse the repository at this point in the history
test(pandas, dask): remove `notyet` markers for read_*
  • Loading branch information
gforsyth authored and cpcloud committed Oct 31, 2023
1 parent 2d75955 commit 34eeca6
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 4 deletions.
58 changes: 58 additions & 0 deletions ibis/backends/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
import ibis.expr.operations as ops
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis import util
from ibis.backends.base import BaseBackend
from ibis.formats.pandas import PandasData, PandasSchema
from ibis.formats.pyarrow import PyArrowData

if TYPE_CHECKING:
import pathlib
from collections.abc import Mapping, MutableMapping


Expand Down Expand Up @@ -80,6 +82,62 @@ def from_dataframe(
client.dictionary[name] = df
return client.table(name)

def read_csv(
self, source: str | pathlib.Path, table_name: str | None = None, **kwargs: Any
):
"""Register a CSV file as a table in the current session.
Parameters
----------
source
The data source. Can be a local or remote file, pathlike objects
also accepted.
table_name
An optional name to use for the created table. This defaults to
a generated name.
**kwargs
Additional keyword arguments passed to Pandas loading function.
See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
for more information.
Returns
-------
ir.Table
The just-registered table
"""
table_name = table_name or util.gen_name("read_csv")
df = pd.read_csv(source, **kwargs)
self.dictionary[table_name] = df
return self.table(table_name)

def read_parquet(
    self, source: str | pathlib.Path, table_name: str | None = None, **kwargs: Any
):
    """Load parquet data with pandas and register it as a table.

    Parameters
    ----------
    source
        The data source(s), handed unchanged to ``pandas.read_parquet``.
        May be a path to a file, an iterable of files, or directory of
        parquet files.
    table_name
        Name under which the loaded frame is registered. When omitted
        (or otherwise falsy) a name is generated with
        ``util.gen_name("read_parquet")``.
    **kwargs
        Extra keyword arguments forwarded verbatim to
        ``pandas.read_parquet``.
        See https://pandas.pydata.org/docs/reference/api/pandas.read_parquet.html
        for the available options.

    Returns
    -------
    ir.Table
        The table expression for the newly registered frame.
    """
    # Settle on the name before touching the source, so the fallback name
    # is generated ahead of any file access.
    if not table_name:
        table_name = util.gen_name("read_parquet")

    # Store the frame in the backend's name -> DataFrame mapping; an
    # existing entry with the same name is replaced.
    self.dictionary[table_name] = pd.read_parquet(source, **kwargs)
    return self.table(table_name)

@property
def version(self) -> str:
    """Return the version string of the installed pandas package."""
    pandas_version = pd.__version__
    return pandas_version
Expand Down
4 changes: 0 additions & 4 deletions ibis/backends/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,11 +393,9 @@ def test_register_garbage(con, monkeypatch):
@pytest.mark.notyet(
[
"bigquery",
"dask",
"impala",
"mssql",
"mysql",
"pandas",
"postgres",
"sqlite",
"trino",
Expand Down Expand Up @@ -554,11 +552,9 @@ def num_diamonds(data_dir):
@pytest.mark.notyet(
[
"bigquery",
"dask",
"impala",
"mssql",
"mysql",
"pandas",
"postgres",
"sqlite",
"trino",
Expand Down

0 comments on commit 34eeca6

Please sign in to comment.