ibis-project · cpcloud · Mar 28, 2024 · Mar 28, 2024 · Mar 28, 2024
diff --git a/ibis/backends/dask/__init__.py b/ibis/backends/dask/__init__.py
@@ -10,8 +10,6 @@
 
 # import the pandas execution module to register dispatched implementations of
 # execute_node that the dask backend will later override
-import ibis.expr.operations as ops
-import ibis.expr.schema as sch
 import ibis.expr.types as ir
 from ibis import util
 from ibis.backends import NoUrl
@@ -167,11 +165,14 @@ def read_parquet(
         self.dictionary[table_name] = df
         return self.table(table_name)
 
-    def table(self, name: str, schema: sch.Schema | None = None):
-        df = self.dictionary[name]
-        schema = schema or self.schemas.get(name, None)
-        schema = PandasData.infer_table(df.head(1), schema=schema)
-        return ops.DatabaseTable(name, schema, self).to_expr()
+    def get_schema(self, table_name, *, database=None):
+        try:
+            schema = self.schemas[table_name]
+        except KeyError:
+            df = self.dictionary[table_name]
+            self.schemas[table_name] = schema = PandasData.infer_table(df.head(1))
+
+        return schema
 
     def _convert_object(self, obj) -> dd.DataFrame:
         if isinstance(obj, dd.DataFrame):

diff --git a/ibis/backends/pandas/__init__.py b/ibis/backends/pandas/__init__.py
@@ -165,18 +165,16 @@ def list_tables(self, like=None, database=None):
         return self._filter_with_like(list(self.dictionary.keys()), like)
 
     def table(self, name: str, schema: sch.Schema | None = None):
-        df = self.dictionary[name]
-        schema = schema or self.schemas.get(name, None)
-        schema = PandasData.infer_table(df, schema=schema)
-        return ops.DatabaseTable(name, schema, self).to_expr()
+        inferred_schema = self.get_schema(name)
+        overridden_schema = {**inferred_schema, **(schema or {})}
+        return ops.DatabaseTable(name, overridden_schema, self).to_expr()
 
     def get_schema(self, table_name, *, database=None):
-        schemas = self.schemas
         try:
-            schema = schemas[table_name]
+            schema = self.schemas[table_name]
         except KeyError:
             df = self.dictionary[table_name]
-            schemas[table_name] = schema = PandasData.infer_table(df)
+            self.schemas[table_name] = schema = PandasData.infer_table(df)
 
         return schema
 

diff --git a/ibis/expr/datatypes/core.py b/ibis/expr/datatypes/core.py
@@ -247,11 +247,6 @@ def from_polars(cls, polars_type, nullable=True) -> Self:
 
         return PolarsType.to_ibis(polars_type, nullable=nullable)
 
-    @classmethod
-    def from_dask(cls, dask_type, nullable=True) -> Self:
-        """Return the equivalent ibis datatype."""
-        return cls.from_pandas(dask_type, nullable=nullable)
-
     def to_numpy(self):
         """Return the equivalent numpy datatype."""
         from ibis.formats.numpy import NumpyType
@@ -276,10 +271,6 @@ def to_polars(self):
 
         return PolarsType.from_ibis(self)
 
-    def to_dask(self):
-        """Return the equivalent dask datatype."""
-        return self.to_pandas()
-
     def is_array(self) -> bool:
         """Return True if an instance of an Array type."""
         return isinstance(self, Array)

diff --git a/ibis/expr/schema.py b/ibis/expr/schema.py
@@ -162,11 +162,6 @@ def from_polars(cls, polars_schema):
 
         return PolarsSchema.to_ibis(polars_schema)
 
-    @classmethod
-    def from_dask(cls, dask_schema):
-        """Return the equivalent ibis schema."""
-        return cls.from_pandas(dask_schema)
-
     def to_numpy(self):
         """Return the equivalent numpy dtypes."""
         from ibis.formats.numpy import NumpySchema
@@ -191,10 +186,6 @@ def to_polars(self):
 
         return PolarsSchema.from_ibis(self)
 
-    def to_dask(self):
-        """Return the equivalent dask dtypes."""
-        return self.to_pandas()
-
     def as_struct(self) -> dt.Struct:
         return dt.Struct(self)
 
@@ -238,7 +229,7 @@ def schema(value: Any) -> Schema:
 
 
 @lazy_singledispatch
-def infer(value: Any, schema=None) -> Schema:
+def infer(value: Any) -> Schema:
     """Infer the corresponding ibis schema for a python object."""
     raise InputTypeError(value)
 
@@ -278,28 +269,25 @@ def from_pyarrow_schema(schema):
 
 
 @infer.register("pandas.DataFrame")
-def infer_pandas_dataframe(df, schema=None):
+def infer_pandas_dataframe(df):
     from ibis.formats.pandas import PandasData
 
-    return PandasData.infer_table(df, schema)
+    return PandasData.infer_table(df)
 
 
-# TODO(kszucs): do we really need the schema kwarg?
 @infer.register("pyarrow.Table")
-def infer_pyarrow_table(table, schema=None):
+def infer_pyarrow_table(table):
     from ibis.formats.pyarrow import PyArrowSchema
 
-    schema = schema if schema is not None else table.schema
-    return PyArrowSchema.to_ibis(schema)
+    return PyArrowSchema.to_ibis(table.schema)
 
 
 @infer.register("polars.DataFrame")
 @infer.register("polars.LazyFrame")
-def infer_polars_dataframe(df, schema=None):
+def infer_polars_dataframe(df):
     from ibis.formats.polars import PolarsSchema
 
-    schema = schema if schema is not None else df.schema
-    return PolarsSchema.to_ibis(schema)
+    return PolarsSchema.to_ibis(df.schema)
 
 
 # lock the dispatchers to avoid adding new implementations

diff --git a/ibis/expr/tests/test_schema.py b/ibis/expr/tests/test_schema.py
@@ -20,12 +20,6 @@
 
     has_pandas = True
 
-has_dask = False
-with contextlib.suppress(ImportError):
-    import dask.dataframe as dd  # noqa: F401
-
-    has_dask = True
-
 
 def test_whole_schema():
     schema = {
@@ -437,11 +431,6 @@ def test_schema_from_to_numpy_dtypes():
 @pytest.mark.parametrize(
     ("from_method", "to_method"),
     [
-        pytest.param(
-            "from_dask",
-            "to_dask",
-            marks=pytest.mark.skipif(not has_dask, reason="dask not installed"),
-        ),
         pytest.param(
             "from_pandas",
             "to_pandas",

diff --git a/ibis/formats/__init__.py b/ibis/formats/__init__.py
@@ -168,15 +168,13 @@ def convert_column(cls, obj: C, dtype: DataType) -> C:
         raise NotImplementedError
 
     @classmethod
-    def convert_table(cls, obj: T, schema: Schema) -> T:
+    def convert_table(cls, obj: T) -> T:
-        """Convert a format-specific table to the given ibis schema.
+        """Convert a format-specific table.
 
         Parameters
         ----------
         obj
             The format-specific table-like object to convert.
-        schema
-            The Ibis schema to convert to.
 
         Returns
         -------

diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py
@@ -94,25 +94,20 @@ def infer_column(cls, s):
         return PyArrowData.infer_column(s)
 
     @classmethod
-    def infer_table(cls, df, schema=None):
-        schema = schema if schema is not None else {}
-
+    def infer_table(cls, df):
         pairs = []
         for column_name in df.dtypes.keys():
             if not isinstance(column_name, str):
                 raise TypeError(
                     "Column names must be strings to use the pandas backend"
                 )
 
-            if column_name in schema:
-                ibis_dtype = schema[column_name]
+            pandas_column = df[column_name]
+            pandas_dtype = pandas_column.dtype
+            if pandas_dtype == np.object_:
+                ibis_dtype = cls.infer_column(pandas_column)
             else:
-                pandas_column = df[column_name]
-                pandas_dtype = pandas_column.dtype
-                if pandas_dtype == np.object_:
-                    ibis_dtype = cls.infer_column(pandas_column)
-                else:
-                    ibis_dtype = PandasType.to_ibis(pandas_dtype)
+                ibis_dtype = PandasType.to_ibis(pandas_dtype)
 
             pairs.append((column_name, ibis_dtype))