gradio-app · abidlabs · Jan 31, 2025 · Jan 31, 2025 · Jan 31, 2025 · Jan 31, 2025
diff --git a/.changeset/lucky-towns-allow.md b/.changeset/lucky-towns-allow.md
@@ -0,0 +1,5 @@
+---
+"gradio": patch
+---
+
+fix:Clean up `gr.DataFrame.postprocess()` and fix issue with getting headers of empty dataframes
diff --git a/demo/mini_leaderboard/run.ipynb b/demo/mini_leaderboard/run.ipynb
diff --git a/demo/mini_leaderboard/run.py b/demo/mini_leaderboard/run.py
@@ -1,3 +1,4 @@
+# type: ignore
 import gradio as gr
 import pandas as pd
 from pathlib import Path

diff --git a/gradio/components/dataframe.py b/gradio/components/dataframe.py
@@ -74,7 +74,10 @@ def __init__(
         headers: list[str] | None = None,
         row_count: int | tuple[int, str] = (1, "dynamic"),
         col_count: int | tuple[int, str] | None = None,
-        datatype: str | list[str] = "str",
+        datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
+        | Sequence[
+            Literal["str", "number", "bool", "date", "markdown", "html"]
+        ] = "str",
         type: Literal["pandas", "numpy", "array", "polars"] = "pandas",
         latex_delimiters: list[dict[str, str | bool]] | None = None,
         label: str | None = None,
@@ -99,8 +102,8 @@ def __init__(
     ):
         """
         Parameters:
-            value: Default value to display in the DataFrame. If a Styler is provided, it will be used to set the displayed value in the DataFrame (e.g. to set precision of numbers) if the `interactive` is False. If a Callable function is provided, the function will be called whenever the app loads to set the initial value of the component.
-            headers: List of str header names. If None, no headers are shown.
+            value: Default value to display in the DataFrame. Supports a pandas DataFrame, numpy array, polars DataFrame, or list of lists. If a Styler is provided, it will be used to set the displayed value in the DataFrame (e.g. to set precision of numbers) if `interactive` is False. If a Callable function is provided, the function will be called whenever the app loads to set the initial value of the component.
+            headers: List of str header names. These are used to set the column headers of the dataframe if the value does not have headers. If None, no headers are shown.
             row_count: Limit number of rows for input and decide whether user can create new rows or delete existing rows. The first element of the tuple is an `int`, the row count; the second should be 'fixed' or 'dynamic', the new row behaviour. If an `int` is passed the rows default to 'dynamic'
             col_count: Limit number of columns for input and decide whether user can create new columns or delete existing columns. The first element of the tuple is an `int`, the number of columns; the second should be 'fixed' or 'dynamic', the new column behaviour. If an `int` is passed the columns default to 'dynamic'
             datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", "date", and "markdown".
@@ -150,24 +153,6 @@ def __init__(
                 "Polars is not installed. Please install using `pip install polars`."
             )
         self.type = type
-        values = {
-            "str": "",
-            "number": 0,
-            "bool": False,
-            "date": "01/01/1970",
-            "markdown": "",
-            "html": "",
-        }
-        column_dtypes = (
-            [datatype] * self.col_count[0] if isinstance(datatype, str) else datatype
-        )
-        self.empty_input = {
-            "headers": self.headers,
-            "data": [
-                [values[c] for c in column_dtypes] for _ in range(self.row_count[0])
-            ],
-            "metadata": None,
-        }
 
         if latex_delimiters is None:
             latex_delimiters = [{"left": "$$", "right": "$$", "display": True}]
@@ -235,7 +220,7 @@ def preprocess(
             )
 
     @staticmethod
-    def _is_empty(
+    def is_empty(
         value: pd.DataFrame
         | Styler
         | np.ndarray
@@ -246,9 +231,14 @@ def _is_empty(
         | str
         | None,
     ) -> bool:
+        """
+        Checks if the value of the dataframe provided is empty.
+        """
         import pandas as pd
         from pandas.io.formats.style import Styler
 
+        if value is None:
+            return True
         if isinstance(value, pd.DataFrame):
             return value.empty
         elif isinstance(value, Styler):
@@ -257,13 +247,15 @@ def _is_empty(
             return value.size == 0
         elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
             return value.is_empty()
-        elif isinstance(value, list) and len(value) and isinstance(value[0], list):
-            return len(value[0]) == 0
-        elif isinstance(value, (list, dict)):
+        elif isinstance(value, list):
+            if len(value) > 0 and isinstance(value[0], list):
+                return len(value[0]) == 0
+            return len(value) == 0
+        elif isinstance(value, dict):
             return len(value) == 0
         return False
 
-    def postprocess(
+    def get_headers(
         self,
         value: pd.DataFrame
         | Styler
@@ -274,102 +266,153 @@ def postprocess(
         | dict
         | str
         | None,
-    ) -> DataframeData:
+    ) -> list[str]:
         """
-        Parameters:
-            value: Expects data in any of these formats: `pandas.DataFrame`, `pandas.Styler`, `numpy.array`, `polars.DataFrame`, `list[list]`, `list`, or a `dict` with keys 'data' (and optionally 'headers'), or `str` path to a csv, which is rendered as the spreadsheet.
-        Returns:
-            the uploaded spreadsheet data as an object with `headers` and `data` keys and optional `metadata` key
+        Returns the headers of the dataframe based on the value provided. For values
+        that do not have headers, an empty list is returned.
         """
         import pandas as pd
         from pandas.io.formats.style import Styler
 
-        if isinstance(value, Styler) and semantic_version.Version(
-            pd.__version__
-        ) < semantic_version.Version("1.5.0"):
-            raise ValueError(
-                "Styler objects are only supported in pandas version 1.5.0 or higher. Please try: `pip install --upgrade pandas` to use this feature."
-            )
+        if value is None:
+            return []
+        if isinstance(value, pd.DataFrame):
+            return list(value.columns)
+        elif isinstance(value, Styler):
+            return list(value.data.columns)  # type: ignore
+        elif isinstance(value, str):
+            return list(pd.read_csv(value).columns)
+        elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
+            return list(value.columns)
+        elif isinstance(value, dict):
+            return value.get("headers", [])
+        elif isinstance(value, (list, np.ndarray)):
+            return []
+        return []
+
+    @staticmethod
+    def get_cell_data(
+        value: pd.DataFrame
+        | Styler
+        | np.ndarray
+        | pl.DataFrame
+        | list
+        | list[list]
+        | dict
+        | str
+        | None,
+    ) -> list[list[Any]]:
+        """
+        Gets the cell data (as a list of lists) from the value provided.
+        """
+        import pandas as pd
+        from pandas.io.formats.style import Styler
 
-        if value is None or self._is_empty(value):
-            return DataframeData(
-                headers=self.headers, data=[["" for _ in range(len(self.headers))]]
-            )
         if isinstance(value, dict):
-            if len(value) == 0:
-                return DataframeData(
-                    headers=self.headers, data=[["" for _ in range(len(self.headers))]]
-                )
-            return DataframeData(
-                headers=value.get("headers", []), data=value.get("data", [[]])
-            )
+            return value.get("data", [[]])
         if isinstance(value, (str, pd.DataFrame)):
             if isinstance(value, str):
                 value = pd.read_csv(value)  # type: ignore
-            if len(value) == 0:
-                return DataframeData(
-                    headers=[str(col) for col in value.columns],  # Convert to strings
-                    data=[["" for _ in range(len(value.columns))]],
-                )
-            return DataframeData(
-                headers=[str(col) for col in value.columns],
-                data=value.to_dict(orient="split")["data"],
-            )
+            return value.to_dict(orient="split")["data"]
         elif isinstance(value, Styler):
-            if self.interactive:
-                warnings.warn(
-                    "Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
-                )
             df: pd.DataFrame = value.data  # type: ignore
+            hidden_columns = getattr(value, "hidden_columns", [])
             visible_cols = [
-                i
-                for i, col in enumerate(df.columns)
-                if i not in getattr(value, "hidden_columns", [])
+                i for i, _ in enumerate(df.columns) if i not in hidden_columns
             ]
             df = df.iloc[:, visible_cols]
-
-            if len(df) == 0:
-                return DataframeData(
-                    headers=list(df.columns),
-                    data=[["" for _ in range(len(df.columns))]],
-                    metadata=self.__extract_metadata(
-                        value, getattr(value, "hidden_columns", [])
-                    ),  # type: ignore
-                )
-            return DataframeData(
-                headers=list(df.columns),
-                data=df.to_dict(orient="split")["data"],  # type: ignore
-                metadata=self.__extract_metadata(
-                    value, getattr(value, "hidden_columns", [])
-                ),  # type: ignore
-            )
+            return df.to_dict(orient="split")["data"]
         elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
-            if len(value) == 0:
-                return DataframeData(headers=list(value.to_dict().keys()), data=[[]])  # type: ignore
             df_dict = value.to_dict()  # type: ignore
-            headers = list(df_dict.keys())
             data = list(zip(*df_dict.values()))
-            return DataframeData(headers=headers, data=data)
+            return data
         elif isinstance(value, (np.ndarray, list)):
-            if len(value) == 0:
-                return DataframeData(headers=self.headers, data=[[]])
             if isinstance(value, np.ndarray):
                 value = value.tolist()
             if not isinstance(value, list):
                 raise ValueError("output cannot be converted to list")
+            if not isinstance(value[0], list):
+                return [[v] for v in value]
+            return value
+        else:
+            raise ValueError(
+                f"Cannot process value of type {type(value)} in gr.Dataframe"
+            )
 
-            _headers = self.headers
-            if len(self.headers) < len(value[0]):
-                _headers: list[str] = [
-                    *self.headers,
-                    *[str(i) for i in range(len(self.headers) + 1, len(value[0]) + 1)],
-                ]
-            elif len(self.headers) > len(value[0]):
-                _headers = self.headers[: len(value[0])]
+    @staticmethod
+    def get_metadata(
+        value: pd.DataFrame
+        | Styler
+        | np.ndarray
+        | pl.DataFrame
+        | list
+        | list[list]
+        | dict
+        | str
+        | None,
+    ) -> dict[str, list[list]] | None:
+        """
+        Gets the metadata from the value provided.
+        """
+        from pandas.io.formats.style import Styler
 
-            return DataframeData(headers=_headers, data=value)
-        else:
-            raise ValueError("Cannot process value as a Dataframe")
+        if isinstance(value, Styler):
+            return Dataframe.__extract_metadata(
+                value, getattr(value, "hidden_columns", [])
+            )
+        return None
+
+    def postprocess(
+        self,
+        value: pd.DataFrame
+        | Styler
+        | np.ndarray
+        | pl.DataFrame
+        | list
+        | list[list]
+        | dict
+        | str
+        | None,
+    ) -> DataframeData:
+        """
+        Parameters:
+            value: Expects data in any of these formats: `pandas.DataFrame`, `pandas.Styler`, `numpy.array`, `polars.DataFrame`, `list[list]`, `list`, or a `dict` with keys 'data' (and optionally 'headers'), or `str` path to a csv, which is rendered as the spreadsheet.
+        Returns:
+            the uploaded spreadsheet data as an object with `headers` and `data` keys and optional `metadata` key
+        """
+        import pandas as pd
+        from pandas.io.formats.style import Styler
+
+        if isinstance(value, Styler) and semantic_version.Version(
+            pd.__version__
+        ) < semantic_version.Version("1.5.0"):
+            raise ValueError(
+                "Styler objects are only supported in pandas version 1.5.0 or higher. Please try: `pip install --upgrade pandas` to use this feature."
+            )
+        if isinstance(value, Styler) and self.interactive:
+            warnings.warn(
+                "Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
+            )
+
+        headers = self.get_headers(value) or self.headers
+        data = (
+            [["" for _ in range(len(headers))]]
+            if self.is_empty(value)
+            else self.get_cell_data(value)
+        )
+        if len(headers) > len(data[0]):
+            headers = headers[: len(data[0])]
+        elif len(headers) < len(data[0]):
+            headers = [
+                *headers,
+                *[str(i) for i in range(len(headers) + 1, len(data[0]) + 1)],
+            ]
+        metadata = self.get_metadata(value)
+        return DataframeData(
+            headers=headers,
+            data=data,
+            metadata=metadata,  # type: ignore
+        )
 
     @staticmethod
     def __get_cell_style(cell_id: str, cell_styles: list[dict]) -> str:

diff --git a/gradio/templates.py b/gradio/templates.py
@@ -579,7 +579,10 @@ def __init__(
         headers: list[str] | None = None,
         row_count: int | tuple[int, str] = (1, "dynamic"),
         col_count: int | tuple[int, str] | None = None,
-        datatype: str | list[str] = "str",
+        datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
+        | Sequence[
+            Literal["str", "number", "bool", "date", "markdown", "html"]
+        ] = "str",
         type: Literal["numpy"] = "numpy",
         latex_delimiters: list[dict[str, str | bool]] | None = None,
         label: str | None = None,
@@ -649,7 +652,10 @@ def __init__(
         headers: list[str] | None = None,
         row_count: int | tuple[int, str] = (1, "dynamic"),
         col_count: int | tuple[int, str] | None = None,
-        datatype: str | list[str] = "str",
+        datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
+        | Sequence[
+            Literal["str", "number", "bool", "date", "markdown", "html"]
+        ] = "str",
         type: Literal["array"] = "array",
         latex_delimiters: list[dict[str, str | bool]] | None = None,
         label: str | None = None,
@@ -719,7 +725,10 @@ def __init__(
         headers: list[str] | None = None,
         row_count: int | tuple[int, str] = (1, "dynamic"),
         col_count: Literal[1] = 1,
-        datatype: str | list[str] = "str",
+        datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
+        | Sequence[
+            Literal["str", "number", "bool", "date", "markdown", "html"]
+        ] = "str",
         type: Literal["array"] = "array",
         latex_delimiters: list[dict[str, str | bool]] | None = None,
         label: str | None = None,