Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up gr.DataFrame.postprocess() and fix issue with getting headers of empty dataframes #10476

Merged
merged 11 commits into from
Jan 31, 2025
5 changes: 5 additions & 0 deletions .changeset/lucky-towns-allow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"gradio": patch
---

fix:Clean up `gr.DataFrame.postprocess()` and fix issue with getting headers of empty dataframes
2 changes: 1 addition & 1 deletion demo/mini_leaderboard/run.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions demo/mini_leaderboard/run.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# type: ignore
import gradio as gr
import pandas as pd
from pathlib import Path
Expand Down
237 changes: 140 additions & 97 deletions gradio/components/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ def __init__(
headers: list[str] | None = None,
row_count: int | tuple[int, str] = (1, "dynamic"),
col_count: int | tuple[int, str] | None = None,
datatype: str | list[str] = "str",
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
| Sequence[
Literal["str", "number", "bool", "date", "markdown", "html"]
] = "str",
type: Literal["pandas", "numpy", "array", "polars"] = "pandas",
latex_delimiters: list[dict[str, str | bool]] | None = None,
label: str | None = None,
Expand All @@ -99,8 +102,8 @@ def __init__(
):
"""
Parameters:
value: Default value to display in the DataFrame. If a Styler is provided, it will be used to set the displayed value in the DataFrame (e.g. to set precision of numbers) if the `interactive` is False. If a Callable function is provided, the function will be called whenever the app loads to set the initial value of the component.
headers: List of str header names. If None, no headers are shown.
value: Default value to display in the DataFrame. Supports pandas, numpy, polars, and list of lists. If a Styler is provided, it will be used to set the displayed value in the DataFrame (e.g. to set precision of numbers) if the `interactive` is False. If a Callable function is provided, the function will be called whenever the app loads to set the initial value of the component.
headers: List of str header names. These are used to set the column headers of the dataframe if the value does not have headers. If None, no headers are shown.
row_count: Limit number of rows for input and decide whether user can create new rows or delete existing rows. The first element of the tuple is an `int`, the row count; the second should be 'fixed' or 'dynamic', the new row behaviour. If an `int` is passed the rows default to 'dynamic'
col_count: Limit number of columns for input and decide whether user can create new columns or delete existing columns. The first element of the tuple is an `int`, the number of columns; the second should be 'fixed' or 'dynamic', the new column behaviour. If an `int` is passed the columns default to 'dynamic'
datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", "date", and "markdown".
Expand Down Expand Up @@ -150,24 +153,6 @@ def __init__(
"Polars is not installed. Please install using `pip install polars`."
)
self.type = type
values = {
"str": "",
"number": 0,
"bool": False,
"date": "01/01/1970",
"markdown": "",
"html": "",
}
column_dtypes = (
[datatype] * self.col_count[0] if isinstance(datatype, str) else datatype
)
self.empty_input = {
"headers": self.headers,
"data": [
[values[c] for c in column_dtypes] for _ in range(self.row_count[0])
],
"metadata": None,
}

if latex_delimiters is None:
latex_delimiters = [{"left": "$$", "right": "$$", "display": True}]
Expand Down Expand Up @@ -235,7 +220,7 @@ def preprocess(
)

@staticmethod
def _is_empty(
def is_empty(
value: pd.DataFrame
| Styler
| np.ndarray
Expand All @@ -246,9 +231,14 @@ def _is_empty(
| str
| None,
) -> bool:
"""
Checks if the value of the dataframe provided is empty.
"""
import pandas as pd
from pandas.io.formats.style import Styler

if value is None:
return True
if isinstance(value, pd.DataFrame):
return value.empty
elif isinstance(value, Styler):
Expand All @@ -257,13 +247,15 @@ def _is_empty(
return value.size == 0
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
return value.is_empty()
elif isinstance(value, list) and len(value) and isinstance(value[0], list):
return len(value[0]) == 0
elif isinstance(value, (list, dict)):
elif isinstance(value, list):
if len(value) > 0 and isinstance(value[0], list):
return len(value[0]) == 0
return len(value) == 0
elif isinstance(value, dict):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think the logic for dict is inconsistent. A df is considered empty if it has no values (even if it has headers), but a dict is considered empty only if it has neither headers nor values.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, great catch, thanks!

return len(value) == 0
return False

def postprocess(
def get_headers(
self,
value: pd.DataFrame
| Styler
Expand All @@ -274,102 +266,153 @@ def postprocess(
| dict
| str
| None,
) -> DataframeData:
) -> list[str]:
"""
Parameters:
value: Expects data in any of these formats: `pandas.DataFrame`, `pandas.Styler`, `numpy.array`, `polars.DataFrame`, `list[list]`, `list`, or a `dict` with keys 'data' (and optionally 'headers'), or `str` path to a csv, which is rendered as the spreadsheet.
Returns:
the uploaded spreadsheet data as an object with `headers` and `data` keys and optional `metadata` key
Returns the headers of the dataframes based on the value provided. For values
that do not have headers, an empty list is returned.
"""
import pandas as pd
from pandas.io.formats.style import Styler

if isinstance(value, Styler) and semantic_version.Version(
pd.__version__
) < semantic_version.Version("1.5.0"):
raise ValueError(
"Styler objects are only supported in pandas version 1.5.0 or higher. Please try: `pip install --upgrade pandas` to use this feature."
)
if value is None:
return []
if isinstance(value, pd.DataFrame):
return list(value.columns)
elif isinstance(value, Styler):
return list(value.data.columns) # type: ignore
elif isinstance(value, str):
return list(pd.read_csv(value).columns)
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
return list(value.columns)
elif isinstance(value, dict):
return value.get("headers", [])
elif isinstance(value, (list, np.ndarray)):
return []
return []

@staticmethod
def get_cell_data(
value: pd.DataFrame
| Styler
| np.ndarray
| pl.DataFrame
| list
| list[list]
| dict
| str
| None,
) -> list[list[Any]]:
"""
Gets the cell data (as a list of lists) from the value provided.
"""
import pandas as pd
from pandas.io.formats.style import Styler

if value is None or self._is_empty(value):
return DataframeData(
headers=self.headers, data=[["" for _ in range(len(self.headers))]]
)
if isinstance(value, dict):
if len(value) == 0:
return DataframeData(
headers=self.headers, data=[["" for _ in range(len(self.headers))]]
)
return DataframeData(
headers=value.get("headers", []), data=value.get("data", [[]])
)
return value.get("data", [[]])
if isinstance(value, (str, pd.DataFrame)):
if isinstance(value, str):
value = pd.read_csv(value) # type: ignore
if len(value) == 0:
return DataframeData(
headers=[str(col) for col in value.columns], # Convert to strings
data=[["" for _ in range(len(value.columns))]],
)
return DataframeData(
headers=[str(col) for col in value.columns],
data=value.to_dict(orient="split")["data"],
)
return value.to_dict(orient="split")["data"]
elif isinstance(value, Styler):
if self.interactive:
warnings.warn(
"Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
)
df: pd.DataFrame = value.data # type: ignore
hidden_columns = getattr(value, "hidden_columns", [])
visible_cols = [
i
for i, col in enumerate(df.columns)
if i not in getattr(value, "hidden_columns", [])
i for i, _ in enumerate(df.columns) if i not in hidden_columns
]
df = df.iloc[:, visible_cols]

if len(df) == 0:
return DataframeData(
headers=list(df.columns),
data=[["" for _ in range(len(df.columns))]],
metadata=self.__extract_metadata(
value, getattr(value, "hidden_columns", [])
), # type: ignore
)
return DataframeData(
headers=list(df.columns),
data=df.to_dict(orient="split")["data"], # type: ignore
metadata=self.__extract_metadata(
value, getattr(value, "hidden_columns", [])
), # type: ignore
)
return df.to_dict(orient="split")["data"]
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
if len(value) == 0:
return DataframeData(headers=list(value.to_dict().keys()), data=[[]]) # type: ignore
df_dict = value.to_dict() # type: ignore
headers = list(df_dict.keys())
data = list(zip(*df_dict.values()))
return DataframeData(headers=headers, data=data)
return data
elif isinstance(value, (np.ndarray, list)):
if len(value) == 0:
return DataframeData(headers=self.headers, data=[[]])
if isinstance(value, np.ndarray):
value = value.tolist()
if not isinstance(value, list):
raise ValueError("output cannot be converted to list")
if not isinstance(value[0], list):
return [[v] for v in value]
return value
else:
raise ValueError(
f"Cannot process value of type {type(value)} in gr.Dataframe"
)

_headers = self.headers
if len(self.headers) < len(value[0]):
_headers: list[str] = [
*self.headers,
*[str(i) for i in range(len(self.headers) + 1, len(value[0]) + 1)],
]
elif len(self.headers) > len(value[0]):
_headers = self.headers[: len(value[0])]
@staticmethod
def get_metadata(
value: pd.DataFrame
| Styler
| np.ndarray
| pl.DataFrame
| list
| list[list]
| dict
| str
| None,
) -> dict[str, list[list]] | None:
"""
Gets the metadata from the value provided.
"""
from pandas.io.formats.style import Styler

return DataframeData(headers=_headers, data=value)
else:
raise ValueError("Cannot process value as a Dataframe")
if isinstance(value, Styler):
return Dataframe.__extract_metadata(
value, getattr(value, "hidden_columns", [])
)
return None

def postprocess(
    self,
    value: pd.DataFrame
    | Styler
    | np.ndarray
    | pl.DataFrame
    | list
    | list[list]
    | dict
    | str
    | None,
) -> DataframeData:
    """
    Builds a `DataframeData` payload (headers, cell data, and optional metadata) from the value provided.
    Parameters:
        value: Expects data in any of these formats: `pandas.DataFrame`, `pandas.Styler`, `numpy.array`, `polars.DataFrame`, `list[list]`, `list`, or a `dict` with keys 'data' (and optionally 'headers'), or `str` path to a csv, which is rendered as the spreadsheet.
    Returns:
        the uploaded spreadsheet data as an object with `headers` and `data` keys and optional `metadata` key
    Raises:
        ValueError: if a Styler is passed and the installed pandas version is older than 1.5.0.
    """
    import pandas as pd
    from pandas.io.formats.style import Styler

    if isinstance(value, Styler):
        if semantic_version.Version(pd.__version__) < semantic_version.Version(
            "1.5.0"
        ):
            raise ValueError(
                "Styler objects are only supported in pandas version 1.5.0 or higher. Please try: `pip install --upgrade pandas` to use this feature."
            )
        if self.interactive:
            warnings.warn(
                "Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
            )

    # Headers carried by the value take precedence; fall back to the
    # component-level headers when the value has none.
    headers = self.get_headers(value) or self.headers

    data = [] if self.is_empty(value) else self.get_cell_data(value)
    if len(data) == 0:
        # `get_cell_data` can return zero rows even when `is_empty` is False
        # (e.g. a dict like {"headers": [...], "data": []} or a csv path whose
        # file only has a header row). Fall back to a single blank row so the
        # column reconciliation below never indexes into an empty list.
        data = [["" for _ in headers]]

    # Reconcile the number of headers with the width of the first data row:
    # truncate surplus headers, or pad with 1-based positional names.
    num_cols = len(data[0])
    if len(headers) > num_cols:
        headers = headers[:num_cols]
    elif len(headers) < num_cols:
        headers = [
            *headers,
            *[str(i) for i in range(len(headers) + 1, num_cols + 1)],
        ]

    metadata = self.get_metadata(value)
    return DataframeData(
        headers=headers,
        data=data,
        metadata=metadata,  # type: ignore
    )

@staticmethod
def __get_cell_style(cell_id: str, cell_styles: list[dict]) -> str:
Expand Down
15 changes: 12 additions & 3 deletions gradio/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,10 @@ def __init__(
headers: list[str] | None = None,
row_count: int | tuple[int, str] = (1, "dynamic"),
col_count: int | tuple[int, str] | None = None,
datatype: str | list[str] = "str",
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
| Sequence[
Literal["str", "number", "bool", "date", "markdown", "html"]
] = "str",
type: Literal["numpy"] = "numpy",
latex_delimiters: list[dict[str, str | bool]] | None = None,
label: str | None = None,
Expand Down Expand Up @@ -649,7 +652,10 @@ def __init__(
headers: list[str] | None = None,
row_count: int | tuple[int, str] = (1, "dynamic"),
col_count: int | tuple[int, str] | None = None,
datatype: str | list[str] = "str",
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
| Sequence[
Literal["str", "number", "bool", "date", "markdown", "html"]
] = "str",
type: Literal["array"] = "array",
latex_delimiters: list[dict[str, str | bool]] | None = None,
label: str | None = None,
Expand Down Expand Up @@ -719,7 +725,10 @@ def __init__(
headers: list[str] | None = None,
row_count: int | tuple[int, str] = (1, "dynamic"),
col_count: Literal[1] = 1,
datatype: str | list[str] = "str",
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
| Sequence[
Literal["str", "number", "bool", "date", "markdown", "html"]
] = "str",
type: Literal["array"] = "array",
latex_delimiters: list[dict[str, str | bool]] | None = None,
label: str | None = None,
Expand Down
Loading
Loading