From 6be154c94ef418518740c42f176a0970493e9b96 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 19 Mar 2024 12:12:13 +0100 Subject: [PATCH] depr(python): Rename `from_repr` parameter from `tbl` to `data` (#15156) --- py-polars/polars/convert.py | 410 ++++++++++++++++++------------------ 1 file changed, 206 insertions(+), 204 deletions(-) diff --git a/py-polars/polars/convert.py b/py-polars/polars/convert.py index e9a390095cef..c45271455aef 100644 --- a/py-polars/polars/convert.py +++ b/py-polars/polars/convert.py @@ -7,6 +7,7 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.deprecation import deprecate_renamed_parameter from polars._utils.various import _cast_repr_strings_with_schema from polars.datatypes import N_INFER_DEFAULT, Categorical, List, Object, String, Struct from polars.dependencies import pandas as pd @@ -243,210 +244,6 @@ def from_records( ) -def _from_dataframe_repr(m: re.Match[str]) -> DataFrame: - """Reconstruct a DataFrame from a regex-matched table repr.""" - from polars.datatypes.convert import dtype_short_repr_to_dtype - - # extract elements from table structure - lines = m.group().split("\n")[1:-1] - rows = [ - [re.sub(r"^[\W+]*│", "", elem).strip() for elem in row] - for row in [re.split("[┆|]", row.rstrip("│ ")) for row in lines] - if len(row) > 1 or not re.search("├[╌┼]+┤", row[0]) - ] - - # determine beginning/end of the header block - table_body_start = 2 - for idx, (elem, *_) in enumerate(rows): - if re.match(r"^\W*╞", elem): - table_body_start = idx - break - - # handle headers with wrapped column names and determine headers/dtypes - header_block = ["".join(h).split("---") for h in zip(*rows[:table_body_start])] - dtypes: list[str | None] - if all(len(h) == 1 for h in header_block): - headers = [h[0] for h in header_block] - dtypes = [None] * len(headers) - else: - headers, dtypes = (list(h) for h in zip_longest(*header_block)) - - body = rows[table_body_start + 1 :] - no_dtypes = all(d is None for d in dtypes) - - # transpose rows into columns, detect/omit truncated columns - coldata = list(zip(*(row for row in body if not all((e == "…") for e in row)))) - for el in ("…", "..."): - if el in headers: - idx = headers.index(el) - for table_elem in (headers, dtypes): - table_elem.pop(idx) # type: ignore[attr-defined] - if coldata: - coldata.pop(idx) - - # init cols as String Series, handle "null" -> None, create schema from repr dtype - data = [ - pl.Series([(None if v == "null" else v) for v in cd], dtype=String) - for cd in coldata - ] - schema = dict(zip(headers, (dtype_short_repr_to_dtype(d) for d in dtypes))) - if schema and data and (n_extend_cols := (len(schema) - len(data))) > 0: - empty_data = [None] * len(data[0]) - data.extend((pl.Series(empty_data, dtype=String)) for _ in range(n_extend_cols)) - for dtype in set(schema.values()): - if dtype in (List, Struct, Object): - msg = ( - f"`from_repr` does not support data type {dtype.base_type().__name__!r}" - ) - raise NotImplementedError(msg) - - # construct DataFrame from string series and cast from repr to native dtype - df = pl.DataFrame(data=data, orient="col", schema=list(schema)) - if no_dtypes: - if df.is_empty(): - # if no dtypes *and* empty, default to string - return df.with_columns(F.all().cast(String)) - else: - # otherwise, take a trip through our CSV inference logic - if all(tp == String for tp in df.schema.values()): - buf = io.BytesIO() - df.write_csv(file=buf) - df = read_csv(buf, new_columns=df.columns, try_parse_dates=True) - return df - elif 
schema and not data: - return df.cast(schema) # type: ignore[arg-type] - else: - return _cast_repr_strings_with_schema(df, schema) - - -def _from_series_repr(m: re.Match[str]) -> Series: - """Reconstruct a Series from a regex-matched series repr.""" - from polars.datatypes.convert import dtype_short_repr_to_dtype - - shape = m.groups()[0] - name = m.groups()[1][1:-1] - length = int(shape[1:-2] if shape else -1) - dtype = dtype_short_repr_to_dtype(m.groups()[2]) - - if length == 0: - string_values = [] - else: - string_values = [ - v.strip() - for v in re.findall(r"[\s>#]*(?:\t|\s{4,})([^\n]*)\n", m.groups()[-1]) - ] - if string_values == ["[", "]"]: - string_values = [] - elif string_values and string_values[0].lstrip("#> ") == "[": - string_values = string_values[1:] - - values = string_values[:length] if length > 0 else string_values - values = [(None if v == "null" else v) for v in values if v not in ("…", "...")] - - if not values: - return pl.Series(name=name, values=values, dtype=dtype) - else: - srs = pl.Series(name=name, values=values, dtype=String) - if dtype is None: - return srs - elif dtype in (Categorical, String): - return srs.str.replace('^"(.*)"$', r"$1").cast(dtype) - - return _cast_repr_strings_with_schema( - srs.to_frame(), schema={srs.name: dtype} - ).to_series() - - -def from_repr(tbl: str) -> DataFrame | Series: - """ - Utility function that reconstructs a DataFrame or Series from the object's repr. - - Parameters - ---------- - tbl - A string containing a polars DataFrame or Series repr; does not need - to be trimmed of whitespace (or leading prompts) as the repr will be - found/extracted automatically. - - Notes - ----- - This function handles the default UTF8_FULL and UTF8_FULL_CONDENSED DataFrame - tables (with or without rounded corners). Truncated columns/rows are omitted, - wrapped headers are accounted for, and dtypes automatically identified. - - Currently compound/nested dtypes such as List and Struct are not supported; - neither are Object dtypes. - - See Also - -------- - polars.DataFrame.to_init_repr - polars.Series.to_init_repr - - Examples - -------- - From DataFrame table repr: - - >>> df = pl.from_repr( - ... ''' - ... Out[3]: - ... shape: (1, 5) - ... ┌───────────┬────────────┬───┬───────┬────────────────────────────────┐ - ... │ source_ac ┆ source_cha ┆ … ┆ ident ┆ timestamp │ - ... │ tor_id ┆ nnel_id ┆ ┆ --- ┆ --- │ - ... │ --- ┆ --- ┆ ┆ str ┆ datetime[μs, Asia/Tokyo] │ - ... │ i32 ┆ i64 ┆ ┆ ┆ │ - ... ╞═══════════╪════════════╪═══╪═══════╪════════════════════════════════╡ - ... │ 123456780 ┆ 9876543210 ┆ … ┆ a:b:c ┆ 2023-03-25 10:56:59.663053 JST │ - ... │ … ┆ … ┆ … ┆ … ┆ … │ - ... │ 803065983 ┆ 2055938745 ┆ … ┆ x:y:z ┆ 2023-03-25 12:38:18.050545 JST │ - ... └───────────┴────────────┴───┴───────┴────────────────────────────────┘ - ... ''' - ... ) - >>> df - shape: (2, 4) - ┌─────────────────┬───────────────────┬───────┬────────────────────────────────┐ - │ source_actor_id ┆ source_channel_id ┆ ident ┆ timestamp │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i32 ┆ i64 ┆ str ┆ datetime[μs, Asia/Tokyo] │ - ╞═════════════════╪═══════════════════╪═══════╪════════════════════════════════╡ - │ 123456780 ┆ 9876543210 ┆ a:b:c ┆ 2023-03-25 10:56:59.663053 JST │ - │ 803065983 ┆ 2055938745 ┆ x:y:z ┆ 2023-03-25 12:38:18.050545 JST │ - └─────────────────┴───────────────────┴───────┴────────────────────────────────┘ - - From Series repr: - - >>> s = pl.from_repr( - ... ''' - ... shape: (3,) - ... Series: 's' [bool] - ... [ - ... true - ... false - ... true - ... ] - ... 
''' - ... ) - >>> s.to_list() - [True, False, True] - """ - # find DataFrame table... - m = re.search(r"([┌╭].*?[┘╯])", tbl, re.DOTALL) - if m is not None: - return _from_dataframe_repr(m) - - # ...or Series in the given string - m = re.search( - pattern=r"(?:shape: (\(\d+,\))\n.*?)?Series:\s+([^\n]+)\s+\[([^\n]+)](.*)", - string=tbl, - flags=re.DOTALL, - ) - if m is not None: - return _from_series_repr(m) - - msg = "input string does not contain DataFrame or Series" - raise ValueError(msg) - - def from_numpy( data: np.ndarray[Any, Any], schema: SchemaDefinition | None = None, @@ -732,6 +529,211 @@ def from_pandas( raise TypeError(msg) +@deprecate_renamed_parameter("tbl", "data", version="0.20.17") +def from_repr(data: str) -> DataFrame | Series: + """ + Construct a Polars DataFrame or Series from its string representation. + + Parameters + ---------- + data + A string containing a polars DataFrame or Series repr; does not need + to be trimmed of whitespace (or leading prompts) as the repr will be + found/extracted automatically. + + Notes + ----- + This function handles the default UTF8_FULL and UTF8_FULL_CONDENSED DataFrame + tables (with or without rounded corners). Truncated columns/rows are omitted, + wrapped headers are accounted for, and dtypes automatically identified. + + Currently compound/nested dtypes such as List and Struct are not supported; + neither are Object dtypes. + + See Also + -------- + polars.DataFrame.to_init_repr + polars.Series.to_init_repr + + Examples + -------- + From DataFrame table repr: + + >>> df = pl.from_repr( + ... ''' + ... Out[3]: + ... shape: (1, 5) + ... ┌───────────┬────────────┬───┬───────┬────────────────────────────────┐ + ... │ source_ac ┆ source_cha ┆ … ┆ ident ┆ timestamp │ + ... │ tor_id ┆ nnel_id ┆ ┆ --- ┆ --- │ + ... │ --- ┆ --- ┆ ┆ str ┆ datetime[μs, Asia/Tokyo] │ + ... │ i32 ┆ i64 ┆ ┆ ┆ │ + ... ╞═══════════╪════════════╪═══╪═══════╪════════════════════════════════╡ + ... │ 123456780 ┆ 9876543210 ┆ … ┆ a:b:c ┆ 2023-03-25 10:56:59.663053 JST │ + ... │ … ┆ … ┆ … ┆ … ┆ … │ + ... │ 803065983 ┆ 2055938745 ┆ … ┆ x:y:z ┆ 2023-03-25 12:38:18.050545 JST │ + ... └───────────┴────────────┴───┴───────┴────────────────────────────────┘ + ... ''' + ... ) + >>> df + shape: (2, 4) + ┌─────────────────┬───────────────────┬───────┬────────────────────────────────┐ + │ source_actor_id ┆ source_channel_id ┆ ident ┆ timestamp │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ i32 ┆ i64 ┆ str ┆ datetime[μs, Asia/Tokyo] │ + ╞═════════════════╪═══════════════════╪═══════╪════════════════════════════════╡ + │ 123456780 ┆ 9876543210 ┆ a:b:c ┆ 2023-03-25 10:56:59.663053 JST │ + │ 803065983 ┆ 2055938745 ┆ x:y:z ┆ 2023-03-25 12:38:18.050545 JST │ + └─────────────────┴───────────────────┴───────┴────────────────────────────────┘ + + From Series repr: + + >>> s = pl.from_repr( + ... ''' + ... shape: (3,) + ... Series: 's' [bool] + ... [ + ... true + ... false + ... true + ... ] + ... ''' + ... ) + >>> s.to_list() + [True, False, True] + """ + # find DataFrame table... 
+ m = re.search(r"([┌╭].*?[┘╯])", data, re.DOTALL) + if m is not None: + return _from_dataframe_repr(m) + + # ...or Series in the given string + m = re.search( + pattern=r"(?:shape: (\(\d+,\))\n.*?)?Series:\s+([^\n]+)\s+\[([^\n]+)](.*)", + string=data, + flags=re.DOTALL, + ) + if m is not None: + return _from_series_repr(m) + + msg = "input string does not contain DataFrame or Series" + raise ValueError(msg) + + +def _from_dataframe_repr(m: re.Match[str]) -> DataFrame: + """Reconstruct a DataFrame from a regex-matched table repr.""" + from polars.datatypes.convert import dtype_short_repr_to_dtype + + # extract elements from table structure + lines = m.group().split("\n")[1:-1] + rows = [ + [re.sub(r"^[\W+]*│", "", elem).strip() for elem in row] + for row in [re.split("[┆|]", row.rstrip("│ ")) for row in lines] + if len(row) > 1 or not re.search("├[╌┼]+┤", row[0]) + ] + + # determine beginning/end of the header block + table_body_start = 2 + for idx, (elem, *_) in enumerate(rows): + if re.match(r"^\W*╞", elem): + table_body_start = idx + break + + # handle headers with wrapped column names and determine headers/dtypes + header_block = ["".join(h).split("---") for h in zip(*rows[:table_body_start])] + dtypes: list[str | None] + if all(len(h) == 1 for h in header_block): + headers = [h[0] for h in header_block] + dtypes = [None] * len(headers) + else: + headers, dtypes = (list(h) for h in zip_longest(*header_block)) + + body = rows[table_body_start + 1 :] + no_dtypes = all(d is None for d in dtypes) + + # transpose rows into columns, detect/omit truncated columns + coldata = list(zip(*(row for row in body if not all((e == "…") for e in row)))) + for el in ("…", "..."): + if el in headers: + idx = headers.index(el) + for table_elem in (headers, dtypes): + table_elem.pop(idx) # type: ignore[attr-defined] + if coldata: + coldata.pop(idx) + + # init cols as String Series, handle "null" -> None, create schema from repr dtype + data = [ + pl.Series([(None if v == "null" else v) for v in cd], dtype=String) + for cd in coldata + ] + schema = dict(zip(headers, (dtype_short_repr_to_dtype(d) for d in dtypes))) + if schema and data and (n_extend_cols := (len(schema) - len(data))) > 0: + empty_data = [None] * len(data[0]) + data.extend((pl.Series(empty_data, dtype=String)) for _ in range(n_extend_cols)) + for dtype in set(schema.values()): + if dtype in (List, Struct, Object): + msg = ( + f"`from_repr` does not support data type {dtype.base_type().__name__!r}" + ) + raise NotImplementedError(msg) + + # construct DataFrame from string series and cast from repr to native dtype + df = pl.DataFrame(data=data, orient="col", schema=list(schema)) + if no_dtypes: + if df.is_empty(): + # if no dtypes *and* empty, default to string + return df.with_columns(F.all().cast(String)) + else: + # otherwise, take a trip through our CSV inference logic + if all(tp == String for tp in df.schema.values()): + buf = io.BytesIO() + df.write_csv(file=buf) + df = read_csv(buf, new_columns=df.columns, try_parse_dates=True) + return df + elif schema and not data: + return df.cast(schema) # type: ignore[arg-type] + else: + return _cast_repr_strings_with_schema(df, schema) + + +def _from_series_repr(m: re.Match[str]) -> Series: + """Reconstruct a Series from a regex-matched series repr.""" + from polars.datatypes.convert import dtype_short_repr_to_dtype + + shape = m.groups()[0] + name = m.groups()[1][1:-1] + length = int(shape[1:-2] if shape else -1) + dtype = dtype_short_repr_to_dtype(m.groups()[2]) + + if length == 0: + 
string_values = [] + else: + string_values = [ + v.strip() + for v in re.findall(r"[\s>#]*(?:\t|\s{4,})([^\n]*)\n", m.groups()[-1]) + ] + if string_values == ["[", "]"]: + string_values = [] + elif string_values and string_values[0].lstrip("#> ") == "[": + string_values = string_values[1:] + + values = string_values[:length] if length > 0 else string_values + values = [(None if v == "null" else v) for v in values if v not in ("…", "...")] + + if not values: + return pl.Series(name=name, values=values, dtype=dtype) + else: + srs = pl.Series(name=name, values=values, dtype=String) + if dtype is None: + return srs + elif dtype in (Categorical, String): + return srs.str.replace('^"(.*)"$', r"$1").cast(dtype) + + return _cast_repr_strings_with_schema( + srs.to_frame(), schema={srs.name: dtype} + ).to_series() + + def from_dataframe(df: SupportsInterchange, *, allow_copy: bool = True) -> DataFrame: """ Build a Polars DataFrame from any dataframe supporting the interchange protocol.
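
Usage after this change — an illustrative sketch, not part of the patch. It assumes, based on the `deprecate_renamed_parameter("tbl", "data", version="0.20.17")` decorator applied above, that the old `tbl` keyword is remapped to `data` and a DeprecationWarning is emitted; the expected output follows the Series example in the docstring:

    import polars as pl

    repr_str = """
    shape: (3,)
    Series: 's' [bool]
    [
        true
        false
        true
    ]
    """

    # preferred spelling after this patch: the first parameter is now named `data`
    s = pl.from_repr(data=repr_str)
    print(s.to_list())  # [True, False, True]

    # passing the old keyword should still work during the deprecation window,
    # but is expected to warn and point at the new name (assumption based on
    # the decorator's name and arguments)
    s = pl.from_repr(tbl=repr_str)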
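
For context, a minimal sketch of what a keyword-rename helper along the lines of polars' `deprecate_renamed_parameter` typically does; the real implementation in `polars._utils.deprecation` may differ, and the warning text here is an assumption:

    # Hypothetical sketch of a keyword-rename decorator; illustrative only.
    from __future__ import annotations

    import warnings
    from functools import wraps
    from typing import Any, Callable, TypeVar

    T = TypeVar("T")


    def deprecate_renamed_parameter(
        old_name: str, new_name: str, *, version: str
    ) -> Callable[[Callable[..., T]], Callable[..., T]]:
        """Redirect a renamed keyword argument and emit a DeprecationWarning."""

        def decorator(function: Callable[..., T]) -> Callable[..., T]:
            @wraps(function)
            def wrapper(*args: Any, **kwargs: Any) -> T:
                if old_name in kwargs:
                    warnings.warn(
                        f"`{old_name}` is deprecated as a parameter name for"
                        f" `{function.__name__}`; it was renamed to `{new_name}`"
                        f" in version {version}",
                        DeprecationWarning,
                        stacklevel=2,
                    )
                    # forward the value under the new parameter name
                    kwargs[new_name] = kwargs.pop(old_name)
                return function(*args, **kwargs)

            return wrapper

        return decorator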