From f0cf33e1abbc58a982306af0c56cae05bbcd3414 Mon Sep 17 00:00:00 2001 From: petrosbar Date: Mon, 11 Mar 2024 19:35:52 +0200 Subject: [PATCH] fix: Use primitive constructors to create a Series of lists when dtype is defined --- py-polars/polars/_utils/construction.py | 16 +++++++++++++--- py-polars/tests/unit/datatypes/test_list.py | 9 +++++++++ py-polars/tests/unit/io/test_json.py | 2 +- py-polars/tests/unit/series/test_describe.py | 2 +- py-polars/tests/unit/test_serde.py | 2 +- 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/py-polars/polars/_utils/construction.py b/py-polars/polars/_utils/construction.py index 71f37b4f583c..f8091fb64c01 100644 --- a/py-polars/polars/_utils/construction.py +++ b/py-polars/polars/_utils/construction.py @@ -444,7 +444,6 @@ def sequence_to_pyseries( # lists defer to subsequent handling; identify nested type elif dtype == List: - getattr(dtype, "inner", None) python_dtype = list # infer temporal type handling @@ -459,7 +458,7 @@ def sequence_to_pyseries( or is_namedtuple(value.__class__) ) and dtype != Object: return pl.DataFrame(values).to_struct(name)._s - elif isinstance(value, range): + elif isinstance(value, range) and dtype is None: values = [range_to_series("", v) for v in values] else: # for temporal dtypes: @@ -602,7 +601,18 @@ def sequence_to_pyseries( if isinstance(dtype, Object): return PySeries.new_object(name, values, strict) if dtype: - srs = sequence_from_any_value_and_dtype_or_object(name, values, dtype) + if (inner_dtype := getattr(dtype, "inner", None)) is not None: + py_srs = [ + None + if value is None + else sequence_to_pyseries("", value, inner_dtype, strict=strict) + for value in values + ] + srs = PySeries.new_series_list(name, py_srs, strict) + else: + srs = sequence_from_any_value_and_dtype_or_object( + name, values, dtype + ) if dtype != srs.dtype(): srs = srs.cast(dtype, strict=False) return srs diff --git a/py-polars/tests/unit/datatypes/test_list.py b/py-polars/tests/unit/datatypes/test_list.py index 819588d413d5..ba580ea8e6ba 100644 --- a/py-polars/tests/unit/datatypes/test_list.py +++ b/py-polars/tests/unit/datatypes/test_list.py @@ -23,15 +23,20 @@ def test_dtype() -> None: assert a.dtype.is_(pl.List(pl.Int64)) # explicit + u64_max = (2**64) - 1 df = pl.DataFrame( data={ "i": [[1, 2, 3]], + "li": [[[1, 2, 3]]], + "u": [[u64_max]], "tm": [[time(10, 30, 45)]], "dt": [[date(2022, 12, 31)]], "dtm": [[datetime(2022, 12, 31, 1, 2, 3)]], }, schema=[ ("i", pl.List(pl.Int8)), + ("li", pl.List(pl.List(pl.Int8))), + ("u", pl.List(pl.UInt64)), ("tm", pl.List(pl.Time)), ("dt", pl.List(pl.Date)), ("dtm", pl.List(pl.Datetime)), @@ -39,6 +44,8 @@ def test_dtype() -> None: ) assert df.schema == { "i": pl.List(pl.Int8), + "li": pl.List(pl.List(pl.Int8)), + "u": pl.List(pl.UInt64), "tm": pl.List(pl.Time), "dt": pl.List(pl.Date), "dtm": pl.List(pl.Datetime), @@ -48,6 +55,8 @@ def test_dtype() -> None: assert df.rows() == [ ( [1, 2, 3], + [[1, 2, 3]], + [u64_max], [time(10, 30, 45)], [date(2022, 12, 31)], [datetime(2022, 12, 31, 1, 2, 3)], diff --git a/py-polars/tests/unit/io/test_json.py b/py-polars/tests/unit/io/test_json.py index 74e38c3c186e..9acbb061a63c 100644 --- a/py-polars/tests/unit/io/test_json.py +++ b/py-polars/tests/unit/io/test_json.py @@ -297,7 +297,7 @@ def test_write_json_duration() -> None: ([["a", "b"], [None, None]], pl.Array(pl.Utf8, width=2)), ([[True, False, None], [None, None, None]], pl.Array(pl.Utf8, width=3)), ( - [[[1, 2, 3], [4, None]], None, [[None, None, 2]]], + [[[1, 2, 3], [4, None, 5]], None, [[None, None, 2]]], pl.List(pl.Array(pl.Int32(), width=3)), ), ( diff --git a/py-polars/tests/unit/series/test_describe.py b/py-polars/tests/unit/series/test_describe.py index 15ed7bc84c54..cdf180423231 100644 --- a/py-polars/tests/unit/series/test_describe.py +++ b/py-polars/tests/unit/series/test_describe.py @@ -117,7 +117,7 @@ def test_series_describe_null() -> None: def test_series_describe_nested_list() -> None: s = pl.Series( values=[[10e10, 10e15], [10e12, 10e13], [10e10, 10e15]], - dtype=pl.List(pl.Int64), + dtype=pl.List(pl.Float64), ) result = s.describe() stats = { diff --git a/py-polars/tests/unit/test_serde.py b/py-polars/tests/unit/test_serde.py index bd84d56be9a2..8fe6b1131174 100644 --- a/py-polars/tests/unit/test_serde.py +++ b/py-polars/tests/unit/test_serde.py @@ -188,7 +188,7 @@ def test_serde_array_dtype() -> None: assert_series_equal(pickle.loads(pickle.dumps(s)), s) nested_s = pl.Series( - [[[1, 2, 3], [4, None]], None, [[None, None, 2]]], + [[[1, 2, 3], [4, None, 5]], None, [[None, None, 2]]], dtype=pl.List(pl.Array(pl.Int32(), width=3)), ) assert_series_equal(pickle.loads(pickle.dumps(nested_s)), nested_s)