From 3bd92a4876e48fe32cf2121e020c08157cc9f33b Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 19 Mar 2024 09:23:52 +0100 Subject: [PATCH 01/10] Fix bools --- py-polars/src/series/construction.rs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/py-polars/src/series/construction.rs b/py-polars/src/series/construction.rs index abe959bcd79d..0c0865e9cec5 100644 --- a/py-polars/src/series/construction.rs +++ b/py-polars/src/series/construction.rs @@ -98,7 +98,7 @@ impl PySeries { #[pymethods] impl PySeries { #[staticmethod] - fn new_opt_bool(name: &str, values: &Bound, strict: bool) -> PyResult { + fn new_opt_bool(name: &str, values: &Bound, _strict: bool) -> PyResult { let len = values.len()?; let mut builder = BooleanChunkedBuilder::new(name, len); @@ -107,15 +107,8 @@ impl PySeries { if value.is_none() { builder.append_null() } else { - match value.extract::() { - Ok(v) => builder.append_value(v), - Err(e) => { - if strict { - return Err(e); - } - builder.append_null() - }, - } + let v = item.extract::()?; + builder.append_value(v) } } let ca = builder.finish(); From c8e5a963e03554347a89880a4cc71118af6a86db Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 19 Mar 2024 15:35:14 +0100 Subject: [PATCH 02/10] Fix primitives --- py-polars/src/series/construction.rs | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/py-polars/src/series/construction.rs b/py-polars/src/series/construction.rs index 0c0865e9cec5..aa5b96095a1b 100644 --- a/py-polars/src/series/construction.rs +++ b/py-polars/src/series/construction.rs @@ -107,18 +107,18 @@ impl PySeries { if value.is_none() { builder.append_null() } else { - let v = item.extract::()?; + let v = value.extract::()?; builder.append_value(v) } } - let ca = builder.finish(); + let ca = builder.finish(); let s = ca.into_series(); Ok(s.into()) } } -fn new_primitive<'a, T>(name: &str, values: &'a Bound, strict: bool) -> PyResult +fn new_primitive<'a, T>(name: &str, values: &'a Bound, _strict: bool) -> PyResult where T: PolarsNumericType, ChunkedArray: IntoSeries, @@ -132,19 +132,12 @@ where if value.is_none() { builder.append_null() } else { - match value.extract::() { - Ok(v) => builder.append_value(v), - Err(e) => { - if strict { - return Err(e); - } - builder.append_null() - }, - } + let v = value.extract::()?; + builder.append_value(v) } } - let ca = builder.finish(); + let ca = builder.finish(); let s = ca.into_series(); Ok(s.into()) } From e3936528a82ce8dc32987c7916c7604bf5ca5dbe Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 19 Mar 2024 15:54:28 +0100 Subject: [PATCH 03/10] Update Python fallback logic --- .../polars/_utils/construction/series.py | 45 ++++--------------- 1 file changed, 8 insertions(+), 37 deletions(-) diff --git a/py-polars/polars/_utils/construction/series.py b/py-polars/polars/_utils/construction/series.py index a52e9b61678b..2966e0a34c61 100644 --- a/py-polars/polars/_utils/construction/series.py +++ b/py-polars/polars/_utils/construction/series.py @@ -2,7 +2,6 @@ import contextlib from datetime import date, datetime, time, timedelta -from decimal import Decimal as PyDecimal from itertools import islice from typing import ( TYPE_CHECKING, @@ -27,8 +26,6 @@ ) from polars._utils.wrap import wrap_s from polars.datatypes import ( - INTEGER_DTYPES, - TEMPORAL_DTYPES, Array, Boolean, Categorical, @@ -293,44 +290,18 @@ def _construct_series_with_fallbacks( constructor: Callable[[str, Sequence[Any], bool], PySeries], name: str, values: Sequence[Any], - target_dtype: PolarsDataType | None, + dtype: PolarsDataType | None, *, strict: bool, ) -> PySeries: """Construct Series, with fallbacks for basic type mismatch (eg: bool/int).""" - while True: - try: - return constructor(name, values, strict) - except TypeError as exc: - str_exc = str(exc) - - # from x to float - # error message can be: - # - integers: "'float' object cannot be interpreted as an integer" - if "'float'" in str_exc and ( - # we do not accept float values as int/temporal, as it causes silent - # information loss; the caller should explicitly cast in this case. - target_dtype not in (INTEGER_DTYPES | TEMPORAL_DTYPES) - ): - constructor = py_type_to_constructor(float) - - # from x to string - # error message can be: - # - integers: "'str' object cannot be interpreted as an integer" - # - floats: "must be real number, not str" - elif "'str'" in str_exc or str_exc == "must be real number, not str": - constructor = py_type_to_constructor(str) - - # from x to int - # error message can be: - # - bools: "'int' object cannot be converted to 'PyBool'" - elif str_exc == "'int' object cannot be converted to 'PyBool'": - constructor = py_type_to_constructor(int) - - elif "decimal.Decimal" in str_exc: - constructor = py_type_to_constructor(PyDecimal) - else: - raise + try: + return constructor(name, values, strict) + except TypeError: + if dtype is None: + return PySeries.new_from_any_values(name, values, strict) + else: + return PySeries.new_from_any_values_and_dtype(name, values, dtype, strict) def iterable_to_pyseries( From e3b474dea2dc7772fb26bde4e41ca17897d45a09 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 19 Mar 2024 15:55:43 +0100 Subject: [PATCH 04/10] Fix tests --- py-polars/tests/unit/constructors/test_constructors.py | 2 +- py-polars/tests/unit/constructors/test_dataframe.py | 5 +---- py-polars/tests/unit/constructors/test_series.py | 5 ++++- py-polars/tests/unit/test_errors.py | 4 +--- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index 47398b04e1eb..0d27c5499240 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -1113,7 +1113,7 @@ def test_from_dicts_list_struct_without_inner_dtype_5611() -> None: def test_from_dict_upcast_primitive() -> None: - df = pl.from_dict({"a": [1, 2.1, 3], "b": [4, 5, 6.4]}) + df = pl.from_dict({"a": [1, 2.1, 3], "b": [4, 5, 6.4]}, strict=False) assert df.dtypes == [pl.Float64, pl.Float64] diff --git a/py-polars/tests/unit/constructors/test_dataframe.py b/py-polars/tests/unit/constructors/test_dataframe.py index ee5f5a494f27..3248b7c3ebe1 100644 --- a/py-polars/tests/unit/constructors/test_dataframe.py +++ b/py-polars/tests/unit/constructors/test_dataframe.py @@ -105,10 +105,7 @@ def test_df_init_strict() -> None: df = pl.DataFrame(data, schema=schema, strict=False) - # TODO: This should result in a Float Series without nulls - # https://github.com/pola-rs/polars/issues/14427 - assert df["a"].to_list() == [1, 2, None] - + assert df["a"].to_list() == [1, 2, 3] assert df["a"].dtype == pl.Int8 diff --git a/py-polars/tests/unit/constructors/test_series.py b/py-polars/tests/unit/constructors/test_series.py index fd6dc683bda8..4dcbad20f536 100644 --- a/py-polars/tests/unit/constructors/test_series.py +++ b/py-polars/tests/unit/constructors/test_series.py @@ -73,7 +73,10 @@ def test_sequence_of_series_with_dtype(dtype: pl.PolarsDataType | None) -> None: def test_upcast_primitive_and_strings( values: list[Any], dtype: pl.PolarsDataType, expected_dtype: pl.PolarsDataType ) -> None: - assert pl.Series(values, dtype=dtype).dtype == expected_dtype + with pytest.raises(TypeError): + pl.Series(values, dtype=dtype, strict=True) + + assert pl.Series(values, dtype=dtype, strict=False).dtype == expected_dtype def test_preserve_decimal_precision() -> None: diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py index 06134a16939c..d958a659cb08 100644 --- a/py-polars/tests/unit/test_errors.py +++ b/py-polars/tests/unit/test_errors.py @@ -78,9 +78,7 @@ def test_error_on_invalid_series_init() -> None: ): pl.Series([1.5, 2.0, 3.75], dtype=dtype) - with pytest.raises( - TypeError, match="'float' object cannot be interpreted as an integer" - ): + with pytest.raises(TypeError, match="unexpected value"): pl.Series([1.5, 2.0, 3.75], dtype=pl.Int32) From bc57bd5ceb3cb3f8fb334a70210107ca2cca5e21 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Thu, 21 Mar 2024 12:08:12 +0100 Subject: [PATCH 05/10] Fix str/binary --- py-polars/src/series/construction.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/py-polars/src/series/construction.rs b/py-polars/src/series/construction.rs index aa5b96095a1b..4ef54674b13a 100644 --- a/py-polars/src/series/construction.rs +++ b/py-polars/src/series/construction.rs @@ -229,9 +229,11 @@ impl PySeries { for res in values.iter()? { let value = res?; - match value.extract::>() { - Ok(v) => builder.append_value(v), - Err(_) => builder.append_null(), + if value.is_none() { + builder.append_null() + } else { + let v = value.extract::>()?; + builder.append_value(v) } } @@ -247,9 +249,11 @@ impl PySeries { for res in values.iter()? { let value = res?; - match value.extract::<&[u8]>() { - Ok(v) => builder.append_value(v), - Err(_) => builder.append_null(), + if value.is_none() { + builder.append_null() + } else { + let v = value.extract::<&[u8]>()?; + builder.append_value(v) } } From 18aa623be89d3f7115bc2381b000d1d4f42bc886 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Thu, 13 Jun 2024 16:54:08 +0200 Subject: [PATCH 06/10] keyword args --- py-polars/polars/_utils/construction/series.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/py-polars/polars/_utils/construction/series.py b/py-polars/polars/_utils/construction/series.py index 2966e0a34c61..660147574465 100644 --- a/py-polars/polars/_utils/construction/series.py +++ b/py-polars/polars/_utils/construction/series.py @@ -299,9 +299,11 @@ def _construct_series_with_fallbacks( return constructor(name, values, strict) except TypeError: if dtype is None: - return PySeries.new_from_any_values(name, values, strict) + return PySeries.new_from_any_values(name, values, strict=strict) else: - return PySeries.new_from_any_values_and_dtype(name, values, dtype, strict) + return PySeries.new_from_any_values_and_dtype( + name, values, dtype, strict=strict + ) def iterable_to_pyseries( From bc85b2a7263d5161ec46061e85ccf4c895fc7584 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Thu, 13 Jun 2024 17:18:26 +0200 Subject: [PATCH 07/10] Fix tests --- py-polars/polars/dataframe/frame.py | 2 +- py-polars/polars/expr/expr.py | 8 ++++---- py-polars/polars/series/series.py | 6 +++--- py-polars/tests/unit/dataframe/test_getitem.py | 2 +- py-polars/tests/unit/dataframe/test_serde.py | 2 +- .../operations/aggregation/test_aggregations.py | 4 ++-- .../operations/aggregation/test_horizontal.py | 2 +- py-polars/tests/unit/operations/test_ewm.py | 6 +++--- py-polars/tests/unit/series/test_getitem.py | 2 +- py-polars/tests/unit/test_convert.py | 15 +++++++++++---- 10 files changed, 28 insertions(+), 21 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index dde9f1b2b0c1..f9e02db248ce 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -9487,7 +9487,7 @@ def fold(self, operation: Callable[[Series, Series], Series]) -> Series: >>> df = pl.DataFrame( ... { - ... "a": ["foo", "bar", 2], + ... "a": ["foo", "bar", None], ... "b": [1, 2, 3], ... "c": [1.0, 2.0, 3.0], ... } diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 3fbc3ae01a8b..f5fee387faa7 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -2964,7 +2964,7 @@ def max(self) -> Self: Examples -------- - >>> df = pl.DataFrame({"a": [-1, float("nan"), 1]}) + >>> df = pl.DataFrame({"a": [-1.0, float("nan"), 1.0]}) >>> df.select(pl.col("a").max()) shape: (1, 1) ┌─────┐ @@ -2983,7 +2983,7 @@ def min(self) -> Self: Examples -------- - >>> df = pl.DataFrame({"a": [-1, float("nan"), 1]}) + >>> df = pl.DataFrame({"a": [-1.0, float("nan"), 1.0]}) >>> df.select(pl.col("a").min()) shape: (1, 1) ┌──────┐ @@ -3005,7 +3005,7 @@ def nan_max(self) -> Self: Examples -------- - >>> df = pl.DataFrame({"a": [0, float("nan")]}) + >>> df = pl.DataFrame({"a": [0.0, float("nan")]}) >>> df.select(pl.col("a").nan_max()) shape: (1, 1) ┌─────┐ @@ -3027,7 +3027,7 @@ def nan_min(self) -> Self: Examples -------- - >>> df = pl.DataFrame({"a": [0, float("nan")]}) + >>> df = pl.DataFrame({"a": [0.0, float("nan")]}) >>> df.select(pl.col("a").nan_min()) shape: (1, 1) ┌─────┐ diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 465a81d34064..bd6757b8445d 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -1958,7 +1958,7 @@ def nan_max(self) -> int | float | date | datetime | timedelta | str: >>> s.nan_max() 4 - >>> s = pl.Series("a", [1, float("nan"), 4]) + >>> s = pl.Series("a", [1.0, float("nan"), 4.0]) >>> s.nan_max() nan """ @@ -1977,7 +1977,7 @@ def nan_min(self) -> int | float | date | datetime | timedelta | str: >>> s.nan_min() 1 - >>> s = pl.Series("a", [1, float("nan"), 4]) + >>> s = pl.Series("a", [1.0, float("nan"), 4.0]) >>> s.nan_min() nan """ @@ -4730,7 +4730,7 @@ def fill_nan(self, value: int | float | Expr | None) -> Series: Examples -------- - >>> s = pl.Series("a", [1, 2, 3, float("nan")]) + >>> s = pl.Series("a", [1.0, 2.0, 3.0, float("nan")]) >>> s.fill_nan(0) shape: (4,) Series: 'a' [f64] diff --git a/py-polars/tests/unit/dataframe/test_getitem.py b/py-polars/tests/unit/dataframe/test_getitem.py index 0583526fce7a..5d112ad67528 100644 --- a/py-polars/tests/unit/dataframe/test_getitem.py +++ b/py-polars/tests/unit/dataframe/test_getitem.py @@ -205,7 +205,7 @@ def test_df_getitem_col_mixed_inputs(input: list[Any], match: str) -> None: @pytest.mark.parametrize( ("input", "match"), [ - ([0.0, 1.0], "'float' object cannot be interpreted as an integer"), + ([0.0, 1.0], "unexpected value while building Series of type Int64"), ( pl.Series([[1, 2], [3, 4]]), "cannot treat Series of type List\\(Int64\\) as indices", diff --git a/py-polars/tests/unit/dataframe/test_serde.py b/py-polars/tests/unit/dataframe/test_serde.py index ab627a84fb32..609f7d3fb113 100644 --- a/py-polars/tests/unit/dataframe/test_serde.py +++ b/py-polars/tests/unit/dataframe/test_serde.py @@ -78,7 +78,7 @@ def test_df_serde_enum() -> None: [ ([[1, 2, 3], [None, None, None], [1, None, 3]], pl.Array(pl.Int32(), shape=3)), ([["a", "b"], [None, None]], pl.Array(pl.Utf8, shape=2)), - ([[True, False, None], [None, None, None]], pl.Array(pl.Utf8, shape=3)), + ([[True, False, None], [None, None, None]], pl.Array(pl.Boolean, shape=3)), ( [[[1, 2, 3], [4, None, 5]], None, [[None, None, 2]]], pl.List(pl.Array(pl.Int32(), shape=3)), diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index e4f341b18566..1566e225732e 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -496,12 +496,12 @@ def test_horizontal_mean_single_column( out_dtype: PolarsDataType, ) -> None: out = ( - pl.LazyFrame({"a": pl.Series([1, 0], dtype=in_dtype)}) + pl.LazyFrame({"a": pl.Series([1, 0]).cast(in_dtype)}) .select(pl.mean_horizontal(pl.all())) .collect() ) - assert_frame_equal(out, pl.DataFrame({"a": pl.Series([1.0, 0.0], dtype=out_dtype)})) + assert_frame_equal(out, pl.DataFrame({"a": pl.Series([1.0, 0.0]).cast(out_dtype)})) def test_horizontal_mean_in_group_by_15115() -> None: diff --git a/py-polars/tests/unit/operations/aggregation/test_horizontal.py b/py-polars/tests/unit/operations/aggregation/test_horizontal.py index d840094571a8..c6c6631ab60c 100644 --- a/py-polars/tests/unit/operations/aggregation/test_horizontal.py +++ b/py-polars/tests/unit/operations/aggregation/test_horizontal.py @@ -430,7 +430,7 @@ def test_schema_mean_horizontal_single_column( in_dtype: pl.PolarsDataType, out_dtype: pl.PolarsDataType, ) -> None: - lf = pl.LazyFrame({"a": pl.Series([1, 0], dtype=in_dtype)}).select( + lf = pl.LazyFrame({"a": pl.Series([1, 0]).cast(in_dtype)}).select( pl.mean_horizontal(pl.all()) ) diff --git a/py-polars/tests/unit/operations/test_ewm.py b/py-polars/tests/unit/operations/test_ewm.py index 05b7a07ca09c..e643fdf30d3d 100644 --- a/py-polars/tests/unit/operations/test_ewm.py +++ b/py-polars/tests/unit/operations/test_ewm.py @@ -153,7 +153,7 @@ def test_ewm_std_var() -> None: var = series.ewm_var(alpha=0.5, ignore_nulls=False) std = series.ewm_std(alpha=0.5, ignore_nulls=False) - expected = pl.Series("a", [0, 4.5, 1.9285714285714288]) + expected = pl.Series("a", [0.0, 4.5, 1.9285714285714288]) assert np.allclose(var, std**2, rtol=1e-16) assert_series_equal(var, expected) @@ -163,13 +163,13 @@ def test_ewm_std_var_with_nulls() -> None: var = series.ewm_var(alpha=0.5, ignore_nulls=True) std = series.ewm_std(alpha=0.5, ignore_nulls=True) - expected = pl.Series("a", [0, 4.5, None, 1.9285714285714288]) + expected = pl.Series("a", [0.0, 4.5, None, 1.9285714285714288]) assert_series_equal(var, expected) assert_series_equal(std**2, expected) var = series.ewm_var(alpha=0.5, ignore_nulls=False) std = series.ewm_std(alpha=0.5, ignore_nulls=False) - expected = pl.Series("a", [0, 4.5, None, 1.7307692307692308]) + expected = pl.Series("a", [0.0, 4.5, None, 1.7307692307692308]) assert_series_equal(var, expected) assert_series_equal(std**2, expected) diff --git a/py-polars/tests/unit/series/test_getitem.py b/py-polars/tests/unit/series/test_getitem.py index 3f106de3034f..50dee3e0cf02 100644 --- a/py-polars/tests/unit/series/test_getitem.py +++ b/py-polars/tests/unit/series/test_getitem.py @@ -88,7 +88,7 @@ def test_series_getitem_multiple_indices(indices: Any) -> None: @pytest.mark.parametrize( ("input", "match"), [ - ([0.0, 1.0], "'float' object cannot be interpreted as an integer"), + ([0.0, 1.0], "unexpected value while building Series of type Int64"), ( pl.Series([[1, 2], [3, 4]]), "cannot treat Series of type List\\(Int64\\) as indices", diff --git a/py-polars/tests/unit/test_convert.py b/py-polars/tests/unit/test_convert.py index e74bd6f13024..74ff178dd9ea 100644 --- a/py-polars/tests/unit/test_convert.py +++ b/py-polars/tests/unit/test_convert.py @@ -5,15 +5,22 @@ import polars as pl -def test_schema_inference_from_rows() -> None: - # these have to upcast to float - result = pl.from_records([[1, 2.1, 3], [4, 5, 6.4]]) +def test_from_records_schema_inference() -> None: + data = [[1, 2.1, 3], [4, 5, 6.4]] + + with pytest.raises(TypeError, match="unexpected value"): + pl.from_records(data) + + result = pl.from_records(data, strict=False) assert result.to_dict(as_series=False) == { "column_0": [1.0, 2.1, 3.0], "column_1": [4.0, 5.0, 6.4], } - result = pl.from_dicts([{"a": 1, "b": 2}, {"a": 3.1, "b": 4.5}]) + +def test_from_dicts_schema_inference() -> None: + data = [{"a": 1, "b": 2}, {"a": 3.1, "b": 4.5}] + result = pl.from_dicts(data) # type: ignore[arg-type] assert result.to_dict(as_series=False) == { "a": [1.0, 3.1], "b": [2.0, 4.5], From 27c0fa59b4a541ac138c22d39b032fff4f49da59 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Thu, 13 Jun 2024 23:36:21 +0200 Subject: [PATCH 08/10] Update more tests --- docs/src/python/user-guide/getting-started/expressions.py | 2 +- docs/src/python/user-guide/getting-started/joins.py | 2 +- py-polars/polars/io/spreadsheet/functions.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/python/user-guide/getting-started/expressions.py b/docs/src/python/user-guide/getting-started/expressions.py index 12c6ea2170ec..dd27738f33ef 100644 --- a/docs/src/python/user-guide/getting-started/expressions.py +++ b/docs/src/python/user-guide/getting-started/expressions.py @@ -15,7 +15,7 @@ datetime(2025, 12, 4), datetime(2025, 12, 5), ], - "d": [1, 2.0, float("nan"), -42, None], + "d": [1.0, 2.0, float("nan"), -42.0, None], } ) # --8<-- [end:setup] diff --git a/docs/src/python/user-guide/getting-started/joins.py b/docs/src/python/user-guide/getting-started/joins.py index 5db0820843c7..fd7dcc19eb4a 100644 --- a/docs/src/python/user-guide/getting-started/joins.py +++ b/docs/src/python/user-guide/getting-started/joins.py @@ -9,7 +9,7 @@ { "a": range(8), "b": np.random.rand(8), - "d": [1, 2.0, float("nan"), float("nan"), 0, -5, -42, None], + "d": [1.0, 2.0, float("nan"), float("nan"), 0.0, -5.0, -42.0, None], } ) diff --git a/py-polars/polars/io/spreadsheet/functions.py b/py-polars/polars/io/spreadsheet/functions.py index 2d1864f06f02..1cee8fc50386 100644 --- a/py-polars/polars/io/spreadsheet/functions.py +++ b/py-polars/polars/io/spreadsheet/functions.py @@ -777,7 +777,7 @@ def _read_spreadsheet_openpyxl( # the non-strings will become null, so we handle the cast here values = [str(v) if (v is not None) else v for v in values] - s = pl.Series(name, values, dtype=dtype) + s = pl.Series(name, values, dtype=dtype, strict=False) series_data.append(s) df = pl.DataFrame( From 9da204ab2a1fdaf2e556aaff56f8d9dddf20e1f0 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 14 Jun 2024 17:03:37 +0200 Subject: [PATCH 09/10] Re-enable test case --- py-polars/tests/unit/datatypes/test_decimal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py index 6a1f549216a5..ee1005c1b0f6 100644 --- a/py-polars/tests/unit/datatypes/test_decimal.py +++ b/py-polars/tests/unit/datatypes/test_decimal.py @@ -22,7 +22,7 @@ def permutations_int_dec_none() -> list[tuple[D | int | None, ...]]: D("-0.01"), D("1.2345678"), D("500"), - # -1, # TODO: Address in https://github.com/pola-rs/polars/issues/14427 + -1, None, ] ) From 8679207f72266c7279d088a9a33e39019e5412b0 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 15 Jun 2024 00:31:30 +0200 Subject: [PATCH 10/10] Add Decimal workaround --- crates/polars-core/src/datatypes/dtype.rs | 7 ++++--- crates/polars-core/src/series/any_value.rs | 9 +++++++++ py-polars/src/series/construction.rs | 4 +--- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 1e0a13c57d59..0144af1537dd 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -53,9 +53,9 @@ pub enum DataType { Int64, Float32, Float64, - #[cfg(feature = "dtype-decimal")] /// Fixed point decimal type optional precision and non-negative scale. /// This is backed by a signed 128-bit integer which allows for up to 38 significant digits. + #[cfg(feature = "dtype-decimal")] Decimal(Option, Option), // precision/scale; scale being None means "infer" /// String data String, @@ -76,14 +76,14 @@ pub enum DataType { Array(Box, usize), /// A nested list with a variable size in each row List(Box), - #[cfg(feature = "object")] /// A generic type that can be used in a `Series` /// &'static str can be used to determine/set inner type + #[cfg(feature = "object")] Object(&'static str, Option>), Null, - #[cfg(feature = "dtype-categorical")] // The RevMapping has the internal state. // This is ignored with comparisons, hashing etc. + #[cfg(feature = "dtype-categorical")] Categorical(Option>, CategoricalOrdering), #[cfg(feature = "dtype-categorical")] Enum(Option>, CategoricalOrdering), @@ -140,6 +140,7 @@ impl PartialEq for DataType { (UnknownKind::Int(_), UnknownKind::Int(_)) => true, _ => l == r, }, + // TODO: Add Decimal equality _ => std::mem::discriminant(self) == std::mem::discriminant(other), } } diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index aa935786cb4a..72b4c10f79ae 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -60,8 +60,17 @@ impl Series { let dtype = if strict { get_first_non_null_dtype(values) } else { + // Currently does not work correctly for Decimal because equality is not implemented. any_values_to_supertype(values)? }; + + // TODO: Remove this when Decimal data type equality is implemented. + #[cfg(feature = "dtype-decimal")] + if !strict && dtype.is_decimal() { + let dtype = DataType::Decimal(None, None); + return Self::from_any_values_and_dtype(name, values, &dtype, strict); + } + Self::from_any_values_and_dtype(name, values, &dtype, strict) } diff --git a/py-polars/src/series/construction.rs b/py-polars/src/series/construction.rs index 4ef54674b13a..a2dba38927e4 100644 --- a/py-polars/src/series/construction.rs +++ b/py-polars/src/series/construction.rs @@ -264,9 +264,7 @@ impl PySeries { #[staticmethod] fn new_decimal(name: &str, values: &Bound, strict: bool) -> PyResult { - // Create a fake dtype with a placeholder "none" scale, to be inferred later. - let dtype = DataType::Decimal(None, None); - Self::new_from_any_values_and_dtype(name, values, Wrap(dtype), strict) + Self::new_from_any_values(name, values, strict) } #[staticmethod]