diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 1e0a13c57d59..0144af1537dd 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -53,9 +53,9 @@ pub enum DataType { Int64, Float32, Float64, - #[cfg(feature = "dtype-decimal")] /// Fixed point decimal type optional precision and non-negative scale. /// This is backed by a signed 128-bit integer which allows for up to 38 significant digits. + #[cfg(feature = "dtype-decimal")] Decimal(Option, Option), // precision/scale; scale being None means "infer" /// String data String, @@ -76,14 +76,14 @@ pub enum DataType { Array(Box, usize), /// A nested list with a variable size in each row List(Box), - #[cfg(feature = "object")] /// A generic type that can be used in a `Series` /// &'static str can be used to determine/set inner type + #[cfg(feature = "object")] Object(&'static str, Option>), Null, - #[cfg(feature = "dtype-categorical")] // The RevMapping has the internal state. // This is ignored with comparisons, hashing etc. + #[cfg(feature = "dtype-categorical")] Categorical(Option>, CategoricalOrdering), #[cfg(feature = "dtype-categorical")] Enum(Option>, CategoricalOrdering), @@ -140,6 +140,7 @@ impl PartialEq for DataType { (UnknownKind::Int(_), UnknownKind::Int(_)) => true, _ => l == r, }, + // TODO: Add Decimal equality _ => std::mem::discriminant(self) == std::mem::discriminant(other), } } diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index aa935786cb4a..72b4c10f79ae 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -60,8 +60,17 @@ impl Series { let dtype = if strict { get_first_non_null_dtype(values) } else { + // Currently does not work correctly for Decimal because equality is not implemented. any_values_to_supertype(values)? }; + + // TODO: Remove this when Decimal data type equality is implemented. + #[cfg(feature = "dtype-decimal")] + if !strict && dtype.is_decimal() { + let dtype = DataType::Decimal(None, None); + return Self::from_any_values_and_dtype(name, values, &dtype, strict); + } + Self::from_any_values_and_dtype(name, values, &dtype, strict) } diff --git a/docs/src/python/user-guide/getting-started/expressions.py b/docs/src/python/user-guide/getting-started/expressions.py index 12c6ea2170ec..dd27738f33ef 100644 --- a/docs/src/python/user-guide/getting-started/expressions.py +++ b/docs/src/python/user-guide/getting-started/expressions.py @@ -15,7 +15,7 @@ datetime(2025, 12, 4), datetime(2025, 12, 5), ], - "d": [1, 2.0, float("nan"), -42, None], + "d": [1.0, 2.0, float("nan"), -42.0, None], } ) # --8<-- [end:setup] diff --git a/docs/src/python/user-guide/getting-started/joins.py b/docs/src/python/user-guide/getting-started/joins.py index 5db0820843c7..fd7dcc19eb4a 100644 --- a/docs/src/python/user-guide/getting-started/joins.py +++ b/docs/src/python/user-guide/getting-started/joins.py @@ -9,7 +9,7 @@ { "a": range(8), "b": np.random.rand(8), - "d": [1, 2.0, float("nan"), float("nan"), 0, -5, -42, None], + "d": [1.0, 2.0, float("nan"), float("nan"), 0.0, -5.0, -42.0, None], } ) diff --git a/py-polars/polars/_utils/construction/series.py b/py-polars/polars/_utils/construction/series.py index a52e9b61678b..660147574465 100644 --- a/py-polars/polars/_utils/construction/series.py +++ b/py-polars/polars/_utils/construction/series.py @@ -2,7 +2,6 @@ import contextlib from datetime import date, datetime, time, timedelta -from decimal import Decimal as PyDecimal from itertools import islice from typing import ( TYPE_CHECKING, @@ -27,8 +26,6 @@ ) from polars._utils.wrap import wrap_s from polars.datatypes import ( - INTEGER_DTYPES, - TEMPORAL_DTYPES, Array, Boolean, Categorical, @@ -293,44 +290,20 @@ def _construct_series_with_fallbacks( constructor: Callable[[str, Sequence[Any], bool], PySeries], name: str, values: Sequence[Any], - target_dtype: PolarsDataType | None, + dtype: PolarsDataType | None, *, strict: bool, ) -> PySeries: """Construct Series, with fallbacks for basic type mismatch (eg: bool/int).""" - while True: - try: - return constructor(name, values, strict) - except TypeError as exc: - str_exc = str(exc) - - # from x to float - # error message can be: - # - integers: "'float' object cannot be interpreted as an integer" - if "'float'" in str_exc and ( - # we do not accept float values as int/temporal, as it causes silent - # information loss; the caller should explicitly cast in this case. - target_dtype not in (INTEGER_DTYPES | TEMPORAL_DTYPES) - ): - constructor = py_type_to_constructor(float) - - # from x to string - # error message can be: - # - integers: "'str' object cannot be interpreted as an integer" - # - floats: "must be real number, not str" - elif "'str'" in str_exc or str_exc == "must be real number, not str": - constructor = py_type_to_constructor(str) - - # from x to int - # error message can be: - # - bools: "'int' object cannot be converted to 'PyBool'" - elif str_exc == "'int' object cannot be converted to 'PyBool'": - constructor = py_type_to_constructor(int) - - elif "decimal.Decimal" in str_exc: - constructor = py_type_to_constructor(PyDecimal) - else: - raise + try: + return constructor(name, values, strict) + except TypeError: + if dtype is None: + return PySeries.new_from_any_values(name, values, strict=strict) + else: + return PySeries.new_from_any_values_and_dtype( + name, values, dtype, strict=strict + ) def iterable_to_pyseries( diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index dde9f1b2b0c1..f9e02db248ce 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -9487,7 +9487,7 @@ def fold(self, operation: Callable[[Series, Series], Series]) -> Series: >>> df = pl.DataFrame( ... { - ... "a": ["foo", "bar", 2], + ... "a": ["foo", "bar", None], ... "b": [1, 2, 3], ... "c": [1.0, 2.0, 3.0], ... } diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 3fbc3ae01a8b..f5fee387faa7 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -2964,7 +2964,7 @@ def max(self) -> Self: Examples -------- - >>> df = pl.DataFrame({"a": [-1, float("nan"), 1]}) + >>> df = pl.DataFrame({"a": [-1.0, float("nan"), 1.0]}) >>> df.select(pl.col("a").max()) shape: (1, 1) ┌─────┐ @@ -2983,7 +2983,7 @@ def min(self) -> Self: Examples -------- - >>> df = pl.DataFrame({"a": [-1, float("nan"), 1]}) + >>> df = pl.DataFrame({"a": [-1.0, float("nan"), 1.0]}) >>> df.select(pl.col("a").min()) shape: (1, 1) ┌──────┐ @@ -3005,7 +3005,7 @@ def nan_max(self) -> Self: Examples -------- - >>> df = pl.DataFrame({"a": [0, float("nan")]}) + >>> df = pl.DataFrame({"a": [0.0, float("nan")]}) >>> df.select(pl.col("a").nan_max()) shape: (1, 1) ┌─────┐ @@ -3027,7 +3027,7 @@ def nan_min(self) -> Self: Examples -------- - >>> df = pl.DataFrame({"a": [0, float("nan")]}) + >>> df = pl.DataFrame({"a": [0.0, float("nan")]}) >>> df.select(pl.col("a").nan_min()) shape: (1, 1) ┌─────┐ diff --git a/py-polars/polars/io/spreadsheet/functions.py b/py-polars/polars/io/spreadsheet/functions.py index 2d1864f06f02..1cee8fc50386 100644 --- a/py-polars/polars/io/spreadsheet/functions.py +++ b/py-polars/polars/io/spreadsheet/functions.py @@ -777,7 +777,7 @@ def _read_spreadsheet_openpyxl( # the non-strings will become null, so we handle the cast here values = [str(v) if (v is not None) else v for v in values] - s = pl.Series(name, values, dtype=dtype) + s = pl.Series(name, values, dtype=dtype, strict=False) series_data.append(s) df = pl.DataFrame( diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 465a81d34064..bd6757b8445d 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -1958,7 +1958,7 @@ def nan_max(self) -> int | float | date | datetime | timedelta | str: >>> s.nan_max() 4 - >>> s = pl.Series("a", [1, float("nan"), 4]) + >>> s = pl.Series("a", [1.0, float("nan"), 4.0]) >>> s.nan_max() nan """ @@ -1977,7 +1977,7 @@ def nan_min(self) -> int | float | date | datetime | timedelta | str: >>> s.nan_min() 1 - >>> s = pl.Series("a", [1, float("nan"), 4]) + >>> s = pl.Series("a", [1.0, float("nan"), 4.0]) >>> s.nan_min() nan """ @@ -4730,7 +4730,7 @@ def fill_nan(self, value: int | float | Expr | None) -> Series: Examples -------- - >>> s = pl.Series("a", [1, 2, 3, float("nan")]) + >>> s = pl.Series("a", [1.0, 2.0, 3.0, float("nan")]) >>> s.fill_nan(0) shape: (4,) Series: 'a' [f64] diff --git a/py-polars/src/series/construction.rs b/py-polars/src/series/construction.rs index abe959bcd79d..a2dba38927e4 100644 --- a/py-polars/src/series/construction.rs +++ b/py-polars/src/series/construction.rs @@ -98,7 +98,7 @@ impl PySeries { #[pymethods] impl PySeries { #[staticmethod] - fn new_opt_bool(name: &str, values: &Bound, strict: bool) -> PyResult { + fn new_opt_bool(name: &str, values: &Bound, _strict: bool) -> PyResult { let len = values.len()?; let mut builder = BooleanChunkedBuilder::new(name, len); @@ -107,25 +107,18 @@ impl PySeries { if value.is_none() { builder.append_null() } else { - match value.extract::() { - Ok(v) => builder.append_value(v), - Err(e) => { - if strict { - return Err(e); - } - builder.append_null() - }, - } + let v = value.extract::()?; + builder.append_value(v) } } - let ca = builder.finish(); + let ca = builder.finish(); let s = ca.into_series(); Ok(s.into()) } } -fn new_primitive<'a, T>(name: &str, values: &'a Bound, strict: bool) -> PyResult +fn new_primitive<'a, T>(name: &str, values: &'a Bound, _strict: bool) -> PyResult where T: PolarsNumericType, ChunkedArray: IntoSeries, @@ -139,19 +132,12 @@ where if value.is_none() { builder.append_null() } else { - match value.extract::() { - Ok(v) => builder.append_value(v), - Err(e) => { - if strict { - return Err(e); - } - builder.append_null() - }, - } + let v = value.extract::()?; + builder.append_value(v) } } - let ca = builder.finish(); + let ca = builder.finish(); let s = ca.into_series(); Ok(s.into()) } @@ -243,9 +229,11 @@ impl PySeries { for res in values.iter()? { let value = res?; - match value.extract::>() { - Ok(v) => builder.append_value(v), - Err(_) => builder.append_null(), + if value.is_none() { + builder.append_null() + } else { + let v = value.extract::>()?; + builder.append_value(v) } } @@ -261,9 +249,11 @@ impl PySeries { for res in values.iter()? { let value = res?; - match value.extract::<&[u8]>() { - Ok(v) => builder.append_value(v), - Err(_) => builder.append_null(), + if value.is_none() { + builder.append_null() + } else { + let v = value.extract::<&[u8]>()?; + builder.append_value(v) } } @@ -274,9 +264,7 @@ impl PySeries { #[staticmethod] fn new_decimal(name: &str, values: &Bound, strict: bool) -> PyResult { - // Create a fake dtype with a placeholder "none" scale, to be inferred later. - let dtype = DataType::Decimal(None, None); - Self::new_from_any_values_and_dtype(name, values, Wrap(dtype), strict) + Self::new_from_any_values(name, values, strict) } #[staticmethod] diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index 47398b04e1eb..0d27c5499240 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -1113,7 +1113,7 @@ def test_from_dicts_list_struct_without_inner_dtype_5611() -> None: def test_from_dict_upcast_primitive() -> None: - df = pl.from_dict({"a": [1, 2.1, 3], "b": [4, 5, 6.4]}) + df = pl.from_dict({"a": [1, 2.1, 3], "b": [4, 5, 6.4]}, strict=False) assert df.dtypes == [pl.Float64, pl.Float64] diff --git a/py-polars/tests/unit/constructors/test_dataframe.py b/py-polars/tests/unit/constructors/test_dataframe.py index ee5f5a494f27..3248b7c3ebe1 100644 --- a/py-polars/tests/unit/constructors/test_dataframe.py +++ b/py-polars/tests/unit/constructors/test_dataframe.py @@ -105,10 +105,7 @@ def test_df_init_strict() -> None: df = pl.DataFrame(data, schema=schema, strict=False) - # TODO: This should result in a Float Series without nulls - # https://github.com/pola-rs/polars/issues/14427 - assert df["a"].to_list() == [1, 2, None] - + assert df["a"].to_list() == [1, 2, 3] assert df["a"].dtype == pl.Int8 diff --git a/py-polars/tests/unit/constructors/test_series.py b/py-polars/tests/unit/constructors/test_series.py index fd6dc683bda8..4dcbad20f536 100644 --- a/py-polars/tests/unit/constructors/test_series.py +++ b/py-polars/tests/unit/constructors/test_series.py @@ -73,7 +73,10 @@ def test_sequence_of_series_with_dtype(dtype: pl.PolarsDataType | None) -> None: def test_upcast_primitive_and_strings( values: list[Any], dtype: pl.PolarsDataType, expected_dtype: pl.PolarsDataType ) -> None: - assert pl.Series(values, dtype=dtype).dtype == expected_dtype + with pytest.raises(TypeError): + pl.Series(values, dtype=dtype, strict=True) + + assert pl.Series(values, dtype=dtype, strict=False).dtype == expected_dtype def test_preserve_decimal_precision() -> None: diff --git a/py-polars/tests/unit/dataframe/test_getitem.py b/py-polars/tests/unit/dataframe/test_getitem.py index 0583526fce7a..5d112ad67528 100644 --- a/py-polars/tests/unit/dataframe/test_getitem.py +++ b/py-polars/tests/unit/dataframe/test_getitem.py @@ -205,7 +205,7 @@ def test_df_getitem_col_mixed_inputs(input: list[Any], match: str) -> None: @pytest.mark.parametrize( ("input", "match"), [ - ([0.0, 1.0], "'float' object cannot be interpreted as an integer"), + ([0.0, 1.0], "unexpected value while building Series of type Int64"), ( pl.Series([[1, 2], [3, 4]]), "cannot treat Series of type List\\(Int64\\) as indices", diff --git a/py-polars/tests/unit/dataframe/test_serde.py b/py-polars/tests/unit/dataframe/test_serde.py index ab627a84fb32..609f7d3fb113 100644 --- a/py-polars/tests/unit/dataframe/test_serde.py +++ b/py-polars/tests/unit/dataframe/test_serde.py @@ -78,7 +78,7 @@ def test_df_serde_enum() -> None: [ ([[1, 2, 3], [None, None, None], [1, None, 3]], pl.Array(pl.Int32(), shape=3)), ([["a", "b"], [None, None]], pl.Array(pl.Utf8, shape=2)), - ([[True, False, None], [None, None, None]], pl.Array(pl.Utf8, shape=3)), + ([[True, False, None], [None, None, None]], pl.Array(pl.Boolean, shape=3)), ( [[[1, 2, 3], [4, None, 5]], None, [[None, None, 2]]], pl.List(pl.Array(pl.Int32(), shape=3)), diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py index 6a1f549216a5..ee1005c1b0f6 100644 --- a/py-polars/tests/unit/datatypes/test_decimal.py +++ b/py-polars/tests/unit/datatypes/test_decimal.py @@ -22,7 +22,7 @@ def permutations_int_dec_none() -> list[tuple[D | int | None, ...]]: D("-0.01"), D("1.2345678"), D("500"), - # -1, # TODO: Address in https://github.com/pola-rs/polars/issues/14427 + -1, None, ] ) diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index e4f341b18566..1566e225732e 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -496,12 +496,12 @@ def test_horizontal_mean_single_column( out_dtype: PolarsDataType, ) -> None: out = ( - pl.LazyFrame({"a": pl.Series([1, 0], dtype=in_dtype)}) + pl.LazyFrame({"a": pl.Series([1, 0]).cast(in_dtype)}) .select(pl.mean_horizontal(pl.all())) .collect() ) - assert_frame_equal(out, pl.DataFrame({"a": pl.Series([1.0, 0.0], dtype=out_dtype)})) + assert_frame_equal(out, pl.DataFrame({"a": pl.Series([1.0, 0.0]).cast(out_dtype)})) def test_horizontal_mean_in_group_by_15115() -> None: diff --git a/py-polars/tests/unit/operations/aggregation/test_horizontal.py b/py-polars/tests/unit/operations/aggregation/test_horizontal.py index d840094571a8..c6c6631ab60c 100644 --- a/py-polars/tests/unit/operations/aggregation/test_horizontal.py +++ b/py-polars/tests/unit/operations/aggregation/test_horizontal.py @@ -430,7 +430,7 @@ def test_schema_mean_horizontal_single_column( in_dtype: pl.PolarsDataType, out_dtype: pl.PolarsDataType, ) -> None: - lf = pl.LazyFrame({"a": pl.Series([1, 0], dtype=in_dtype)}).select( + lf = pl.LazyFrame({"a": pl.Series([1, 0]).cast(in_dtype)}).select( pl.mean_horizontal(pl.all()) ) diff --git a/py-polars/tests/unit/operations/test_ewm.py b/py-polars/tests/unit/operations/test_ewm.py index 05b7a07ca09c..e643fdf30d3d 100644 --- a/py-polars/tests/unit/operations/test_ewm.py +++ b/py-polars/tests/unit/operations/test_ewm.py @@ -153,7 +153,7 @@ def test_ewm_std_var() -> None: var = series.ewm_var(alpha=0.5, ignore_nulls=False) std = series.ewm_std(alpha=0.5, ignore_nulls=False) - expected = pl.Series("a", [0, 4.5, 1.9285714285714288]) + expected = pl.Series("a", [0.0, 4.5, 1.9285714285714288]) assert np.allclose(var, std**2, rtol=1e-16) assert_series_equal(var, expected) @@ -163,13 +163,13 @@ def test_ewm_std_var_with_nulls() -> None: var = series.ewm_var(alpha=0.5, ignore_nulls=True) std = series.ewm_std(alpha=0.5, ignore_nulls=True) - expected = pl.Series("a", [0, 4.5, None, 1.9285714285714288]) + expected = pl.Series("a", [0.0, 4.5, None, 1.9285714285714288]) assert_series_equal(var, expected) assert_series_equal(std**2, expected) var = series.ewm_var(alpha=0.5, ignore_nulls=False) std = series.ewm_std(alpha=0.5, ignore_nulls=False) - expected = pl.Series("a", [0, 4.5, None, 1.7307692307692308]) + expected = pl.Series("a", [0.0, 4.5, None, 1.7307692307692308]) assert_series_equal(var, expected) assert_series_equal(std**2, expected) diff --git a/py-polars/tests/unit/series/test_getitem.py b/py-polars/tests/unit/series/test_getitem.py index 3f106de3034f..50dee3e0cf02 100644 --- a/py-polars/tests/unit/series/test_getitem.py +++ b/py-polars/tests/unit/series/test_getitem.py @@ -88,7 +88,7 @@ def test_series_getitem_multiple_indices(indices: Any) -> None: @pytest.mark.parametrize( ("input", "match"), [ - ([0.0, 1.0], "'float' object cannot be interpreted as an integer"), + ([0.0, 1.0], "unexpected value while building Series of type Int64"), ( pl.Series([[1, 2], [3, 4]]), "cannot treat Series of type List\\(Int64\\) as indices", diff --git a/py-polars/tests/unit/test_convert.py b/py-polars/tests/unit/test_convert.py index e74bd6f13024..74ff178dd9ea 100644 --- a/py-polars/tests/unit/test_convert.py +++ b/py-polars/tests/unit/test_convert.py @@ -5,15 +5,22 @@ import polars as pl -def test_schema_inference_from_rows() -> None: - # these have to upcast to float - result = pl.from_records([[1, 2.1, 3], [4, 5, 6.4]]) +def test_from_records_schema_inference() -> None: + data = [[1, 2.1, 3], [4, 5, 6.4]] + + with pytest.raises(TypeError, match="unexpected value"): + pl.from_records(data) + + result = pl.from_records(data, strict=False) assert result.to_dict(as_series=False) == { "column_0": [1.0, 2.1, 3.0], "column_1": [4.0, 5.0, 6.4], } - result = pl.from_dicts([{"a": 1, "b": 2}, {"a": 3.1, "b": 4.5}]) + +def test_from_dicts_schema_inference() -> None: + data = [{"a": 1, "b": 2}, {"a": 3.1, "b": 4.5}] + result = pl.from_dicts(data) # type: ignore[arg-type] assert result.to_dict(as_series=False) == { "a": [1.0, 3.1], "b": [2.0, 4.5], diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py index 06134a16939c..d958a659cb08 100644 --- a/py-polars/tests/unit/test_errors.py +++ b/py-polars/tests/unit/test_errors.py @@ -78,9 +78,7 @@ def test_error_on_invalid_series_init() -> None: ): pl.Series([1.5, 2.0, 3.75], dtype=dtype) - with pytest.raises( - TypeError, match="'float' object cannot be interpreted as an integer" - ): + with pytest.raises(TypeError, match="unexpected value"): pl.Series([1.5, 2.0, 3.75], dtype=pl.Int32)