diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index fbd5ed07d..abe325726 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -74,7 +74,7 @@ def __init__(self, name: str, data: Sequence[T_co] | None = None) -> None: if data is None: data = [] - self._series: pl.Series = pl.Series(name, data) + self._series: pl.Series = pl.Series(name, data, strict=False) def __contains__(self, item: Any) -> bool: return self._series.__contains__(item) @@ -84,7 +84,7 @@ def __eq__(self, other: object) -> bool: return NotImplemented if self is other: return True - return self._series.equals(other._series) + return self.name == other.name and self._series.equals(other._series) @overload def __getitem__(self, index: int) -> T_co: ... diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 150dbb5dd..4ef0cb8be 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -242,7 +242,7 @@ def from_json_file(path: str | Path) -> Table: try: return Table._from_polars_data_frame(pl.read_json(path)) - except pl.PolarsPanicError: + except (pl.exceptions.PanicException, pl.exceptions.ComputeError): # Can happen if the JSON file is empty (https://github.com/pola-rs/polars/issues/10234) return Table() @@ -322,7 +322,7 @@ def __init__(self, data: Mapping[str, Sequence[Any]] | None = None) -> None: ) # Implementation - self._lazy_frame: pl.LazyFrame = pl.LazyFrame(data) + self._lazy_frame: pl.LazyFrame = pl.LazyFrame(data, strict=False) self.__data_frame_cache: pl.DataFrame | None = None # Scramble the name to prevent access from outside def __eq__(self, other: object) -> bool: @@ -425,10 +425,10 @@ def schema(self) -> Schema: import polars as pl try: - return _PolarsSchema(self._lazy_frame.schema) - except (pl.NoDataError, pl.PolarsPanicError): + return _PolarsSchema(self._lazy_frame.collect_schema()) + except (pl.exceptions.NoDataError, pl.exceptions.PanicException): # Can happen for some operations on empty tables (e.g. https://github.com/pola-rs/polars/issues/16202) - return _PolarsSchema({}) + return _PolarsSchema(pl.Schema({})) # ------------------------------------------------------------------------------------------------------------------ # Column operations @@ -698,7 +698,7 @@ def remove_columns( _check_columns_exist(self, names) return Table._from_polars_lazy_frame( - self._lazy_frame.drop(names), + self._lazy_frame.drop(names, strict=not ignore_unknown_names), ) def remove_columns_except( @@ -1919,8 +1919,6 @@ def to_dict(self) -> dict[str, list[Any]]: def to_json_file( self, path: str | Path, - *, - orientation: Literal["column", "row"] = "column", ) -> None: """ Write the table to a JSON file. @@ -1934,10 +1932,6 @@ def to_json_file( ---------- path: The path to the JSON file. If the file extension is omitted, it is assumed to be ".json". - orientation: - The orientation of the JSON file. If "column", the JSON file will be structured as a list of columns. If - "row", the JSON file will be structured as a list of rows. Row orientation is more human-readable, but - slower and less memory-efficient. Raises ------ @@ -1954,7 +1948,7 @@ def to_json_file( path.parent.mkdir(parents=True, exist_ok=True) # Write JSON to file - self._data_frame.write_json(path, row_oriented=(orientation == "row")) + self._data_frame.write_json(path) def to_parquet_file(self, path: str | Path) -> None: """ diff --git a/src/safeds/data/tabular/typing/_polars_schema.py b/src/safeds/data/tabular/typing/_polars_schema.py index fe9b585f9..c5023d8bf 100644 --- a/src/safeds/data/tabular/typing/_polars_schema.py +++ b/src/safeds/data/tabular/typing/_polars_schema.py @@ -26,8 +26,8 @@ class _PolarsSchema(Schema): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, schema: dict[str, pl.DataType]): - self._schema: dict[str, pl.DataType] = schema + def __init__(self, schema: pl.Schema): + self._schema: pl.Schema = schema def __eq__(self, other: object) -> bool: if not isinstance(other, _PolarsSchema): @@ -66,7 +66,7 @@ def __str__(self) -> str: @property def column_names(self) -> list[str]: - return list(self._schema.keys()) + return list(self._schema.names()) # ------------------------------------------------------------------------------------------------------------------ # Getters diff --git a/tests/safeds/data/tabular/containers/_cell/test_hash.py b/tests/safeds/data/tabular/containers/_cell/test_hash.py index cdea9c706..721469c26 100644 --- a/tests/safeds/data/tabular/containers/_cell/test_hash.py +++ b/tests/safeds/data/tabular/containers/_cell/test_hash.py @@ -8,7 +8,7 @@ def test_should_be_deterministic() -> None: cell: Cell[Any] = _LazyCell(pl.col("a")) - assert hash(cell) == 7139977585477665635 + assert hash(cell) == 977452292332124345 @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_column/test_get_value.py b/tests/safeds/data/tabular/containers/_column/test_get_value.py index 156c10db6..34ee47ad0 100644 --- a/tests/safeds/data/tabular/containers/_column/test_get_value.py +++ b/tests/safeds/data/tabular/containers/_column/test_get_value.py @@ -32,7 +32,7 @@ def test_should_get_the_item_at_index(index: int, expected: Any) -> None: ], ) def test_should_raise_if_index_is_out_of_bounds(index: int) -> None: - column = Column("a", [0, "1"]) + column = Column("a", ["a", "b"]) with pytest.raises(IndexOutOfBoundsError): column.get_value(index) diff --git a/tests/safeds/data/tabular/containers/_column/test_repr.py b/tests/safeds/data/tabular/containers/_column/test_repr.py index 7b3b6dee3..dc4c6989b 100644 --- a/tests/safeds/data/tabular/containers/_column/test_repr.py +++ b/tests/safeds/data/tabular/containers/_column/test_repr.py @@ -14,8 +14,8 @@ "+-----+\n| a |\n| --- |\n| i64 |\n+=====+\n| 0 |\n+-----+", ), ( - Column("a", [0, "1"]), - "+------+\n| a |\n| --- |\n| str |\n+======+\n| null |\n| 1 |\n+------+", + Column("a", ["a", "b"]), + "+-----+\n| a |\n| --- |\n| str |\n+=====+\n| a |\n| b |\n+-----+", ), ], ids=[ diff --git a/tests/safeds/data/tabular/containers/_column/test_row_count.py b/tests/safeds/data/tabular/containers/_column/test_row_count.py index 5862060f8..2829d798d 100644 --- a/tests/safeds/data/tabular/containers/_column/test_row_count.py +++ b/tests/safeds/data/tabular/containers/_column/test_row_count.py @@ -7,7 +7,7 @@ [ (Column("a", []), 0), (Column("a", [0]), 1), - (Column("a", [0, "1"]), 2), + (Column("a", ["a", "b"]), 2), ], ids=[ "empty", diff --git a/tests/safeds/data/tabular/containers/_column/test_sizeof.py b/tests/safeds/data/tabular/containers/_column/test_sizeof.py index 2494fbc70..1517e6ce0 100644 --- a/tests/safeds/data/tabular/containers/_column/test_sizeof.py +++ b/tests/safeds/data/tabular/containers/_column/test_sizeof.py @@ -9,7 +9,7 @@ [ Column("a", []), Column("a", [0]), - Column("a", [0, "1"]), + Column("a", ["a", "b"]), ], ids=[ "empty", diff --git a/tests/safeds/data/tabular/containers/_column/test_str.py b/tests/safeds/data/tabular/containers/_column/test_str.py index 7b3b6dee3..dc4c6989b 100644 --- a/tests/safeds/data/tabular/containers/_column/test_str.py +++ b/tests/safeds/data/tabular/containers/_column/test_str.py @@ -14,8 +14,8 @@ "+-----+\n| a |\n| --- |\n| i64 |\n+=====+\n| 0 |\n+-----+", ), ( - Column("a", [0, "1"]), - "+------+\n| a |\n| --- |\n| str |\n+======+\n| null |\n| 1 |\n+------+", + Column("a", ["a", "b"]), + "+-----+\n| a |\n| --- |\n| str |\n+=====+\n| a |\n| b |\n+-----+", ), ], ids=[ diff --git a/tests/safeds/data/tabular/containers/_string_cell/test_hash.py b/tests/safeds/data/tabular/containers/_string_cell/test_hash.py index db9837178..cb2e0050b 100644 --- a/tests/safeds/data/tabular/containers/_string_cell/test_hash.py +++ b/tests/safeds/data/tabular/containers/_string_cell/test_hash.py @@ -6,7 +6,7 @@ def test_should_be_deterministic() -> None: cell = _LazyStringCell(pl.col("a")) - assert hash(cell) == 7139977585477665635 + assert hash(cell) == 977452292332124345 @pytest.mark.parametrize( diff --git a/tests/safeds/data/tabular/containers/_table/test_sizeof.py b/tests/safeds/data/tabular/containers/_table/test_sizeof.py index 69d896aa9..61e59e3e8 100644 --- a/tests/safeds/data/tabular/containers/_table/test_sizeof.py +++ b/tests/safeds/data/tabular/containers/_table/test_sizeof.py @@ -9,7 +9,7 @@ [ Table(), Table({"col1": [0]}), - Table({"col1": [0, "1"], "col2": ["a", "b"]}), + Table({"col1": [0, 1], "col2": ["a", "b"]}), ], ids=[ "empty table", diff --git a/tests/safeds/data/tabular/containers/_temporal_cell/test_hash.py b/tests/safeds/data/tabular/containers/_temporal_cell/test_hash.py index a5c49c0e6..64dd7cf42 100644 --- a/tests/safeds/data/tabular/containers/_temporal_cell/test_hash.py +++ b/tests/safeds/data/tabular/containers/_temporal_cell/test_hash.py @@ -6,7 +6,7 @@ def test_should_be_deterministic() -> None: cell = _LazyTemporalCell(pl.col("a")) - assert hash(cell) == 7139977585477665635 + assert hash(cell) == 977452292332124345 @pytest.mark.parametrize(