From 73cdfb119cb4243e56a5e99d40cfdbacf0466daf Mon Sep 17 00:00:00 2001 From: Gerhardsa0 <113539440+Gerhardsa0@users.noreply.github.com> Date: Mon, 29 Jan 2024 14:07:28 +0100 Subject: [PATCH] feat: class for time series (#508) Closes #481 ### Summary of Changes added the time series class, with all basic tests and functionalities so it can be used like a normal table or tagged table with an extra time column --------- Co-authored-by: Ettl Co-authored-by: Simon Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Co-authored-by: Simon Breuer <86068340+sibre28@users.noreply.github.com> Co-authored-by: Alexander <47296670+Marsmaennchen221@users.noreply.github.com> --- .../data/tabular/containers/__init__.py | 2 + src/safeds/data/tabular/containers/_table.py | 38 + .../data/tabular/containers/_tagged_table.py | 10 +- .../data/tabular/containers/_time_series.py | 848 ++++++++++++++++++ src/safeds/exceptions/__init__.py | 2 + src/safeds/exceptions/_data.py | 7 + tests/helpers/__init__.py | 13 +- tests/helpers/_assertions.py | 20 +- .../_tagged_table/_time_series/__init__.py | 0 .../_time_series/test_add_column.py | 38 + .../test_add_column_as_feature.py | 94 ++ .../_time_series/test_add_columns.py | 43 + .../test_add_columns_as_features.py | 115 +++ .../_time_series/test_add_row.py | 82 ++ .../_time_series/test_add_rows.py | 65 ++ .../_time_series/test_as_table.py | 58 ++ .../_time_series/test_features.py | 41 + .../_time_series/test_filter_rows.py | 124 +++ .../test_from_table_to_time_series.py | 214 +++++ .../_time_series/test_from_tagged_table.py | 194 ++++ .../_tagged_table/_time_series/test_init.py | 185 ++++ .../_time_series/test_keep_only_columns.py | 165 ++++ .../_time_series/test_remove_columns.py | 229 +++++ ...test_remove_columns_with_missing_values.py | 221 +++++ ...emove_columns_with_non_numerical_values.py | 218 +++++ .../test_remove_duplicate_rows.py | 55 ++ .../test_remove_rows_with_missing_values.py | 55 ++ 
.../test_remove_rows_with_outliers.py | 55 ++ .../_time_series/test_rename_column.py | 124 +++ .../_time_series/test_replace_column.py | 248 +++++ .../_time_series/test_slice_rows.py | 58 ++ .../_time_series/test_sort_columns.py | 62 ++ .../_time_series/test_time_target.py | 30 + .../_time_series/test_transform_column.py | 116 +++ .../_tagged_table/test_replace_column.py | 7 +- .../_table/_tagged_table/test_sort_columns.py | 1 + 36 files changed, 3827 insertions(+), 10 deletions(-) create mode 100644 src/safeds/data/tabular/containers/_time_series.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/__init__.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_add_column.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_add_column_as_feature.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_add_columns.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_add_columns_as_features.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_add_row.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_add_rows.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_as_table.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_features.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_filter_rows.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_from_table_to_time_series.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_from_tagged_table.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_init.py 
create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_keep_only_columns.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns_with_missing_values.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns_with_non_numerical_values.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_duplicate_rows.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_rows_with_missing_values.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_rows_with_outliers.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_rename_column.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_replace_column.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_slice_rows.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_sort_columns.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_time_target.py create mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_transform_column.py diff --git a/src/safeds/data/tabular/containers/__init__.py b/src/safeds/data/tabular/containers/__init__.py index e5ec2c697..6488edb2a 100644 --- a/src/safeds/data/tabular/containers/__init__.py +++ b/src/safeds/data/tabular/containers/__init__.py @@ -4,10 +4,12 @@ from ._row import Row from ._table import Table from ._tagged_table import TaggedTable +from ._time_series import TimeSeries __all__ = [ "Column", "Row", "Table", "TaggedTable", + 
"TimeSeries", ] diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 9615ebb05..a5a855a79 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -36,6 +36,7 @@ from safeds.data.tabular.transformation import InvertibleTableTransformer, TableTransformer from ._tagged_table import TaggedTable + from ._time_series import TimeSeries # Enable copy-on-write for pandas dataframes pd.options.mode.copy_on_write = True @@ -1715,6 +1716,43 @@ def tag_columns(self, target_name: str, feature_names: list[str] | None = None) return TaggedTable._from_table(self, target_name, feature_names) + def time_columns(self, target_name: str, time_name: str, feature_names: list[str] | None = None) -> TimeSeries: + """ + Return a new `TimeSeries` with columns marked as a target and time column or feature columns. + + The original table is not modified. + + Parameters + ---------- + target_name : str + Name of the target column. + time_name : str + Name of the time column. + feature_names : list[str] | None + Names of the feature columns. If None, all columns except the target and time columns are used. + + Returns + ------- + time_series : TimeSeries + A new time series with the given target, time and feature names. + + Raises + ------ + ValueError + If the target column is also a feature column. 
+ ValueError + If there is no other column than the specified target and time columns left to be a feature column + + Examples + -------- + >>> from safeds.data.tabular.containers import Table, TimeSeries + >>> table = Table.from_dict({"time": ["01.01", "01.02", "01.03"], "price": [1.10, 1.19, 1.79], "amount_bought": [74, 72, 51]}) + >>> tagged_table = table.time_columns(target_name="amount_bought",time_name = "time", feature_names=["price"]) + """ + from ._time_series import TimeSeries + + return TimeSeries._from_table_to_time_series(self, target_name, time_name, feature_names) + def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Table: """ Return a new `Table` with the provided column transformed by calling the provided transformer. diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index e18bd6ce8..e0d092aef 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -350,7 +350,7 @@ def add_row(self, row: Row) -> TaggedTable: Returns ------- table : TaggedTable - A new table with the added row at the end. + A new tagged table with the added row at the end. Raises ------ @@ -373,7 +373,7 @@ def add_rows(self, rows: list[Row] | Table) -> TaggedTable: Returns ------- result : TaggedTable - A new table which combines the original table and the given rows. + A new tagged table which combines the original table and the given rows. Raises ------ @@ -386,7 +386,7 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: """ Return a new `TaggedTable` containing only rows that match the given Callable (e.g. lambda function). - The original table is not modified. + The original tagged table is not modified. 
Parameters ---------- @@ -395,8 +395,8 @@ def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: Returns ------- - table : TaggedTable - A table containing only the rows to match the query. + result : TaggedTable + A new tagged table containing only the rows to match the query. """ return TaggedTable._from_table( super().filter_rows(query), diff --git a/src/safeds/data/tabular/containers/_time_series.py b/src/safeds/data/tabular/containers/_time_series.py new file mode 100644 index 000000000..7f18b5475 --- /dev/null +++ b/src/safeds/data/tabular/containers/_time_series.py @@ -0,0 +1,848 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from safeds.data.tabular.containers import Column, Row, Table, TaggedTable +from safeds.exceptions import ( + ColumnIsTargetError, + ColumnIsTimeError, + IllegalSchemaModificationError, + UnknownColumnNameError, +) + +if TYPE_CHECKING: + from collections.abc import Callable, Mapping, Sequence + from typing import Any + + +class TimeSeries(TaggedTable): + + # ------------------------------------------------------------------------------------------------------------------ + # Creation + # ------------------------------------------------------------------------------------------------------------------ + @staticmethod + def _from_tagged_table( + tagged_table: TaggedTable, + time_name: str, + ) -> TimeSeries: + """Create a time series from a tagged table. + + Parameters + ---------- + table : TaggedTable + The tagged table. + time_name: str + Name of the time column. + + Returns + ------- + time_series : TimeSeries + the created time series + + Raises + ------ + UnknownColumnNameError + If time_name matches none of the column names. 
+ Value Error + If time column is also a feature column + + Examples + -------- + >>> from safeds.data.tabular.containers import Table, TimeSeries + >>> tagged_table = TaggedTable({"date": ["01.01", "01.02", "01.03", "01.04"], "col1": ["a", "b", "c", "a"]}, "col1" ) + >>> timeseries = TimeSeries._from_tagged_table(tagged_table, time_name = "date") + """ + if time_name not in tagged_table.column_names: + raise UnknownColumnNameError([time_name]) + table = tagged_table._as_table() + # make sure that the time_name is not part of the features + result = object.__new__(TimeSeries) + feature_names = tagged_table.features.column_names + if time_name in feature_names: + feature_names.remove(time_name) + + if time_name == tagged_table.target.name: + raise ValueError(f"Column '{time_name}' cannot be both time column and target.") + + result._data = table._data + result._schema = table.schema + result._time = table.get_column(time_name) + result._features = table.keep_only_columns(feature_names) + result._target = table.get_column(tagged_table.target.name) + return result + + @staticmethod + def _from_table_to_time_series( + table: Table, + target_name: str, + time_name: str, + feature_names: list[str] | None = None, + ) -> TimeSeries: + """Create a TimeSeries from a table. + + Parameters + ---------- + table : Table + The table. + target_name : str + Name of the target column. + time_name: str + Name of the date column. + feature_names : list[str] | None + Names of the feature columns. If None, all columns except the target and time columns are used. + + Returns + ------- + time_series : TimeSeries + the created time series + + Raises + ------ + UnknownColumnNameError + If target_name or time_name matches none of the column names. 
+ Value Error + If there is no other column than the specified target and time columns left to be a feature column + Value Error + If one column is target and feature + Value Error + If one column is time and feature + + Examples + -------- + >>> from safeds.data.tabular.containers import Table, TimeSeries + >>> table = Table({"date": ["01.01", "01.02", "01.03", "01.04"], "f1": ["a", "b", "c", "a"], "t": [1,2,3,4]}) + >>> timeseries = TimeSeries._from_table_to_time_series(table, "t", "date", ["f1"]) + """ + if feature_names is not None and time_name in feature_names: + raise ValueError(f"Column '{time_name}' can not be time and feature column.") + + if feature_names is None: + feature_names = table.column_names + if time_name in feature_names: + feature_names.remove(time_name) + if target_name in feature_names: + feature_names.remove(target_name) + tagged_table = TaggedTable._from_table(table=table, target_name=target_name, feature_names=feature_names) + # check if time column got added as feature column + return TimeSeries._from_tagged_table(tagged_table=tagged_table, time_name=time_name) + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__( + self, + data: Mapping[str, Sequence[Any]], + target_name: str, + time_name: str, + feature_names: list[str] | None = None, + ): + """ + Create a time series from a mapping of column names to their values. + + Parameters + ---------- + data : Mapping[str, Sequence[Any]] + The data. + target_name : str + Name of the target column. + time_name : str + Name of the time column + feature_names : list[str] | None + Names of the feature columns. If None, all columns except the target and time columns are used. + + Raises + ------ + ColumnLengthMismatchError + If columns have different lengths. 
+ ValueError + If the target column is also a feature column. + ValueError + If no feature columns are specified. + ValueError + If time column is also a feature column + UnknownColumnNameError + If time column does not exist + + Examples + -------- + >>> from safeds.data.tabular.containers import TaggedTable + >>> table = TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"]) + """ + _data = Table(data) + + if feature_names is None: + feature_names = _data.column_names + if time_name in feature_names: + feature_names.remove(time_name) + if target_name in feature_names: + feature_names.remove(target_name) + + # Validate inputs + super().__init__(data, target_name, feature_names) + if time_name in feature_names: + raise ValueError(f"Column '{time_name}' can not be time and feature column.") + if time_name not in (_data.column_names): + raise UnknownColumnNameError([time_name]) + self._time: Column = _data.get_column(time_name) + + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def time(self) -> Column: + """ + Get the time column of the time series. + + Returns + ------- + Column + The time column. + """ + return self._time + + # ------------------------------------------------------------------------------------------------------------------ + # Overridden methods from TaggedTable class: + # ------------------------------------------------------------------------------------------------------------------ + def _as_table(self: TimeSeries) -> Table: + """ + Return a new `Table` with the tagging removed. + + The original time series is not modified. + + Parameters + ---------- + self: TimeSeries + The Time Series. + + Returns + ------- + table: Table + The time series as an untagged Table, i.e. 
without the information about which columns are features, target or time. + + """ + return Table.from_columns(super().to_columns()) + + def add_column(self, column: Column) -> TimeSeries: + """ + Return a new `TimeSeries` with the provided column attached at the end, as neither target nor feature column. + + The original time series is not modified. + + Parameters + ---------- + column : Column + The column to be added. + + Returns + ------- + result : TimeSeries + The time series with the column attached as neither target nor feature column. + + Raises + ------ + DuplicateColumnNameError + If the new column already exists. + ColumnSizeError + If the size of the column does not match the number of rows. + """ + return TimeSeries._from_tagged_table( + super().add_column(column), + time_name=self.time.name, + ) + + def add_column_as_feature(self, column: Column) -> TimeSeries: + """ + Return a new `TimeSeries` with the provided column attached at the end, as a feature column. + + the original time series is not modified. + + Parameters + ---------- + column : Column + The column to be added. + + Returns + ------- + result : TimeSeries + The time series with the attached feature column. + + Raises + ------ + DuplicateColumnNameError + If the new column already exists. + ColumnSizeError + If the size of the column does not match the number of rows. + """ + return TimeSeries._from_tagged_table( + super().add_column_as_feature(column), + time_name=self.time.name, + ) + + def add_columns_as_features(self, columns: list[Column] | Table) -> TimeSeries: + """ + Return a new `TimeSeries` with the provided columns attached at the end, as feature columns. + + The original time series is not modified. + + Parameters + ---------- + columns : list[Column] | Table + The columns to be added as features. + + Returns + ------- + result : TimeSeries + The time series with the attached feature columns. 
+ + Raises + ------ + DuplicateColumnNameError + If any of the new feature columns already exist. + ColumnSizeError + If the size of any feature column does not match the number of rows. + """ + return TimeSeries._from_tagged_table( + super().add_columns_as_features(columns), + time_name=self.time.name, + ) + + def add_columns(self, columns: list[Column] | Table) -> TimeSeries: + """ + Return a new `TimeSeries` with multiple added columns, as neither target nor feature columns. + + The original time series is not modified. + + Parameters + ---------- + columns : list[Column] or Table + The columns to be added. + + Returns + ------- + result: TimeSeries + A new time series combining the original table and the given columns as neither target nor feature columns. + + Raises + ------ + DuplicateColumnNameError + If at least one column name from the provided column list already exists in the time series. + ColumnSizeError + If at least one of the column sizes from the provided column list does not match the time series. + """ + return TimeSeries._from_tagged_table( + super().add_columns(columns), + time_name=self.time.name, + ) + + def add_row(self, row: Row) -> TimeSeries: + """ + Return a new `TimeSeries` with an extra Row attached. + + The original time series is not modified. + + Parameters + ---------- + row : Row + The row to be added. + + Returns + ------- + table : TimeSeries + A new time series with the added row at the end. + + Raises + ------ + UnknownColumnNameError + If the row has different column names than the time series. + """ + return TimeSeries._from_tagged_table(super().add_row(row), time_name=self.time.name) + + def add_rows(self, rows: list[Row] | Table) -> TimeSeries: + """ + Return a new `TimeSeries` with multiple extra Rows attached. + + The original time series is not modified. + + Parameters + ---------- + rows : list[Row] or Table + The rows to be added. 
+ + Returns + ------- + result : TimeSeries + A new time series which combines the original time series and the given rows. + + Raises + ------ + UnknownColumnNameError + If at least one of the rows have different column names than the time series. + """ + return TimeSeries._from_tagged_table(super().add_rows(rows), time_name=self.time.name) + + def filter_rows(self, query: Callable[[Row], bool]) -> TimeSeries: + """ + Return a new `TimeSeries` containing only rows that match the given Callable (e.g. lambda function). + + The original time series is not modified. + + Parameters + ---------- + query : lambda function + A Callable that is applied to all rows. + + Returns + ------- + result: TimeSeries + A time series containing only the rows to match the query. + """ + return TimeSeries._from_tagged_table( + super().filter_rows(query), + time_name=self.time.name, + ) + + def keep_only_columns(self, column_names: list[str]) -> TimeSeries: + """ + Return a new `TimeSeries` with only the given column(s). + + The original time series is not modified. + + Parameters + ---------- + column_names : list[str] + A list containing the columns to be kept. + + Returns + ------- + table : TimeSeries + A time series containing only the given column(s). + + Raises + ------ + UnknownColumnNameError + If any of the given columns does not exist. + IllegalSchemaModificationError + If none of the given columns is the target or time column or any of the feature columns. 
+ """ + if self.target.name not in column_names: + raise IllegalSchemaModificationError("Must keep the target column.") + if len(set(self.features.column_names).intersection(set(column_names))) == 0: + raise IllegalSchemaModificationError("Must keep at least one feature column.") + if self.time.name not in column_names: + raise IllegalSchemaModificationError("Must keep the time column.") + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + super().keep_only_columns(column_names), + target_name=self.target.name, + feature_names=sorted( + set(self.features.column_names).intersection(set(column_names)), + key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__, + ), + ), + time_name=self.time.name, + ) + + def remove_columns(self, column_names: list[str]) -> TimeSeries: + """ + Return a new `TimeSeries` with the given column(s) removed from the time series. + + The original time series is not modified. + + Parameters + ---------- + column_names : list[str] + The names of all columns to be dropped. + + Returns + ------- + table : TimeSeries + A time series without the given columns. + + Raises + ------ + UnknownColumnNameError + If any of the given columns does not exist. + ColumnIsTargetError + If any of the given columns is the target column. + ColumnIsTimeError + If any of the given columns is the time column. + IllegalSchemaModificationError + If the given columns contain all the feature columns. 
+ """ + if self.target.name in column_names: + raise ColumnIsTargetError(self.target.name) + if len(set(self.features.column_names) - set(column_names)) == 0: + raise IllegalSchemaModificationError("You cannot remove every feature column.") + if self.time.name in column_names: + raise ColumnIsTimeError(self.time.name) + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + super().remove_columns(column_names), + target_name=self.target.name, + feature_names=sorted( + set(self.features.column_names) - set(column_names), + key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__, + ), + ), + time_name=self.time.name, + ) + + def remove_columns_with_missing_values(self) -> TimeSeries: + """ + Return a new `TimeSeries` with every column that misses values removed. + + The original time series is not modified. + + Returns + ------- + table : TimeSeries + A time series without the columns that contain missing values. + + Raises + ------ + ColumnIsTargetError + If any of the columns to be removed is the target column. + ColumnIsTimeError + If any of the columns to be removed is the time column. + IllegalSchemaModificationError + If the columns to remove contain all the feature columns. + """ + table = super().remove_columns_with_missing_values() + if self.time.name not in table.column_names: + raise ColumnIsTimeError(self.time.name) + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + table, + self.target.name, + feature_names=sorted( + set(self.features.column_names).intersection(set(table.column_names)), + key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__, + ), + ), + time_name=self.time.name, + ) + + def remove_columns_with_non_numerical_values(self) -> TimeSeries: + """ + Return a new `TimeSeries` with every column that contains non-numerical values removed. + + The original time series is not modified. 
+ + Returns + ------- + table : TimeSeries + A time series without the columns that contain non-numerical values. + + Raises + ------ + ColumnIsTargetError + If any of the columns to be removed is the target column. + ColumnIsTimeError + If any of the columns to be removed is the time column. + IllegalSchemaModificationError + If the columns to remove contain all the feature columns. + """ + table = super().remove_columns_with_non_numerical_values() + if self.time.name not in table.column_names: + raise ColumnIsTimeError(self.time.name) + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + table, + self.target.name, + feature_names=sorted( + set(self.features.column_names).intersection(set(table.column_names)), + key={val: ix for ix, val in enumerate(self.features.column_names)}.__getitem__, + ), + ), + time_name=self.time.name, + ) + + def remove_duplicate_rows(self) -> TimeSeries: + """ + Return a new `TimeSeries` with all row duplicates removed. + + The original time series is not modified. + + Returns + ------- + result : TimeSeries + The time series with the duplicate rows removed. + """ + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + super().remove_duplicate_rows(), + target_name=self.target.name, + feature_names=self.features.column_names, + ), + time_name=self.time.name, + ) + + def remove_rows_with_missing_values(self) -> TimeSeries: + """ + Return a new `TimeSeries` without the rows that contain missing values. + + The original time series is not modified. + + Returns + ------- + table : TimeSeries + A time series without the rows that contain missing values. 
+ """ + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + super().remove_rows_with_missing_values(), + target_name=self.target.name, + feature_names=self.features.column_names, + ), + time_name=self.time.name, + ) + + def remove_rows_with_outliers(self) -> TimeSeries: + """ + Return a new `TimeSeries` with all rows that contain at least one outlier removed. + + We define an outlier as a value that has a distance of more than 3 standard deviations from the column mean. + Missing values are not considered outliers. They are also ignored during the calculation of the standard + deviation. + + The original time series is not modified. + + Returns + ------- + new_time_series : TimeSeries + A new time series without rows containing outliers. + """ + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + super().remove_rows_with_outliers(), + target_name=self.target.name, + feature_names=self.features.column_names, + ), + time_name=self.time.name, + ) + + def rename_column(self, old_name: str, new_name: str) -> TimeSeries: + """ + Return a new `TimeSeries` with a single column renamed. + + The original time series is not modified. + + Parameters + ---------- + old_name : str + The old name of the column. + new_name : str + The new name of the column. + + Returns + ------- + table : TimeSeries + The time series with the renamed column. + + Raises + ------ + UnknownColumnNameError + If the specified old target column name does not exist. + DuplicateColumnNameError + If the specified new target column name already exists. 
+ """ + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + super().rename_column(old_name, new_name), + target_name=new_name if self.target.name == old_name else self.target.name, + feature_names=( + self.features.column_names + if old_name not in self.features.column_names + else [ + column_name if column_name != old_name else new_name + for column_name in self.features.column_names + ] + ), + ), + time_name=new_name if self.time.name == old_name else self.time.name, + ) + + def replace_column(self, old_column_name: str, new_columns: list[Column]) -> TimeSeries: + """ + Return a new `TimeSeries` with the specified old column replaced by a list of new columns. + + If the column to be replaced is the target or time column, it must be replaced by exactly one column. That column + becomes the new target or time column. If the column to be replaced is a feature column, the new columns that replace it + all become feature columns. + + The order of columns is kept. The original time series is not modified. + + Parameters + ---------- + old_column_name : str + The name of the column to be replaced. + new_columns : list[Column] + The new columns replacing the old column. + + Returns + ------- + result : TimeSeries + A time series with the old column replaced by the new columns. + + Raises + ------ + UnknownColumnNameError + If the old column does not exist. + DuplicateColumnNameError + If the new column already exists and the existing column is not affected by the replacement. + ColumnSizeError + If the size of the column does not match the amount of rows. + IllegalSchemaModificationError + If the target or time column would be removed or replaced by more than one column. 
+ """ + if old_column_name == self.time.name: + if len(new_columns) != 1: + raise IllegalSchemaModificationError( + f'Time column "{self.time.name}" can only be replaced by exactly one new column.', + ) + else: + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + super().replace_column(old_column_name, new_columns), + target_name=self.target.name, + feature_names=self.features.column_names, + ), + time_name=new_columns[0].name, + ) + if old_column_name == self.target.name: + if len(new_columns) != 1: + raise IllegalSchemaModificationError( + f'Target column "{self.target.name}" can only be replaced by exactly one new column.', + ) + else: + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + super().replace_column(old_column_name, new_columns), + target_name=new_columns[0].name, + feature_names=self.features.column_names, + ), + time_name=self.time.name, + ) + else: + return TimeSeries._from_tagged_table( + TaggedTable._from_table( + super().replace_column(old_column_name, new_columns), + target_name=self.target.name, + feature_names=( + self.features.column_names + if old_column_name not in self.features.column_names + else self.features.column_names[: self.features.column_names.index(old_column_name)] + + [col.name for col in new_columns] + + self.features.column_names[self.features.column_names.index(old_column_name) + 1 :] + ), + ), + time_name=self.time.name, + ) + + def slice_rows( + self, + start: int | None = None, + end: int | None = None, + step: int = 1, + ) -> TimeSeries: + """ + Slice a part of the table into a new `TimeSeries`. + + The original time series is not modified. + + Parameters + ---------- + start : int | None + The first index of the range to be copied into a new time series, None by default. + end : int | None + The last index of the range to be copied into a new time series, None by default. + step : int + The step size used to iterate through the time series, 1 by default. 
def sort_columns(
    self,
    comparator: Callable[[Column, Column], int] = lambda col1, col2: (col1.name > col2.name)
    - (col1.name < col2.name),
) -> TimeSeries:
    """
    Return a new `TimeSeries` whose columns are ordered by the given comparator.

    The comparator receives two columns `col1` and `col2` and returns an integer:

    * A negative number places `col1` before `col2`.
    * A positive number places `col1` after `col2`.
    * Zero keeps the original relative order of `col1` and `col2`.

    Without a comparator, the columns are sorted alphabetically by name.

    The original time series is not modified.

    Parameters
    ----------
    comparator : Callable[[Column, Column], int]
        The function used to compare two columns.

    Returns
    -------
    new_time_series : TimeSeries
        A new time series with sorted columns.
    """
    reordered = super().sort_columns(comparator)

    # Keep the feature names in the order in which they now appear in the sorted table.
    feature_lookup = set(self.features.column_names)
    ordered_features = [name for name in reordered.column_names if name in feature_lookup]

    tagged = TaggedTable._from_table(
        reordered,
        target_name=self.target.name,
        feature_names=ordered_features,
    )
    return TimeSeries._from_tagged_table(tagged, time_name=self.time.name)
class ColumnIsTimeError(IllegalSchemaModificationError):
    """
    Exception raised in overridden methods of the Table class when removing Time Columns from a TimeSeries.

    Parameters
    ----------
    column_name : str
        The name of the time column; it is embedded in the error message.
    """

    def __init__(self, column_name: str) -> None:
        super().__init__(f'Column "{column_name}" is the time column and cannot be removed.')
def assert_that_time_series_are_equal(table1: TimeSeries, table2: TimeSeries) -> None:
    """
    Assert that two time series are equal.

    The schema, the features, the target and the time column are compared first (in that order), followed by a
    comparison of the two objects themselves.

    Parameters
    ----------
    table1: TimeSeries
        The first timeseries.
    table2: TimeSeries
        The timeseries to compare the first timeseries to.
    """
    for attribute in ("schema", "features", "target", "time"):
        assert getattr(table1, attribute) == getattr(table2, attribute)
    assert table1 == table2
@pytest.mark.parametrize(
    ("time_series", "column", "error_msg"),
    [
        (
            TimeSeries(
                {"time": [0, 1, 2], "A": [1, 2, 3], "B": [4, 5, 6]},
                target_name="B",
                time_name="time",
                feature_names=["A"],
            ),
            Column("A", [7, 8, 9]),
            r"Column 'A' already exists.",
        ),
    ],
    ids=["column_already_exists"],
)
def test_should_raise_duplicate_column_name_if_column_already_exists(
    time_series: TimeSeries,
    column: Column,
    error_msg: str,
) -> None:
    """
    Adding a feature column whose name is already taken must raise a `DuplicateColumnNameError`.

    The fixture is a `TimeSeries` (not a plain `TaggedTable`) so that the `TimeSeries` implementation of
    `add_column_as_feature` is the one under test.
    """
    with pytest.raises(DuplicateColumnNameError, match=error_msg):
        time_series.add_column_as_feature(column)
a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_add_columns_as_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_add_columns_as_features.py new file mode 100644 index 000000000..4bbbacccf --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_add_columns_as_features.py @@ -0,0 +1,115 @@ +import pytest +from safeds.data.tabular.containers import Column, Table, TimeSeries +from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("time_series", "columns", "time_series_with_new_columns"), + [ + ( + Table({"time": [0, 1], "f1": [1, 2], "target": [2, 3]}).time_columns( + target_name="target", + time_name="time", + feature_names=["f1"], + ), + [Column("f2", [4, 5]), Column("f3", [6, 7])], + Table({"time": [0, 1], "f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).time_columns( + target_name="target", + time_name="time", + feature_names=["f1", "f2", "f3"], + ), + ), + ( + Table({"time": [0, 1], "f1": [1, 2], "target": [2, 3]}).time_columns( + target_name="target", + time_name="time", + feature_names=["f1"], + ), + Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]), + Table({"time": [0, 1], "f1": [1, 2], "target": [2, 3], "f2": [4, 5], "f3": [6, 7]}).time_columns( + target_name="target", + time_name="time", + feature_names=["f1", "f2", "f3"], + ), + ), + ( + Table({"time": [0, 1], "f1": [1, 2], "target": [2, 3], "other": [0, -1]}).time_columns( + target_name="target", + time_name="time", + feature_names=["f1"], + ), + Table.from_columns([Column("f2", [4, 5]), Column("f3", [6, 7])]), + Table({ + "time": [0, 1], + "f1": [1, 2], + "target": [2, 3], + "other": [0, -1], + "f2": [4, 5], + "f3": [6, 7], + }).time_columns( + target_name="target", + time_name="time", + feature_names=["f1", "f2", "f3"], + ), + ), + ], + ids=["new columns as 
@pytest.mark.parametrize(
    ("time_series", "columns", "error_msg"),
    [
        pytest.param(
            TimeSeries(
                {"time": [0, 1, 2], "A": [1, 2, 3], "B": [4, 5, 6]},
                target_name="B",
                time_name="time",
                feature_names=["A"],
            ),
            [Column("C", [5, 7, 8, 9]), Column("D", [4, 10, 11, 12])],
            r"Expected a column of size 3 but got column of size 4.",
            id="columns_are_oversize",
        ),
    ],
)
def test_should_raise_column_size_error_if_columns_are_oversize(
    time_series: TimeSeries,
    columns: list[Column] | Table,
    error_msg: str,
) -> None:
    """Adding feature columns that are longer than the time series must raise a `ColumnSizeError`."""
    with pytest.raises(ColumnSizeError, match=error_msg):
        time_series.add_columns_as_features(columns)
# NOTE: the original tests (test_add_row in tagged_table) throw a warning here as well.
@pytest.mark.parametrize(
    ("time_series", "row", "expected_time_series"),
    [
        (
            TimeSeries({"time": [], "feature": [], "target": []}, "target", "time"),
            Row({"time": 0, "feature": 2, "target": 5}),
            TimeSeries({"time": [0], "feature": [2], "target": [5]}, "target", "time"),
        ),
    ],
    ids=["empty_feature_column"],
)
def test_should_add_row_to_empty_table(
    time_series: TimeSeries,
    row: Row,
    expected_time_series: TimeSeries,
) -> None:
    """Adding a row to a time series whose columns are all empty must yield a time series with that single row."""
    assert_that_time_series_are_equal(time_series.add_row(row), expected_time_series)
@pytest.mark.parametrize(
    ("time_series", "rows", "error_msg"),
    [
        pytest.param(
            TimeSeries({"time": [], "feature": [], "target": []}, "target", "time", ["feature"]),
            [Row({"feat": None, "targ": None}), Row({"targ": None, "feat": None})],
            r"Could not find column\(s\) 'time, feature, target'.",
            id="columns_missing",
        ),
    ],
)
def test_should_raise_an_error_if_rows_schemas_are_invalid(
    time_series: TimeSeries,
    rows: list[Row] | Table,
    error_msg: str,
) -> None:
    """Adding rows that lack the columns of the time series must raise an `UnknownColumnNameError`."""
    with pytest.raises(UnknownColumnNameError, match=error_msg):
        time_series.add_rows(rows)
@pytest.mark.parametrize(
    ("time_series", "expected"),
    [
        pytest.param(
            TimeSeries(
                {
                    "time": [0, 1, 2],
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "target": [1, 3, 2],
                },
                "target",
                "time",
                ["feature_1", "feature_2"],
            ),
            Table(
                {
                    "time": [0, 1, 2],
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "target": [1, 3, 2],
                },
            ),
            id="normal",
        ),
        pytest.param(
            TimeSeries(
                {
                    "time": [0, 1, 2],
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "other": [3, 9, 12],
                    "target": [1, 3, 2],
                },
                "target",
                "time",
                ["feature_1", "feature_2"],
            ),
            Table(
                {
                    "time": [0, 1, 2],
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "other": [3, 9, 12],
                    "target": [1, 3, 2],
                },
            ),
            id="table_with_column_as_non_feature",
        ),
    ],
)
def test_should_return_table(time_series: TimeSeries, expected: Table) -> None:
    """`_as_table` must return a table with the same schema and data as the time series."""
    converted = time_series._as_table()
    assert converted.schema == expected.schema
    assert converted == expected
@pytest.mark.parametrize(
    ("time_series", "expected", "query"),
    [
        pytest.param(
            TimeSeries(
                {
                    "time": [0, 1, 2],
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "target": [1, 3, 2],
                },
                "target",
                "time",
            ),
            TimeSeries(
                {
                    "time": [0, 2],
                    "feature_1": [3, 6],
                    "feature_2": [6, 9],
                    "target": [1, 2],
                },
                "target",
                "time",
            ),
            lambda row: all(row.get_value(col) < 10 for col in row.column_names),
            id="remove_rows_with_values_greater_9",
        ),
        pytest.param(
            TimeSeries(
                {
                    "time": [0, 1, 2, 3],
                    "feature_1": [3, 9, 6, 2],
                    "feature_2": [6, 12, 9, 3],
                    "other": [1, 2, 3, 10],
                    "target": [1, 3, 2, 4],
                },
                "target",
                "time",
                ["feature_1", "feature_2"],
            ),
            TimeSeries(
                {
                    "time": [0, 2],
                    "feature_1": [3, 6],
                    "feature_2": [6, 9],
                    "other": [1, 3],
                    "target": [1, 2],
                },
                "target",
                "time",
                ["feature_1", "feature_2"],
            ),
            lambda row: all(row.get_value(col) < 10 for col in row.column_names),
            id="remove_rows_with_values_greater_9_non_feature_columns",
        ),
        pytest.param(
            TimeSeries(
                {
                    "time": [0, 1, 2],
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "target": [1, 3, 2],
                },
                "target",
                "time",
            ),
            TimeSeries(
                {
                    "time": [0, 1, 2],
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "target": [1, 3, 2],
                },
                "target",
                "time",
            ),
            lambda row: all(row.get_value(col) < 20 for col in row.column_names),
            id="remove_no_rows",
        ),
        pytest.param(
            TimeSeries(
                {
                    "time": [0, 1, 2, 3],
                    "feature_1": [3, 9, 6, 2],
                    "feature_2": [6, 12, 9, 3],
                    "other": [1, 2, 3, 10],
                    "target": [1, 3, 2, 4],
                },
                "target",
                "time",
                ["feature_1", "feature_2"],
            ),
            TimeSeries(
                {
                    "time": [0, 1, 2, 3],
                    "feature_1": [3, 9, 6, 2],
                    "feature_2": [6, 12, 9, 3],
                    "other": [1, 2, 3, 10],
                    "target": [1, 3, 2, 4],
                },
                "target",
                "time",
                ["feature_1", "feature_2"],
            ),
            lambda row: all(row.get_value(col) < 20 for col in row.column_names),
            id="remove_no_rows_non_feature_columns",
        ),
    ],
)
def test_should_filter_rows(time_series: TimeSeries, expected: TimeSeries, query: Callable[[Row], bool]) -> None:
    """`filter_rows` must keep exactly the rows matching the query and preserve the time series tagging."""
    filtered = time_series.filter_rows(query)
    assert_that_time_series_are_equal(filtered, expected)
@pytest.mark.parametrize(
    ("table", "target_name", "time_name", "feature_names"),
    [
        pytest.param(
            Table(
                {
                    "time": [0, 1],
                    "A": [1, 4],
                    "B": [2, 5],
                    "C": [3, 6],
                    "T": [0, 1],
                },
            ),
            "T",
            "time",
            ["A", "B", "C"],
            id="create_tagged_table",
        ),
        pytest.param(
            Table(
                {
                    "time": [0, 1],
                    "A": [1, 4],
                    "B": [2, 5],
                    "C": [3, 6],
                    "T": [0, 1],
                },
            ),
            "T",
            "time",
            ["A", "C"],
            id="tagged_table_not_all_columns_are_features",
        ),
        pytest.param(
            Table(
                {
                    "time": [0, 1],
                    "A": [1, 4],
                    "B": [2, 5],
                    "C": [3, 6],
                    "T": [0, 1],
                },
            ),
            "T",
            "time",
            None,
            id="tagged_table_with_feature_names_as_None",
        ),
    ],
)
def test_should_create_a_tagged_table(
    table: Table,
    target_name: str,
    time_name: str,
    feature_names: list[str] | None,
) -> None:
    """
    `TimeSeries._from_table_to_time_series` must build a `TimeSeries` with the requested tagging.

    Despite the test name, the object under test is a `TimeSeries`. When no feature names are given, every column
    except the target and time column is expected to be a feature.
    """
    time_series = TimeSeries._from_table_to_time_series(
        table,
        target_name=target_name,
        time_name=time_name,
        feature_names=feature_names,
    )

    if feature_names is None:
        feature_names = table.remove_columns([target_name, time_name]).column_names

    assert isinstance(time_series, TimeSeries)
    assert time_series._features.column_names == feature_names
    assert time_series._target.name == target_name
    assert time_series._features == table.keep_only_columns(feature_names)
    assert time_series._target == table.get_column(target_name)
    assert time_series.time == table.get_column(time_name)
@pytest.mark.parametrize(
    ("table", "target_name", "time_name", "feature_names"),
    [
        pytest.param(
            Table(
                {
                    "time": [0, 1],
                    "A": [1, 4],
                    "B": [2, 5],
                    "C": [3, 6],
                    "T": [0, 1],
                },
            ),
            "T",
            "time",
            ["A", "B", "C"],
            id="create_tagged_table",
        ),
        pytest.param(
            Table(
                {
                    "time": [0, 1],
                    "A": [1, 4],
                    "B": [2, 5],
                    "C": [3, 6],
                    "T": [0, 1],
                },
            ),
            "T",
            "time",
            ["A", "C"],
            id="tagged_table_not_all_columns_are_features",
        ),
        pytest.param(
            Table(
                {
                    "time": [0, 1],
                    "A": [1, 4],
                    "B": [2, 5],
                    "C": [3, 6],
                    "T": [0, 1],
                },
            ),
            "T",
            "time",
            None,
            id="tagged_table_with_feature_names_as_None",
        ),
    ],
)
def test_should_create_a_time_series(
    table: Table,
    target_name: str,
    time_name: str,
    feature_names: list[str] | None,
) -> None:
    """
    `TimeSeries._from_tagged_table` must turn a tagged table into a `TimeSeries` with the requested time column.

    When no feature names are given, every column except the target and time column is expected to be a feature.
    """
    tagged_table = TaggedTable._from_table(table, target_name=target_name, feature_names=feature_names)
    time_series = TimeSeries._from_tagged_table(tagged_table, time_name=time_name)

    if feature_names is None:
        feature_names = table.remove_columns([target_name, time_name]).column_names

    assert isinstance(time_series, TimeSeries)
    assert time_series._features.column_names == feature_names
    assert time_series._target.name == target_name
    assert time_series._features == table.keep_only_columns(feature_names)
    assert time_series._target == table.get_column(target_name)
    assert time_series.time == table.get_column(time_name)
UnknownColumnNameError, + r"Could not find column\(s\) 'D'", + ), + ( + { + "time": [0, 1], + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + "time", + "A", + ["A", "B", "C"], + ValueError, + r"Column 'A' cannot be both feature and target.", + ), + ( + { + "time": [0, 1], + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + "time", + "D", + [], + ValueError, + r"At least one feature column must be specified.", + ), + ( + { + "time": [0, 1], + "A": [1, 4], + }, + "time", + "A", + None, + ValueError, + r"At least one feature column must be specified.", + ), + ( + { + "time": [0, 1], + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + "random", + "B", + ["A"], + UnknownColumnNameError, + r"Could not find column\(s\) 'random'.", + ), + ( + { + "time": [0, 1], + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + "time", + "T", + ["A", "B", "C", "time"], + ValueError, + "Column 'time' can not be time and feature column.", + ), + ], + ids=[ + "feature_does_not_exist", + "target_does_not_exist", + "target_and_feature_overlap", + "features_are_empty-explicitly", + "features_are_empty_implicitly", + "time_column_does_not_exist", + "time_is_also_feature", + ], +) +def test_should_raise_error( + data: dict[str, list[int]], + time_name: str, + target_name: str, + feature_names: list[str] | None, + error: type[Exception], + error_msg: str, +) -> None: + with pytest.raises(error, match=error_msg): + TimeSeries(data, target_name=target_name, time_name=time_name, feature_names=feature_names) + + +@pytest.mark.parametrize( + ("data", "time_name", "target_name", "feature_names"), + [ + ( + { + "time": [0, 1], + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + "time", + "T", + ["A", "B", "C"], + ), + ( + { + "time": [0, 1], + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + "time", + "T", + ["A", "C"], + ), + ( + { + "time": [0, 1], + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + 
}, + "time", + "T", + None, + ), + ], + ids=["create_tagged_table", "tagged_table_not_all_columns_are_features", "tagged_table_with_feature_names_as_None"], +) +def test_should_create_a_time_series( + data: dict[str, list[int]], + time_name: str, + target_name: str, + feature_names: list[str] | None, +) -> None: + time_series = TimeSeries(data, target_name=target_name, time_name=time_name, feature_names=feature_names) + if feature_names is None: + feature_names = list(data.keys()) + feature_names.remove(target_name) + feature_names.remove(time_name) + assert isinstance(time_series, TimeSeries) + assert time_series._features.column_names == feature_names + assert time_series._target.name == target_name + assert time_series._features == Table(data).keep_only_columns(feature_names) + assert time_series._target == Table(data).get_column(target_name) + assert time_series.time == Table(data).get_column(time_name) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_keep_only_columns.py new file mode 100644 index 000000000..190c387d1 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_keep_only_columns.py @@ -0,0 +1,165 @@ +import pytest +from safeds.data.tabular.containers import Table, TimeSeries +from safeds.exceptions import IllegalSchemaModificationError + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("table", "column_names", "expected"), + [ + ( + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ), + ["feat1", "target", "time"], + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat1": [1, 2, 3], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ), + ), + ( + 
TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "other": [3, 4, 5], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ), + ["feat1", "other", "target", "time"], + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat1": [1, 2, 3], + "other": [3, 4, 5], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ), + ), + ( + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "other": [3, 4, 5], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ), + ["feat1", "target", "time"], + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat1": [1, 2, 3], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ), + ), + ], + ids=["keep_feature_and_target_column", "keep_non_feature_column", "don't_keep_non_feature_column"], +) +def test_should_return_table(table: TimeSeries, column_names: list[str], expected: TimeSeries) -> None: + new_table = table.keep_only_columns(column_names) + assert_that_time_series_are_equal(new_table, expected) + + +@pytest.mark.parametrize( + ("table", "column_names", "error_msg"), + [ + ( + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "other": [3, 5, 7], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ["feat1", "feat2"], + ), + ["feat1", "feat2"], + r"Illegal schema modification: Must keep the target column.", + ), + ( + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + "other": [3, 5, 7], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ["feat1", "feat2"], + ), + ["target", "other"], + r"Illegal schema modification: Must keep at least one feature column.", + ), + ( + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat1": [1, 2, 3], + "feat2": [4, 5, 6], + 
"other": [3, 5, 7], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ["feat1", "feat2"], + ), + ["target", "feat1", "other"], + r"Illegal schema modification: Must keep the time column.", + ), + ], + ids=["table_remove_target", "table_remove_all_features", "table_remove_time"], +) +def test_should_raise_illegal_schema_modification(table: TimeSeries, column_names: list[str], error_msg: str) -> None: + with pytest.raises( + IllegalSchemaModificationError, + match=error_msg, + ): + table.keep_only_columns(column_names) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns.py new file mode 100644 index 000000000..ccadbd877 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns.py @@ -0,0 +1,229 @@ +import pytest +from safeds.data.tabular.containers import Table, TimeSeries +from safeds.exceptions import ColumnIsTargetError, ColumnIsTimeError, IllegalSchemaModificationError + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("table", "columns", "expected"), + [ + ( + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ["feat_1", "feat_2"], + ), + ["feat_2"], + TimeSeries._from_table_to_time_series( + Table({ + "time": [0, 1, 2], + "feat_1": [1, 2, 3], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9], + }), + "target", + "time", + ["feat_1"], + ), + ), + ( + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ["feat_1", "feat_2"], + ), + 
["non_feat_2"], + TimeSeries._from_table_to_time_series( + Table({ + "time": [0, 1, 2], + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "target": [7, 8, 9], + }), + "target", + "time", + ["feat_1", "feat_2"], + ), + ), + ( + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ["feat_1", "feat_2"], + ), + ["non_feat_1", "non_feat_2"], + TimeSeries._from_table_to_time_series( + Table({"time": [0, 1, 2], "feat_1": [1, 2, 3], "feat_2": [4, 5, 6], "target": [7, 8, 9]}), + "target", + "time", + ["feat_1", "feat_2"], + ), + ), + ( + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat_1": [1, 2, 3], + "feat_2": [4, 5, 6], + "non_feat_1": [2, 4, 6], + "non_feat_2": [3, 6, 9], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ["feat_1", "feat_2"], + ), + ["feat_2", "non_feat_2"], + TimeSeries._from_table_to_time_series( + Table({"time": [0, 1, 2], "feat_1": [1, 2, 3], "non_feat_1": [2, 4, 6], "target": [7, 8, 9]}), + "target", + "time", + ["feat_1"], + ), + ), + ( + TimeSeries._from_table_to_time_series( + Table( + { + "time": [0, 1, 2], + "feat_1": [1, 2, 3], + "non_feat_1": [2, 4, 6], + "target": [7, 8, 9], + }, + ), + "target", + "time", + ["feat_1"], + ), + [], + TimeSeries._from_table_to_time_series( + Table({"time": [0, 1, 2], "feat_1": [1, 2, 3], "non_feat_1": [2, 4, 6], "target": [7, 8, 9]}), + "target", + "time", + ["feat_1"], + ), + ), + ], + ids=[ + "remove_feature", + "remove_non_feature", + "remove_all_non_features", + "remove_some_feat_and_some_non_feat", + "remove_nothing", + ], +) +def test_should_remove_columns(table: TimeSeries, columns: list[str], expected: TimeSeries) -> None: + new_table = table.remove_columns(columns) + assert_that_time_series_are_equal(new_table, expected) + + +@pytest.mark.parametrize( + ("table", 
"columns", "error", "error_msg"), + [ + ( + TimeSeries._from_table_to_time_series( + Table({"time": [0, 1, 2], "feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}), + "target", + "time", + ["feat"], + ), + ["target"], + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ), + ( + TimeSeries._from_table_to_time_series( + Table({"time": [0, 1, 2], "feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}), + "target", + "time", + ["feat"], + ), + ["non_feat", "target"], + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ), + ( + TimeSeries._from_table_to_time_series( + Table({"time": [0, 1, 2], "feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}), + "target", + "time", + ["feat"], + ), + ["feat"], + IllegalSchemaModificationError, + r"Illegal schema modification: You cannot remove every feature column.", + ), + ( + TimeSeries._from_table_to_time_series( + Table({"time": [0, 1, 2], "feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}), + "target", + "time", + ["feat"], + ), + ["feat", "non_feat"], + IllegalSchemaModificationError, + r"Illegal schema modification: You cannot remove every feature column.", + ), + ( + TimeSeries._from_table_to_time_series( + Table({"time": [0, 1, 2], "feat": [1, 2, 3], "non_feat": [1, 2, 3], "target": [4, 5, 6]}), + "target", + "time", + ["feat"], + ), + ["time"], + ColumnIsTimeError, + r'Illegal schema modification: Column "time" is the time column and cannot be removed.', + ), + ], + ids=[ + "remove_only_target", + "remove_non_feat_and_target", + "remove_all_features", + "remove_non_feat_and_all_features", + "remove_time_column", + ], +) +def test_should_raise_in_remove_columns( + table: TimeSeries, + columns: list[str], + error: type[Exception], + error_msg: str, +) -> None: + with pytest.raises(error, match=error_msg): + table.remove_columns(columns) diff --git 
a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns_with_missing_values.py new file mode 100644 index 000000000..01958f2dc --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns_with_missing_values.py @@ -0,0 +1,221 @@ +import pytest +from safeds.data.tabular.containers import TimeSeries +from safeds.exceptions import ColumnIsTargetError, ColumnIsTimeError, IllegalSchemaModificationError + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("table", "expected"), + [ + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_complete": [0, 1, 2], + "feature_incomplete": [3, None, 5], + "non_feature_complete": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_complete", "feature_incomplete"], + ), + TimeSeries( + { + "time": [0, 1, 2], + "feature_complete": [0, 1, 2], + "non_feature_complete": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_complete"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_complete": [0, 1, 2], + "non_feature_complete": [7, 8, 9], + "non_feature_incomplete": [3, None, 5], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_complete"], + ), + TimeSeries( + { + "time": [0, 1, 2], + "feature_complete": [0, 1, 2], + "non_feature_complete": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_complete"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_complete": [0, 1, 2], + "non_feature_complete": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_complete"], + ), + TimeSeries( + { + "time": [0, 1, 2], + "feature_complete": [0, 1, 2], + "non_feature_complete": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_complete"], + ), + ), + ], + 
ids=["incomplete_feature", "incomplete_non_feature", "all_complete"], +) +def test_should_remove_columns_with_missing_values(table: TimeSeries, expected: TimeSeries) -> None: + new_table = table.remove_columns_with_missing_values() + assert_that_time_series_are_equal(new_table, expected) + + +@pytest.mark.parametrize( + ("table", "error", "error_msg"), + [ + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, 1, 2], + "non_feature": [1, 2, 3], + "target": [3, None, 5], + }, + "target", + "time", + ["feature"], + ), + ColumnIsTargetError, + 'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, None, 2], + "non_feature": [1, 2, 3], + "target": [None, 4, 5], + }, + "target", + "time", + ["feature"], + ), + ColumnIsTargetError, + 'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ), + ( + TimeSeries( + { + "time": [0, None, 2], + "feature": [0, 1, 2], + "non_feature": [1, 2, 3], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature"], + ), + ColumnIsTimeError, + 'Illegal schema modification: Column "time" is the time column and cannot be removed.', + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, 1, 2], + "non_feature": [1, 2, 3], + "target": [3, 4, None], + }, + "target", + "time", + ["feature"], + ), + ColumnIsTargetError, + 'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, None, 2], + "non_feature": [1, None, 3], + "target": [3, None, 5], + }, + "target", + "time", + ["feature"], + ), + ColumnIsTargetError, + 'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, None, 2], + "non_feature": [1, 2, 3], + "target": [3, 2, 5], + }, + "target", + "time", + ["feature"], + ), +
IllegalSchemaModificationError, + "Illegal schema modification: You cannot remove every feature column.", + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, None, 2], + "non_feature": [1, None, 3], + "target": [3, 2, 5], + }, + "target", + "time", + ["feature"], + ), + IllegalSchemaModificationError, + "Illegal schema modification: You cannot remove every feature column.", + ), + ], + ids=[ + "only_target_incomplete", + "also_feature_incomplete", + "time_is_incomplete", + "also_non_feature_incomplete", + "all_incomplete", + "all_features_incomplete", + "all_features_and_non_feature_incomplete", + ], +) +def test_should_raise_in_remove_columns_with_missing_values( + table: TimeSeries, + error: type[Exception], + error_msg: str, +) -> None: + with pytest.raises( + error, + match=error_msg, + ): + table.remove_columns_with_missing_values() diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns_with_non_numerical_values.py new file mode 100644 index 000000000..d735edb51 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_columns_with_non_numerical_values.py @@ -0,0 +1,218 @@ +import pytest +from safeds.data.tabular.containers import TimeSeries +from safeds.exceptions import ColumnIsTargetError, ColumnIsTimeError, IllegalSchemaModificationError + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("table", "expected"), + [ + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_numerical": [0, 1, 2], + "feature_non_numerical": ["a", "b", "c"], + "non_feature_numerical": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_numerical", "feature_non_numerical"], + ), + TimeSeries( + { + "time": [0, 1, 2], + "feature_numerical": [0, 1, 2], + "non_feature_numerical": [7, 8, 9], + 
"target": [3, 4, 5], + }, + "target", + "time", + ["feature_numerical"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_numerical": [0, 1, 2], + "non_feature_numerical": [7, 8, 9], + "non_feature_non_numerical": ["a", "b", "c"], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_numerical"], + ), + TimeSeries( + { + "time": [0, 1, 2], + "feature_numerical": [0, 1, 2], + "non_feature_numerical": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_numerical"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_numerical": [0, 1, 2], + "non_feature_numerical": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_numerical"], + ), + TimeSeries( + { + "time": [0, 1, 2], + "feature_numerical": [0, 1, 2], + "non_feature_numerical": [7, 8, 9], + "target": [3, 4, 5], + }, + "target", + "time", + ["feature_numerical"], + ), + ), + ], + ids=["non_numerical_feature", "non_numerical_non_feature", "all_numerical"], +) +def test_should_remove_columns_with_non_numerical_values(table: TimeSeries, expected: TimeSeries) -> None: + new_table = table.remove_columns_with_non_numerical_values() + assert_that_time_series_are_equal(new_table, expected) + + +@pytest.mark.parametrize( + ("table", "error", "error_msg"), + [ + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, 1, 2], + "non_feature": [1, 2, 3], + "target": ["a", "b", "c"], + }, + "target", + "time", + ["feature"], + ), + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, "x", 2], + "non_feature": [1, 2, 3], + "target": ["a", "b", "c"], + }, + "target", + "time", + ["feature"], + ), + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, 1, 2], + "non_feature": [1, "x", 3], + "target": 
["a", "b", "c"], + }, + "target", + "time", + ["feature"], + ), + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ), + ( + TimeSeries( + { + "time": ["!", "x", "2"], + "feature": [0, 1, 2], + "non_feature": [1, "x", 3], + "target": [1, 2, 3], + }, + "target", + "time", + ["feature"], + ), + ColumnIsTimeError, + r'Illegal schema modification: Column "time" is the time column and cannot be removed.', + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, "x", 2], + "non_feature": [1, "x", 3], + "target": ["a", "b", "c"], + }, + "target", + "time", + ["feature"], + ), + ColumnIsTargetError, + r'Illegal schema modification: Column "target" is the target column and cannot be removed.', + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, "a", 2], + "non_feature": [1, 2, 3], + "target": [3, 2, 5], + }, + "target", + "time", + ["feature"], + ), + IllegalSchemaModificationError, + r"Illegal schema modification: You cannot remove every feature column.", + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, "a", 2], + "non_feature": [1, "b", 3], + "target": [3, 2, 5], + }, + "target", + "time", + ["feature"], + ), + IllegalSchemaModificationError, + r"Illegal schema modification: You cannot remove every feature column.", + ), + ], + ids=[ + "only_target_non_numerical", + "also_feature_non_numerical", + "also_non_feature_non_numerical", + "time_non_numerical", + "all_non_numerical", + "all_features_non_numerical", + "all_features_and_non_feature_non_numerical", + ], +) +def test_should_raise_in_remove_columns_with_non_numerical_values( + table: TimeSeries, + error: type[Exception], + error_msg: str, +) -> None: + with pytest.raises(error, match=error_msg): + table.remove_columns_with_non_numerical_values() diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_duplicate_rows.py 
b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_duplicate_rows.py new file mode 100644 index 000000000..a4e0a3426 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_duplicate_rows.py @@ -0,0 +1,55 @@ +import pytest +from safeds.data.tabular.containers import TimeSeries + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("table", "expected"), + [ + ( + TimeSeries( + { + "time": [0, 0, 1], + "feature": [0, 0, 1], + "target": [2, 2, 3], + }, + "target", + "time", + ), + TimeSeries( + { + "time": [0, 1], + "feature": [0, 1], + "target": [2, 3], + }, + "target", + "time", + ), + ), + ( + TimeSeries( + { + "time": [0, 0, 1], + "feature": [0, 1, 2], + "target": [2, 2, 3], + }, + "target", + "time", + ), + TimeSeries( + { + "time": [0, 0, 1], + "feature": [0, 1, 2], + "target": [2, 2, 3], + }, + "target", + "time", + ), + ), + ], + ids=["with_duplicate_rows", "without_duplicate_rows"], +) +def test_should_remove_duplicate_rows(table: TimeSeries, expected: TimeSeries) -> None: + new_table = table.remove_duplicate_rows() + assert_that_time_series_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_rows_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_rows_with_missing_values.py new file mode 100644 index 000000000..078151ac9 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_rows_with_missing_values.py @@ -0,0 +1,55 @@ +import pytest +from safeds.data.tabular.containers import TimeSeries + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("table", "expected"), + [ + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0.0, None, 2.0], + "target": [3.0, 4.0, 5.0], + }, + "target", + "time", + ), + TimeSeries( + { + 
"time": [0, 2], + "feature": [0.0, 2.0], + "target": [3.0, 5.0], + }, + "target", + "time", + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0.0, 1.0, 2.0], + "target": [3.0, 4.0, 5.0], + }, + "target", + "time", + ), + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0.0, 1.0, 2.0], + "target": [3.0, 4.0, 5.0], + }, + "target", + "time", + ), + ), + ], + ids=["with_missing_values", "without_missing_values"], +) +def test_should_remove_rows_with_missing_values(table: TimeSeries, expected: TimeSeries) -> None: + new_table = table.remove_rows_with_missing_values() + assert_that_time_series_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_rows_with_outliers.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_rows_with_outliers.py new file mode 100644 index 000000000..8d206c65c --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_remove_rows_with_outliers.py @@ -0,0 +1,55 @@ +import pytest +from safeds.data.tabular.containers import TimeSeries + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("table", "expected"), + [ + ( + TimeSeries( + { + "time": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + "feature": [1.0, 11.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + "target", + "time", + ), + TimeSeries( + { + "time": [0, 2, 3, 4, 5, 6, 7, 8, 9], + "feature": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + "target", + "time", + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + "feature": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + "target", + "time", + ), + TimeSeries( + { + "time": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + "feature": [1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "target": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + "target", + "time", + ), + ), + ], + ids=["with_outliers", "no_outliers"], +) +def test_should_remove_rows_with_outliers(table: TimeSeries, expected: TimeSeries) -> None: + new_table = table.remove_rows_with_outliers() + assert_that_time_series_are_equal(new_table, expected) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_rename_column.py new file mode 100644 index 000000000..a0214b4ab --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_rename_column.py @@ -0,0 +1,124 @@ +import pytest +from safeds.data.tabular.containers import TimeSeries + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("original_table", "old_column_name", "new_column_name", "result_table"), + [ + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "no_feature": [2, 3, 4], + "target": [3, 4, 5], + }, + target_name="target", + time_name="time", + feature_names=["feature_old"], + ), + "feature_old", + "feature_new", + TimeSeries( + { + "time": [0, 1, 2], + "feature_new": [0, 1, 2], + "no_feature": [2, 3, 4], + "target": [3, 4, 5], + }, + target_name="target", + time_name="time", + feature_names=["feature_new"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, 1, 2], + "no_feature": [2, 3, 4], + "target_old": [3, 4, 5], + }, + target_name="target_old", + time_name="time", + feature_names=["feature"], + ), + "target_old", + "target_new", + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, 1, 2], + "no_feature": [2, 3, 4], + "target_new": [3, 4, 5], + }, + target_name="target_new", + time_name="time", + feature_names=["feature"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, 1, 2], + 
"no_feature_old": [2, 3, 4], + "target": [3, 4, 5], + }, + target_name="target", + time_name="time", + feature_names=["feature"], + ), + "no_feature_old", + "no_feature_new", + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, 1, 2], + "no_feature_new": [2, 3, 4], + "target": [3, 4, 5], + }, + target_name="target", + time_name="time", + feature_names=["feature"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target": [3, 4, 5], + }, + target_name="target", + time_name="time", + feature_names=["feature"], + ), + "time", + "new_time", + TimeSeries( + { + "new_time": [0, 1, 2], + "feature": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target": [3, 4, 5], + }, + target_name="target", + time_name="new_time", + feature_names=["feature"], + ), + ), + ], + ids=["rename_feature_column", "rename_target_column", "rename_non_feature_column", "rename_time_column"], +) +def test_should_rename_column( + original_table: TimeSeries, + old_column_name: str, + new_column_name: str, + result_table: TimeSeries, +) -> None: + new_table = original_table.rename_column(old_column_name, new_column_name) + assert_that_time_series_are_equal(new_table, result_table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_replace_column.py new file mode 100644 index 000000000..818f6580a --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_replace_column.py @@ -0,0 +1,248 @@ +import pytest +from safeds.data.tabular.containers import Column, TimeSeries +from safeds.exceptions import IllegalSchemaModificationError + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("original_table", "new_columns", "column_name_to_be_replaced", "result_table"), + [ + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], 
+ "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ["feature_old"], + ), + [Column("feature_new", [2, 1, 0])], + "feature_old", + TimeSeries( + { + "time": [0, 1, 2], + "feature_new": [2, 1, 0], + "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ["feature_new"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ["feature_old"], + ), + [Column("feature_new_a", [2, 1, 0]), Column("feature_new_b", [4, 2, 0])], + "feature_old", + TimeSeries( + { + "time": [0, 1, 2], + "feature_new_a": [2, 1, 0], + "feature_new_b": [4, 2, 0], + "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ["feature_new_a", "feature_new_b"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ["feature_old"], + ), + [Column("no_feature_new", [2, 1, 0])], + "no_feature_old", + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "no_feature_new": [2, 1, 0], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ["feature_old"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ["feature_old"], + ), + [Column("no_feature_new_a", [2, 1, 0]), Column("no_feature_new_b", [4, 2, 0])], + "no_feature_old", + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "no_feature_new_a": [2, 1, 0], + "no_feature_new_b": [4, 2, 0], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ["feature_old"], + ), + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ["feature_old"], + ), + 
[Column("target_new", [2, 1, 0])], + "target_old", + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target_new": [2, 1, 0], + }, + "target_new", + "time", + ["feature_old"], + ), + ), + ( + TimeSeries( + { + "time_old": [0, 1, 2], + "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + "time_old", + ["feature_old"], + ), + [Column("time_new", [1, 2, 3])], + "time_old", + TimeSeries( + { + "time_new": [1, 2, 3], + "feature_old": [0, 1, 2], + "no_feature_old": [2, 3, 4], + "target_old": [3, 4, 5], + }, + "target_old", + "time_new", + ["feature_old"], + ), + ), + ], + ids=[ + "replace_feature_column_with_one", + "replace_feature_column_with_multiple", + "replace_non_feature_column_with_one", + "replace_non_feature_column_with_multiple", + "replace_target_column", + "replace_time_column", + ], +) +def test_should_replace_column( + original_table: TimeSeries, + new_columns: list[Column], + column_name_to_be_replaced: str, + result_table: TimeSeries, +) -> None: + new_table = original_table.replace_column(column_name_to_be_replaced, new_columns) + assert_that_time_series_are_equal(new_table, result_table) + + +@pytest.mark.parametrize( + ("original_table", "new_columns", "column_name_to_be_replaced", "error"), + [ + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ), + [], + "target_old", + 'Target column "target_old" can only be replaced by exactly one new column.', + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "target_old": [3, 4, 5], + }, + "target_old", + "time", + ), + [Column("target_new_a", [2, 1, 0]), Column("target_new_b", [4, 2, 0])], + "target_old", + 'Target column "target_old" can only be replaced by exactly one new column.', + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "feature_old": [0, 1, 2], + "target_old": [3, 4, 5], + }, +
"target_old", + "time", + ), + [Column("target_new_a", [2, 1, 0]), Column("target_new_b", [4, 2, 0])], + "time", + 'Time column "time" can only be replaced by exactly one new column.', + ), + ], + ids=["zero_columns", "multiple_columns", "time_column"], +) +# Covers both the target column and the time column: each may only be replaced by exactly one new column. +def test_should_throw_illegal_schema_modification( + original_table: TimeSeries, + new_columns: list[Column], + column_name_to_be_replaced: str, + error: str, +) -> None: + with pytest.raises( + IllegalSchemaModificationError, + match=error, + ): + original_table.replace_column(column_name_to_be_replaced, new_columns) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_slice_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_slice_rows.py new file mode 100644 index 000000000..1625ba17c --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_slice_rows.py @@ -0,0 +1,58 @@ +import pytest +from _pytest.python_api import raises +from safeds.data.tabular.containers import TimeSeries +from safeds.exceptions import IndexOutOfBoundsError + +from tests.helpers import assert_that_time_series_are_equal + + +@pytest.mark.parametrize( + ("table", "test_table", "second_test_table"), + [ + ( + TimeSeries( + data={"time": [0, 1, 2], "feature": [1, 2, 1], "non_feature": [0, 2, 4], "target": [1, 2, 4]}, + target_name="target", + time_name="time", + feature_names=["non_feature"], + ), + TimeSeries( + data={"time": [0, 1], "feature": [1, 2], "non_feature": [0, 2], "target": [1, 2]}, + target_name="target", + time_name="time", + feature_names=["non_feature"], + ), + TimeSeries( + {"time": [0, 2], "feature": [1, 1], "non_feature": [0, 4], "target": [1, 4]}, + target_name="target", + time_name="time", + feature_names=["non_feature"], + ), + ), + ], + ids=["Table with three rows"], +) +def test_should_slice_rows(table: TimeSeries,
test_table: TimeSeries, second_test_table: TimeSeries) -> None: + new_table = table.slice_rows(0, 2, 1) + second_new_table = table.slice_rows(0, 3, 2) + third_new_table = table.slice_rows() + assert_that_time_series_are_equal(new_table, test_table) + assert_that_time_series_are_equal(second_new_table, second_test_table) + assert_that_time_series_are_equal(third_new_table, table) + + +@pytest.mark.parametrize( + ("start", "end", "step", "error_message"), + [ + (3, 2, 1, r"There is no element in the range \[3, 2\]"), + (4, 0, 1, r"There is no element in the range \[4, 0\]"), + (0, 4, 1, r"There is no element at index '4'"), + (-4, 0, 1, r"There is no element at index '-4'"), + (0, -4, 1, r"There is no element in the range \[0, -4\]"), + ], +) +def test_should_raise_if_index_out_of_bounds(start: int, end: int, step: int, error_message: str) -> None: + table = TimeSeries({"time": [0, 1, 2], "feature": [1, 2, 1], "target": [1, 2, 4]}, "target", "time") + + with raises(IndexOutOfBoundsError, match=error_message): + table.slice_rows(start, end, step) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_sort_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_sort_columns.py new file mode 100644 index 000000000..c50bb9f78 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_sort_columns.py @@ -0,0 +1,62 @@ +from collections.abc import Callable + +import pytest +from safeds.data.tabular.containers import Column, TimeSeries + + +@pytest.mark.parametrize( + ("query", "col1", "col2", "col3", "col4", "col5"), + [ + (None, 0, 1, 2, 3, 4), + ( + lambda col1, col2: (col1.name < col2.name) - (col1.name > col2.name), + 4, + 3, + 2, + 1, + 0, + ), + ], + ids=["no query", "with query"], +) +def test_should_return_sorted_table( + query: Callable[[Column, Column], int], + col1: int, + col2: int, + col3: int, + col4: int, + col5: int, +) -> None: + columns = [ +
Column("col1", ["A", "B", "C", "A", "D"]), + Column("col2", ["Test1", "Test1", "Test3", "Test1", "Test4"]), + Column("col3", [1, 2, 3, 4, 5]), + Column("col4", [2, 3, 1, 4, 6]), + Column("time", [0, 1, 2, 3, 4]), + ] + table1 = TimeSeries( + { + "col2": ["Test1", "Test1", "Test3", "Test1", "Test4"], + "col3": [1, 2, 3, 4, 5], + "col4": [2, 3, 1, 4, 6], + "col1": ["A", "B", "C", "A", "D"], + "time": [0, 1, 2, 3, 4], + }, + target_name="col1", + time_name="time", + feature_names=["col4", "col3"], + ) + if query is not None: + table_sorted = table1.sort_columns(query) + else: + table_sorted = table1.sort_columns() + table_sorted_columns = table_sorted.to_columns() + assert table_sorted.schema == table1.schema + assert table_sorted_columns[0] == columns[col1] + assert table_sorted_columns[1] == columns[col2] + assert table_sorted_columns[2] == columns[col3] + assert table_sorted_columns[3] == columns[col4] + assert table_sorted_columns[4] == columns[col5] + assert table_sorted.features == table1.features + assert table_sorted.target == table1.target + assert table_sorted.time == table1.time diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_time_target.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_time_target.py new file mode 100644 index 000000000..31dc2b899 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_time_target.py @@ -0,0 +1,30 @@ +import pytest +from safeds.data.tabular.containers import Column, TimeSeries + +# Checks that the `target` and `time` properties of a TimeSeries equal the expected columns. + + +@pytest.mark.parametrize( + ("time_series", "target_column", "time_column"), + [ + ( + TimeSeries( + { + "time": [0, 1], + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1], + }, + target_name="T", + time_name="time", + ), + Column("T", [0, 1]), + Column("time", [0, 1]), + ), + ], + ids=["target"], +) +def test_should_return_target(time_series: TimeSeries, target_column: Column, time_column: Column) -> None: + assert
time_series.target == target_column + assert time_series.time == time_column diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_transform_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_transform_column.py new file mode 100644 index 000000000..176533570 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/_time_series/test_transform_column.py @@ -0,0 +1,116 @@ +import pytest +from safeds.data.tabular.containers import TimeSeries +from safeds.exceptions import UnknownColumnNameError + +from tests.helpers import assert_that_time_series_are_equal + + +# The time column can be transformed as well (see the "transform_time_col" case). +@pytest.mark.parametrize( + ("table", "column_name", "table_transformed"), + [ + ( + TimeSeries( + {"time": [0, 1, 2], "feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [1, 2, 3]}, + "target", + "time", + ), + "feature_a", + TimeSeries( + {"time": [0, 1, 2], "feature_a": [2, 4, 6], "feature_b": [4, 5, 6], "target": [1, 2, 3]}, + "target", + "time", + ), + ), + ( + TimeSeries( + {"time": [0, 1, 2], "feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [1, 2, 3]}, + "target", + "time", + ), + "target", + TimeSeries( + {"time": [0, 1, 2], "feature_a": [1, 2, 3], "feature_b": [4, 5, 6], "target": [2, 4, 6]}, + "target", + "time", + ), + ), + ( + TimeSeries( + {"time": [0, 1, 2], "feature_a": [1, 2, 3], "b": [4, 5, 6], "target": [1, 2, 3]}, + target_name="target", + time_name="time", + feature_names=["feature_a"], + ), + "b", + TimeSeries( + {"time": [0, 1, 2], "feature_a": [1, 2, 3], "b": [8, 10, 12], "target": [1, 2, 3]}, + target_name="target", + time_name="time", + feature_names=["feature_a"], + ), + ), + ( + TimeSeries( + {"time": [0, 1, 2], "feature_a": [1, 2, 3], "b": [4, 5, 6], "target": [1, 2, 3]}, + target_name="target", + time_name="time", + feature_names=["feature_a"], + ), + "time", + TimeSeries( + {"time": [0, 2, 4], "feature_a": [1, 2, 3], "b": [4, 5, 6],
"target": [1, 2, 3]}, + target_name="target", + time_name="time", + feature_names=["feature_a"], + ), + ), + ], + ids=[ + "transform_feature_column", + "transform_target_column", + "transform_column_that_is_neither", + "transform_time_col", + ], +) +def test_should_transform_column(table: TimeSeries, column_name: str, table_transformed: TimeSeries) -> None: + result = table.transform_column(column_name, lambda row: row.get_value(column_name) * 2) + assert_that_time_series_are_equal(result, table_transformed) + + +@pytest.mark.parametrize( + ("table", "column_name"), + [ + ( + TimeSeries( + { + "time": [0, 1, 2], + "A": [1, 2, 3], + "B": [4, 5, 6], + "C": ["a", "b", "c"], + }, + "C", + "time", + ), + "D", + ), + ( + TimeSeries( + { + "time": [0, 1, 2], + "A": [1, 2, 3], + "B": [4, 5, 6], + "C": ["a", "b", "c"], + }, + target_name="C", + time_name="time", + feature_names=["A"], + ), + "D", + ), + ], + ids=["has_only_features_and_target", "has_columns_that_are_neither"], +) +def test_should_raise_if_column_not_found(table: TimeSeries, column_name: str) -> None: + with pytest.raises(UnknownColumnNameError, match=rf"Could not find column\(s\) '{column_name}'"): + table.transform_column(column_name, lambda row: row.get_value("A") * 2) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index 72b773adc..201e13ba0 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -140,7 +140,7 @@ def test_should_replace_column( @pytest.mark.parametrize( - ("original_table", "new_columns", "column_name_to_be_replaced"), + ("original_table", "new_columns", "column_name_to_be_replaced", "error"), [ ( TaggedTable( @@ -152,6 +152,7 @@ def test_should_replace_column( ), [], "target_old", + 'Target column "target_old" can only be replaced by
exactly one new column.', ), ( TaggedTable( @@ -163,6 +164,7 @@ def test_should_replace_column( ), [Column("target_new_a", [2, 1, 0]), Column("target_new_b"), [4, 2, 0]], "target_old", + 'Target column "target_old" can only be replaced by exactly one new column.', ), ], ids=["zero_columns", "multiple_columns"], @@ -171,9 +173,10 @@ def test_should_throw_illegal_schema_modification( original_table: TaggedTable, new_columns: list[Column], column_name_to_be_replaced: str, + error: str, ) -> None: with pytest.raises( IllegalSchemaModificationError, - match='Target column "target_old" can only be replaced by exactly one new column.', + match=error, ): original_table.replace_column(column_name_to_be_replaced, new_columns) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py index 4ecdb78a7..7a6e2309a 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_sort_columns.py @@ -4,6 +4,7 @@ from safeds.data.tabular.containers import Column, TaggedTable +# This test seems to be written very specifically. //Gerhardsa0 @pytest.mark.parametrize( ("query", "col1", "col2", "col3", "col4"), [