Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add methods that tell which columns would be affected by a transformer #304

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions src/safeds/data/tabular/transformation/_imputer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import warnings
from typing import Any

import pandas as pd
Expand Down Expand Up @@ -178,3 +179,60 @@ def is_fitted(self) -> bool:
Whether the transformer is fitted.
"""
return self._wrapped_transformer is not None

def get_names_of_added_columns(self) -> list[str]:
    """
    Get the names of all new columns that have been added by the Imputer.

    The Imputer never adds columns, so a fitted Imputer always returns the empty list. A warning pointing this out
    is emitted on every call, before the fitted check is performed.

    Returns
    -------
    added_columns : list[str]
        The empty list.

    Warns
    -----
    UserWarning
        Always, because the Imputer does not add any columns.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    # stacklevel=2 attributes the warning to the line that called this method instead of to this library line, so
    # users can see which of their calls is pointless.
    warnings.warn("Imputer only changes data within columns, but does not add any columns.", stacklevel=2)
    if not self.is_fitted():
        raise TransformerNotFittedError
    return []

# (Must implement abstract method, cannot instantiate class otherwise.)
def get_names_of_changed_columns(self) -> list[str]:
    """
    Get the names of all columns that may have been changed by the Imputer.

    Returns
    -------
    changed_columns : list[str]
        A new list with the (potentially) changed column names, as passed to fit.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    if self._column_names is None:
        raise TransformerNotFittedError
    # Hand out a copy so callers cannot modify the fitted state by mutating the returned list.
    return list(self._column_names)

def get_names_of_removed_columns(self) -> list[str]:
    """
    Get the names of all columns that have been removed by the Imputer.

    The Imputer never removes columns, so a fitted Imputer always returns the empty list. A warning pointing this
    out is emitted on every call, before the fitted check is performed.

    Returns
    -------
    removed_columns : list[str]
        The empty list.

    Warns
    -----
    UserWarning
        Always, because the Imputer does not remove any columns.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    # stacklevel=2 attributes the warning to the line that called this method instead of to this library line.
    warnings.warn("Imputer only changes data within columns, but does not remove any columns.", stacklevel=2)
    if not self.is_fitted():
        raise TransformerNotFittedError
    return []
59 changes: 59 additions & 0 deletions src/safeds/data/tabular/transformation/_label_encoder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import warnings

from sklearn.preprocessing import OrdinalEncoder as sk_OrdinalEncoder

from safeds.data.tabular.containers import Table
Expand Down Expand Up @@ -126,3 +128,60 @@ def is_fitted(self) -> bool:
Whether the transformer is fitted.
"""
return self._wrapped_transformer is not None

def get_names_of_added_columns(self) -> list[str]:
    """
    Get the names of all new columns that have been added by the LabelEncoder.

    The LabelEncoder never adds columns, so a fitted LabelEncoder always returns the empty list. A warning pointing
    this out is emitted on every call, before the fitted check is performed.

    Returns
    -------
    added_columns : list[str]
        The empty list.

    Warns
    -----
    UserWarning
        Always, because the LabelEncoder does not add any columns.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    # stacklevel=2 attributes the warning to the line that called this method instead of to this library line.
    warnings.warn("LabelEncoder only changes data within columns, but does not add any columns.", stacklevel=2)
    if not self.is_fitted():
        raise TransformerNotFittedError
    return []

# (Must implement abstract method, cannot instantiate class otherwise.)
def get_names_of_changed_columns(self) -> list[str]:
    """
    Get the names of all columns that may have been changed by the LabelEncoder.

    Returns
    -------
    changed_columns : list[str]
        A new list with the (potentially) changed column names, as passed to fit.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    if self._column_names is None:
        raise TransformerNotFittedError
    # Hand out a copy so callers cannot modify the fitted state by mutating the returned list.
    return list(self._column_names)

def get_names_of_removed_columns(self) -> list[str]:
    """
    Get the names of all columns that have been removed by the LabelEncoder.

    The LabelEncoder never removes columns, so a fitted LabelEncoder always returns the empty list. A warning
    pointing this out is emitted on every call, before the fitted check is performed.

    Returns
    -------
    removed_columns : list[str]
        The empty list.

    Warns
    -----
    UserWarning
        Always, because the LabelEncoder does not remove any columns.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    # stacklevel=2 attributes the warning to the line that called this method instead of to this library line.
    warnings.warn("LabelEncoder only changes data within columns, but does not remove any columns.", stacklevel=2)
    if not self.is_fitted():
        raise TransformerNotFittedError
    return []
57 changes: 57 additions & 0 deletions src/safeds/data/tabular/transformation/_one_hot_encoder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import warnings
from collections import Counter
from typing import Any

Expand Down Expand Up @@ -249,3 +250,59 @@ def is_fitted(self) -> bool:
Whether the transformer is fitted.
"""
return self._column_names is not None and self._value_to_column is not None

def get_names_of_added_columns(self) -> list[str]:
    """
    Get the names of all new columns that have been added by the OneHotEncoder.

    Returns
    -------
    added_columns : list[str]
        The names of the added columns, collected in the iteration order of the fitted column mapping.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    fitted_mapping = self._column_names
    if fitted_mapping is None:
        raise TransformerNotFittedError

    # Flatten the per-column lists of generated names into one list.
    added: list[str] = []
    for generated_names in fitted_mapping.values():
        added.extend(generated_names)
    return added

# (Must implement abstract method, cannot instantiate class otherwise.)
def get_names_of_changed_columns(self) -> list[str]:
    """
    Get the names of all columns that have been changed by the OneHotEncoder (none).

    The OneHotEncoder only removes and adds columns, so a fitted OneHotEncoder always returns the empty list. A
    warning pointing this out is emitted on every call, before the fitted check is performed.

    Returns
    -------
    changed_columns : list[str]
        The empty list.

    Warns
    -----
    UserWarning
        Always, because the OneHotEncoder does not change any columns.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    # stacklevel=2 attributes the warning to the line that called this method instead of to this library line.
    warnings.warn("OneHotEncoder only removes and adds, but does not change any columns.", stacklevel=2)
    if not self.is_fitted():
        raise TransformerNotFittedError
    return []

def get_names_of_removed_columns(self) -> list[str]:
    """
    Get the names of all columns that have been removed by the OneHotEncoder.

    Returns
    -------
    removed_columns : list[str]
        The keys of the fitted column mapping, i.e. the names of the columns that get replaced.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """
    fitted_mapping = self._column_names
    if fitted_mapping is not None:
        return [*fitted_mapping.keys()]
    raise TransformerNotFittedError
48 changes: 48 additions & 0 deletions src/safeds/data/tabular/transformation/_table_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,54 @@ def transform(self, table: Table) -> Table:
If the transformer has not been fitted yet.
"""

@abstractmethod
def get_names_of_added_columns(self) -> list[str]:
    """
    Get the names of all new columns that have been added by the transformer.

    This method is abstract; every concrete transformer has to provide its own implementation.

    Returns
    -------
    added_columns : list[str]
        A list of names of the added columns, ordered as they will appear in the table.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """

@abstractmethod
def get_names_of_changed_columns(self) -> list[str]:
    """
    Get the names of all columns that have been changed by the transformer.

    This method is abstract; every concrete transformer has to provide its own implementation.

    Returns
    -------
    changed_columns : list[str]
        A list of names of changed columns, ordered as they appear in the table.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """

@abstractmethod
def get_names_of_removed_columns(self) -> list[str]:
    """
    Get the names of all columns that have been removed by the transformer.

    This method is abstract; every concrete transformer has to provide its own implementation.

    Returns
    -------
    removed_columns : list[str]
        A list of names of the removed columns, ordered as they appear in the table the transformer was fitted on.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.
    """

@abstractmethod
def is_fitted(self) -> bool:
"""
Expand Down
52 changes: 52 additions & 0 deletions tests/safeds/data/tabular/transformation/test_imputer.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,55 @@ def test_should_not_change_original_table(self) -> None:
)

assert table == expected

def test_get_names_of_added_columns(self) -> None:
    """Check that the Imputer always warns, raises while unfitted and reports no added columns once fitted."""
    expected_warning = "Imputer only changes data within columns, but does not add any columns."
    unfitted = Imputer(strategy=Imputer.Strategy.Constant(1))

    # Unfitted: the warning is emitted and the call still fails.
    with pytest.warns(UserWarning, match=expected_warning):
        with pytest.raises(TransformerNotFittedError):
            unfitted.get_names_of_added_columns()

    data = Table(
        {
            "a": [1, None],
            "b": [1, 1],
        },
    )
    fitted = unfitted.fit(data, None)

    # Fitted: the warning is emitted and the result is empty.
    with pytest.warns(UserWarning, match=expected_warning):
        added = fitted.get_names_of_added_columns()
    assert added == []

def test_get_names_of_changed_columns(self) -> None:
    """Check that the Imputer raises while unfitted and reports the fitted columns afterwards."""
    unfitted = Imputer(strategy=Imputer.Strategy.Constant(1))
    with pytest.raises(TransformerNotFittedError):
        unfitted.get_names_of_changed_columns()

    data = Table(
        {
            "a": [1, None],
            "b": [1, 1],
        },
    )
    fitted = unfitted.fit(data, None)
    changed = fitted.get_names_of_changed_columns()
    assert changed == ["a", "b"]

def test_get_names_of_removed_columns(self) -> None:
    """Check that the Imputer always warns, raises while unfitted and reports no removed columns once fitted."""
    expected_warning = "Imputer only changes data within columns, but does not remove any columns."
    unfitted = Imputer(strategy=Imputer.Strategy.Constant(1))

    # Unfitted: the warning is emitted and the call still fails.
    with pytest.warns(UserWarning, match=expected_warning):
        with pytest.raises(TransformerNotFittedError):
            unfitted.get_names_of_removed_columns()

    data = Table(
        {
            "a": [1, None],
            "b": [1, 1],
        },
    )
    fitted = unfitted.fit(data, None)

    # Fitted: the warning is emitted and the result is empty.
    with pytest.warns(UserWarning, match=expected_warning):
        removed = fitted.get_names_of_removed_columns()
    assert removed == []
52 changes: 52 additions & 0 deletions tests/safeds/data/tabular/transformation/test_label_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,58 @@ def test_should_not_change_original_table(self) -> None:

assert table == expected

def test_get_names_of_added_columns(self) -> None:
    """Check that the LabelEncoder always warns, raises while unfitted and reports no added columns once fitted."""
    expected_warning = "LabelEncoder only changes data within columns, but does not add any columns."
    unfitted = LabelEncoder()

    # Unfitted: the warning is emitted and the call still fails.
    with pytest.warns(UserWarning, match=expected_warning):
        with pytest.raises(TransformerNotFittedError):
            unfitted.get_names_of_added_columns()

    data = Table(
        {
            "a": ["b"],
        },
    )
    fitted = unfitted.fit(data, None)

    # Fitted: the warning is emitted and the result is empty.
    with pytest.warns(UserWarning, match=expected_warning):
        added = fitted.get_names_of_added_columns()
    assert added == []

def test_get_names_of_changed_columns(self) -> None:
    """Check that the LabelEncoder raises while unfitted and reports the fitted columns afterwards."""
    unfitted = LabelEncoder()
    with pytest.raises(TransformerNotFittedError):
        unfitted.get_names_of_changed_columns()

    data = Table(
        {
            "a": ["b"],
        },
    )
    fitted = unfitted.fit(data, None)
    changed = fitted.get_names_of_changed_columns()
    assert changed == ["a"]

def test_get_names_of_removed_columns(self) -> None:
    """Check that the LabelEncoder always warns, raises while unfitted and reports no removed columns once fitted."""
    expected_warning = "LabelEncoder only changes data within columns, but does not remove any columns."
    unfitted = LabelEncoder()

    # Unfitted: the warning is emitted and the call still fails.
    with pytest.warns(UserWarning, match=expected_warning):
        with pytest.raises(TransformerNotFittedError):
            unfitted.get_names_of_removed_columns()

    data = Table(
        {
            "a": ["b"],
        },
    )
    fitted = unfitted.fit(data, None)

    # Fitted: the warning is emitted and the result is empty.
    with pytest.warns(UserWarning, match=expected_warning):
        removed = fitted.get_names_of_removed_columns()
    assert removed == []


class TestInverseTransform:
@pytest.mark.parametrize(
Expand Down
Loading