From 1b37cd64fe84969c6959249c4fca1f58394d703c Mon Sep 17 00:00:00 2001 From: grefrathc Date: Fri, 21 Jun 2024 15:44:29 +0200 Subject: [PATCH 1/6] issue #750 regularization strength for logistic classifier --- .../classification/_logistic_classifier.py | 11 ++++---- .../test_logistic_classifier.py | 26 +++++++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 tests/safeds/ml/classical/classification/test_logistic_classifier.py diff --git a/src/safeds/ml/classical/classification/_logistic_classifier.py b/src/safeds/ml/classical/classification/_logistic_classifier.py index e312e6b25..0c8e2856b 100644 --- a/src/safeds/ml/classical/classification/_logistic_classifier.py +++ b/src/safeds/ml/classical/classification/_logistic_classifier.py @@ -17,9 +17,9 @@ class LogisticClassifier(Classifier): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self) -> None: + def __init__(self, c: float=1.0) -> None: super().__init__() - + self.c = c def __hash__(self) -> int: return _structural_hash( super().__hash__(), @@ -30,12 +30,13 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ def _clone(self) -> LogisticClassifier: - return LogisticClassifier() - + return LogisticClassifier(c=self.c) + def _get_sklearn_model(self) -> ClassifierMixin: from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression return SklearnLogisticRegression( random_state=_get_random_seed(), n_jobs=-1, - ) + C=self.c, + ) \ No newline at end of file diff --git a/tests/safeds/ml/classical/classification/test_logistic_classifier.py b/tests/safeds/ml/classical/classification/test_logistic_classifier.py new file mode 100644 index 000000000..768beae22 --- /dev/null +++ b/tests/safeds/ml/classical/classification/test_logistic_classifier.py @@ -0,0 +1,26 @@ +import pytest +from safeds.data.labeled.containers import TabularDataset +from safeds.data.tabular.containers import Table +from safeds.ml.classical.classification import LogisticClassifier + + +@pytest.fixture() +def training_set() -> TabularDataset: + table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) + return table.to_tabular_dataset(target_name="col1") + +class TestC: + def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: + fitted_model = LogisticClassifier(c=2).fit(training_set) + assert fitted_model.c == 2 + + def test_should_be_passed_to_sklearn(self, training_set: TabularDataset) -> None: + fitted_model = LogisticClassifier(c=2).fit(training_set) + assert fitted_model._wrapped_model is not None + assert fitted_model._wrapped_model.C == 2 + + def test_clone(self, training_set: TabularDataset) -> None: + fitted_model = LogisticClassifier(c=2).fit(training_set) + cloned_classifier = fitted_model._clone() + assert isinstance(cloned_classifier, LogisticClassifier) + assert cloned_classifier.c == fitted_model.c \ No newline at end of file From 763b94c00e76b51b728cb0362077a055eee228b7 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 28 Jun 2024 09:08:37 +0000 Subject: [PATCH 2/6] style: apply automated linter fixes --- .../classical/classification/_logistic_classifier.py | 11 ++++++----- .../classification/test_logistic_classifier.py | 5 +++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/safeds/ml/classical/classification/_logistic_classifier.py b/src/safeds/ml/classical/classification/_logistic_classifier.py index 0c8e2856b..626e52ea5 100644 --- a/src/safeds/ml/classical/classification/_logistic_classifier.py +++ b/src/safeds/ml/classical/classification/_logistic_classifier.py @@ -17,9 +17,10 @@ class LogisticClassifier(Classifier): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, c: float=1.0) -> None: + def __init__(self, c: float = 1.0) -> None: super().__init__() self.c = c + def __hash__(self) -> int: return _structural_hash( super().__hash__(), @@ -30,13 +31,13 @@ def __hash__(self) -> int: # ------------------------------------------------------------------------------------------------------------------ def _clone(self) -> LogisticClassifier: - return LogisticClassifier(c=self.c) - + return LogisticClassifier(c=self.c) + def _get_sklearn_model(self) -> ClassifierMixin: from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression return SklearnLogisticRegression( random_state=_get_random_seed(), n_jobs=-1, - C=self.c, - ) \ No newline at end of file + C=self.c, + ) diff --git a/tests/safeds/ml/classical/classification/test_logistic_classifier.py b/tests/safeds/ml/classical/classification/test_logistic_classifier.py index 768beae22..78c604dad 100644 --- a/tests/safeds/ml/classical/classification/test_logistic_classifier.py +++ b/tests/safeds/ml/classical/classification/test_logistic_classifier.py @@ -9,7 +9,8 @@ def training_set() -> TabularDataset: table = Table({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) return table.to_tabular_dataset(target_name="col1") -class TestC: + +class TestC: def test_should_be_passed_to_fitted_model(self, training_set: TabularDataset) -> None: fitted_model = LogisticClassifier(c=2).fit(training_set) assert fitted_model.c == 2 @@ -23,4 +24,4 @@ def test_clone(self, training_set: TabularDataset) -> None: fitted_model = LogisticClassifier(c=2).fit(training_set) cloned_classifier = fitted_model._clone() assert isinstance(cloned_classifier, LogisticClassifier) - assert cloned_classifier.c == fitted_model.c \ No newline at end of file + assert cloned_classifier.c == fitted_model.c From 0c475e461481cdadfe495a34eacc8032f97888de Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 28 Jun 2024 11:31:13 +0200 Subject: [PATCH 3/6] feat: validate `c` parameter --- .../classification/_logistic_classifier.py | 17 ++++++++++++++++- .../classification/test_logistic_classifier.py | 6 ++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/safeds/ml/classical/classification/_logistic_classifier.py b/src/safeds/ml/classical/classification/_logistic_classifier.py index 626e52ea5..418e749d0 100644 --- a/src/safeds/ml/classical/classification/_logistic_classifier.py +++ b/src/safeds/ml/classical/classification/_logistic_classifier.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from safeds._utils import _get_random_seed, _structural_hash +from safeds._validation import _check_bounds, _OpenBound from ._classifier import Classifier @@ -19,13 +20,27 @@ class LogisticClassifier(Classifier): def __init__(self, c: float = 1.0) -> None: super().__init__() - self.c = c + + # Validation + _check_bounds("c", c, lower_bound=_OpenBound(0)) + + # Hyperparameters + self._c: float = c def __hash__(self) -> int: return _structural_hash( super().__hash__(), ) + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def c(self) -> float: + """The regularization strength. Lower values imply stronger regularization.""" + return self._c + # ------------------------------------------------------------------------------------------------------------------ # Template methods # ------------------------------------------------------------------------------------------------------------------ diff --git a/tests/safeds/ml/classical/classification/test_logistic_classifier.py b/tests/safeds/ml/classical/classification/test_logistic_classifier.py index 78c604dad..92bd4b8da 100644 --- a/tests/safeds/ml/classical/classification/test_logistic_classifier.py +++ b/tests/safeds/ml/classical/classification/test_logistic_classifier.py @@ -1,6 +1,7 @@ import pytest from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table +from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.classification import LogisticClassifier @@ -25,3 +26,8 @@ def test_clone(self, training_set: TabularDataset) -> None: cloned_classifier = fitted_model._clone() assert isinstance(cloned_classifier, LogisticClassifier) assert cloned_classifier.c == fitted_model.c + + @pytest.mark.parametrize("c", [-1.0, 0.0], ids=["minus_one", "zero"]) + def test_should_raise_if_less_than_or_equal_to_0(self, c: float) -> None: + with pytest.raises(OutOfBoundsError): + LogisticClassifier(c=c) From f6998df89a7c89e23bc99398461d4965f70dce71 Mon Sep 17 00:00:00 2001 From: grefrathc Date: Fri, 28 Jun 2024 16:36:58 +0200 Subject: [PATCH 4/6] The new parameter is now documented in the Parameters section of the class's docstring. --- .../ml/classical/classification/_logistic_classifier.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/safeds/ml/classical/classification/_logistic_classifier.py b/src/safeds/ml/classical/classification/_logistic_classifier.py index 418e749d0..b0cd4996a 100644 --- a/src/safeds/ml/classical/classification/_logistic_classifier.py +++ b/src/safeds/ml/classical/classification/_logistic_classifier.py @@ -12,7 +12,14 @@ class LogisticClassifier(Classifier): - """Regularized logistic regression for classification.""" + """ + Regularized logistic regression for classification. + + Parameters + ---------- + c: + The regularization strength. Lower values imply stronger regularization. Must be greater than 0. + """ # ------------------------------------------------------------------------------------------------------------------ # Dunder methods From 3f184598381d818d534a8c59325ce71d1685a0be Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 28 Jun 2024 14:38:33 +0000 Subject: [PATCH 5/6] style: apply automated linter fixes --- src/safeds/ml/classical/classification/_logistic_classifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/ml/classical/classification/_logistic_classifier.py b/src/safeds/ml/classical/classification/_logistic_classifier.py index b0cd4996a..17228af20 100644 --- a/src/safeds/ml/classical/classification/_logistic_classifier.py +++ b/src/safeds/ml/classical/classification/_logistic_classifier.py @@ -14,7 +14,7 @@ class LogisticClassifier(Classifier): """ Regularized logistic regression for classification. - + Parameters ---------- c: From 50efcbf618ec687fcf622a38503d15e6b21430da Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sat, 29 Jun 2024 14:37:28 +0200 Subject: [PATCH 6/6] feat: make `c` parameter keyword-only --- src/safeds/ml/classical/classification/_logistic_classifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/ml/classical/classification/_logistic_classifier.py b/src/safeds/ml/classical/classification/_logistic_classifier.py index 17228af20..e00a8d399 100644 --- a/src/safeds/ml/classical/classification/_logistic_classifier.py +++ b/src/safeds/ml/classical/classification/_logistic_classifier.py @@ -25,7 +25,7 @@ class LogisticClassifier(Classifier): # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __init__(self, c: float = 1.0) -> None: + def __init__(self, *, c: float = 1.0) -> None: super().__init__() # Validation