eurobios-mews-labs · vincent-laurent · Jan 3, 2024 · Jan 3, 2024 · Jan 3, 2024 · Jan 3, 2024
diff --git a/examples/classification_threshold.ipynb b/examples/classification_threshold.ipynb
diff --git a/palma/components/checker.py b/palma/components/checker.py
@@ -43,3 +43,5 @@ def check_no_leakage_in_validation(df):
         for i, c in enumerate(df.columns):
             if i % 2 == 1:
                 assert sum(df.loc[:, c] & df.loc[:, df.columns[i - 1]]) == 0
+
+
diff --git a/palma/components/performance.py b/palma/components/performance.py
@@ -59,17 +59,6 @@ def _add(self, project, model):
         self.preproc_estimators, self.only_estimators = __tmp
         self._is_regression = project.problem == "regression"
 
-    def add(self, X, y, cv_indexes, estimators, predictions):
-        self.X = X
-        self.y = y
-        self.indexes = cv_indexes
-        self.estimators = estimators
-
-        if len(self.indexes) != len(estimators):
-            msg = "Arguments 'cv' and estimators must have the same length"
-            raise AssertionError(msg)
-        self.predictions = predictions
-
     def variable_importance(self):
         feature_importance = pd.DataFrame(columns=self.X.columns)
         for i, _ in enumerate(self.indexes):
@@ -222,8 +211,6 @@ def __init__(self, on):
         self._on = on
 
     def __call__(self, project: "Project", model: "ModelEvaluation"):
-        if project.problem != "classification":
-            raise ValueError("Problem not recognise")
         self._add(project, model)
 
     def confusion_matrix(self, in_percentage=False):
@@ -296,8 +283,6 @@ def plot_roc_curve(
         -------
 
         """
-        if "roc_curve" not in self.metrics.keys():
-            pass
         self._compute_metric("roc_curve", metrics.roc_curve)
         self.label = label
         if cv_iter is not None:
@@ -351,7 +336,7 @@ def compute_threshold(
         elif method == "optimize_metric":
             name = "threshold_criterion"
             if metric is None:
-                raise ValueError("Argument metric must not be not")
+                raise ValueError("Argument metric must not be not None")
             self._metrics[name] = {}
             for i, (train, test) in enumerate(self.indexes):
                 ths = np.unique(self.predictions[i]["test"])

diff --git a/tests/test_component/test_checker.py b/tests/test_component/test_checker.py
@@ -6,12 +6,13 @@
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and limitations under the License.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
-import pytest
-from palma.components.checker import ValidationStrategyChecker
 from sklearn import model_selection
 
+from palma.components.checker import ValidationStrategyChecker
+
 
 def test_validation_checker(unbuilt_classification_project, regression_data):
     project = unbuilt_classification_project

diff --git a/tests/test_component/test_performance.py b/tests/test_component/test_performance.py
@@ -6,7 +6,8 @@
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and limitations under the License.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import matplotlib
 from sklearn import metrics
 
@@ -22,6 +23,10 @@ def test_classification_perf(get_scoring_analyser):
         label="train")
     get_scoring_analyser.plot_roc_curve(
         plot_method="mean", mode="minmax")
+    with pytest.raises(ValueError) as e:
+        get_scoring_analyser.plot_roc_curve(
+            plot_method="test", mode="minmax")
+    assert str(e.value) == "argument plot_method=test is not recognize"
 
     performance.plot.figure(figsize=(6, 6), dpi=200)
     get_scoring_analyser.variable_importance()
@@ -37,6 +42,7 @@ def test_raise_value_when_no_threshold(get_scoring_analyser):
 
 
 def test_compute_threshold(get_scoring_analyser):
+    get_scoring_analyser.metrics = {}
     get_scoring_analyser.compute_threshold("fpr", value=0.2)
     get_scoring_analyser.confusion_matrix(in_percentage=True)
 
@@ -45,6 +51,15 @@ def test_compute_threshold(get_scoring_analyser):
 
     get_scoring_analyser.compute_threshold("total_population",
                                            metric=metrics.f1_score)
+    get_scoring_analyser.plot_threshold()
+    with pytest.raises(ValueError) as e:
+        get_scoring_analyser.compute_threshold("test",
+                                               metric=metrics.f1_score)
+    assert str(e.value) == "method test is not recognized"
+
+    with pytest.raises(ValueError) as e:
+        get_scoring_analyser.compute_threshold("optimize_metric")
+    assert str(e.value) == "Argument metric must not be not None"
 
 
 def test_shap_scoring(get_shap_analyser):
@@ -58,6 +73,22 @@ def test_shap_regression(get_shap_analyser):
     get_shap_analyser.plot_shap_decision_plot()
 
 
+def test_analyser_raise_error_parameters(
+        get_shap_analyser, learning_data):
+    project, model, X, y = learning_data
+    get_shap_analyser._on = "test"
+    with pytest.raises(ValueError) as e:
+        get_shap_analyser._add(project, model)
+    assert (str(e.value) == "on parameter : test is not understood."
+                            " The possible values are 'indexes_train_test'"
+                            " or 'indexes_val'")
+
+
+def test_shap_regression_compute(get_shap_analyser):
+    get_shap_analyser._compute_shap_values(100, is_regression=True,
+                                           explainer_method="auto")
+
+
 def test_regression_perf(get_regression_analyser):
     get_regression_analyser.plot_prediction_versus_real()
     get_regression_analyser.plot_variable_importance()
@@ -76,3 +107,6 @@ def test_performance_get_metric_dataframe(get_regression_analyser):
     assert len(get_regression_analyser.get_test_metrics().columns) >= len(
         get_regression_analyser.metrics.keys())
     print(get_regression_analyser.get_train_metrics())
+    assert get_regression_analyser.get_train_metrics()["r2_score"].iloc[0] < 0.2
+    get_regression_analyser.plot_errors_pairgrid()
+
Original file line number	Diff line number	Diff line change
Expand Up		@@ -43,3 +43,5 @@ def check_no_leakage_in_validation(df):
		         for i, c in enumerate(df.columns):
		             if i % 2 == 1:
		                 assert sum(df.loc[:, c] & df.loc[:, df.columns[i - 1]]) == 0