From 03fb07ee4936804634328f7f428af4f41312f8d8 Mon Sep 17 00:00:00 2001
From: Vesna Tanko
Date: Mon, 19 Dec 2022 14:23:36 +0100
Subject: [PATCH] MCC: Add Matthews correlation coefficient score

---
 Orange/evaluation/scoring.py                |  9 +++-
 Orange/tests/test_evaluation_scoring.py     | 50 +++++++++++++++++--
 .../source/widgets/evaluate/testandscore.md |  1 +
 3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/Orange/evaluation/scoring.py b/Orange/evaluation/scoring.py
index 0f3eb47d3bb..78c8811766f 100644
--- a/Orange/evaluation/scoring.py
+++ b/Orange/evaluation/scoring.py
@@ -20,7 +20,7 @@
 from Orange.misc.wrapper_meta import WrapperMeta
 
 __all__ = ["CA", "Precision", "Recall", "F1", "PrecisionRecallFSupport", "AUC",
-           "MSE", "RMSE", "MAE", "R2", "LogLoss"]
+           "MSE", "RMSE", "MAE", "R2", "LogLoss", "MatthewsCorrCoefficient"]
 
 
 class ScoreMetaType(WrapperMeta):
@@ -344,6 +344,13 @@ def compute_score(self, results, target=None, average="binary"):
         elif target is not None:
             return self.single_class_specificity(results, target)
 
+
+class MatthewsCorrCoefficient(ClassificationScore):
+    __wraps__ = skl_metrics.matthews_corrcoef
+    name = "MCC"
+    long_name = "Matthews correlation coefficient"
+
+
 
 # Regression scores
 
diff --git a/Orange/tests/test_evaluation_scoring.py b/Orange/tests/test_evaluation_scoring.py
index 291ed7d2e2a..64d1ed18a20 100644
--- a/Orange/tests/test_evaluation_scoring.py
+++ b/Orange/tests/test_evaluation_scoring.py
@@ -7,12 +7,14 @@
 
 from Orange.data import DiscreteVariable, ContinuousVariable, Domain
 from Orange.data import Table
-from Orange.classification import LogisticRegressionLearner, SklTreeLearner, NaiveBayesLearner,\
-    MajorityLearner
+from Orange.classification import LogisticRegressionLearner, SklTreeLearner, \
+    NaiveBayesLearner, MajorityLearner, RandomForestLearner
 from Orange.evaluation import AUC, CA, Results, Recall, \
-    Precision, TestOnTrainingData, scoring, LogLoss, F1, CrossValidation
+    Precision, TestOnTrainingData, scoring, LogLoss, F1, CrossValidation, \
+    MatthewsCorrCoefficient, TestOnTestData
 from Orange.evaluation.scoring import Specificity
 from Orange.preprocess import discretize, Discretize
+from Orange.regression import MeanLearner
 from Orange.tests import test_filename
 
 
@@ -346,6 +348,48 @@ def test_log_loss_calc(self):
         self.assertAlmostEqual(ll_calc, ll_orange[0])
 
 
+class TestMatthewsCorrCoefficient(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.heart = Table("heart_disease")
+        cls.iris = Table("iris")
+        cls.housing = Table("housing")
+        cls.scorer = MatthewsCorrCoefficient()
+
+    def test_mcc_binary(self):
+        rf = RandomForestLearner(random_state=0)
+        results = TestOnTrainingData()(self.heart, [rf])
+        mcc = self.scorer(results)
+        self.assertGreater(mcc, 0.95)
+
+    def test_mcc_multiclass(self):
+        rf = RandomForestLearner(random_state=0)
+        results = TestOnTrainingData()(self.iris, [rf])
+        mcc = self.scorer(results)
+        self.assertGreater(mcc, 0.95)
+
+    def test_mcc_random(self):
+        majority = MajorityLearner()
+        results = TestOnTrainingData()(self.iris, [majority])
+        mcc = self.scorer(results)
+        self.assertEqual(mcc, 0)
+
+    def test_mcc_neg(self):
+        rf = RandomForestLearner(random_state=0)
+        test_data = self.heart.copy()
+        mask = test_data.Y == 0
+        test_data.Y[mask] = 1
+        test_data.Y[~mask] = 0
+        results = TestOnTestData()(self.heart, test_data, [rf])
+        mcc = self.scorer(results)
+        self.assertLess(mcc, -0.95)
+
+    def test_mcc_continuous(self):
+        majority = MeanLearner()
+        results = TestOnTrainingData()(self.housing, [majority])
+        self.assertRaises(ValueError, self.scorer, results)
+
 
 class TestSpecificity(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
diff --git a/doc/visual-programming/source/widgets/evaluate/testandscore.md b/doc/visual-programming/source/widgets/evaluate/testandscore.md
index 5b5e9d2e0e4..d33a92afc76 100644
--- a/doc/visual-programming/source/widgets/evaluate/testandscore.md
+++ b/doc/visual-programming/source/widgets/evaluate/testandscore.md
@@ -37,6 +37,7 @@ The *Learner* signal has an uncommon property: it can be connected to more than
   - [Recall](https://en.wikipedia.org/wiki/Precision_and_recall) is the proportion of true positives among all positive instances in the data, e.g. the number of sick among all diagnosed as sick.
   - [Specificity](https://en.wikipedia.org/wiki/Sensitivity_and_specificity) is the proportion of true negatives among all negative instances, e.g. the number of non-sick among all diagnosed as non-sick.
   - [LogLoss](https://en.wikipedia.org/wiki/Cross_entropy) or cross-entropy loss takes into account the uncertainty of your prediction based on how much it varies from the actual label.
+  - [Matthews correlation coefficient](https://en.wikipedia.org/wiki/Phi_coefficient) takes into account true and false positives and negatives and is generally regarded as a balanced measure which can be used even if the classes are of very different sizes.
 - Train time - cumulative time in seconds used for training models.
 - Test time - cumulative time in seconds used for testing models.
 - Regression
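
For reference, the new MatthewsCorrCoefficient score wraps
sklearn.metrics.matthews_corrcoef, which for binary problems computes
MCC = (TP*TN - FP*FN) / sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)), ranging from
-1 (total disagreement) through 0 (chance-level prediction) to +1 (perfect
prediction); those are the bounds the tests above assert against. Below is a
minimal usage sketch, not part of the patch, following the call pattern used
in the tests; the dataset, learner and fold count are illustrative choices
only:

    from Orange.data import Table
    from Orange.classification import LogisticRegressionLearner
    from Orange.evaluation import CrossValidation, MatthewsCorrCoefficient

    # Evaluate a learner and score the results with the new MCC scorer.
    data = Table("heart_disease")
    results = CrossValidation(k=10)(data, [LogisticRegressionLearner()])
    mcc = MatthewsCorrCoefficient()(results)  # one MCC value per learner
    print(mcc)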