From 03fb07ee4936804634328f7f428af4f41312f8d8 Mon Sep 17 00:00:00 2001
From: Vesna Tanko
Date: Mon, 19 Dec 2022 14:23:36 +0100
Subject: [PATCH] MCC: Add Matthews correlation coefficient score

---
 Orange/evaluation/scoring.py                |  9 +++-
 Orange/tests/test_evaluation_scoring.py     | 50 +++++++++++++++++--
 .../source/widgets/evaluate/testandscore.md |  1 +
 3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/Orange/evaluation/scoring.py b/Orange/evaluation/scoring.py
index 0f3eb47d3bb..78c8811766f 100644
--- a/Orange/evaluation/scoring.py
+++ b/Orange/evaluation/scoring.py
@@ -20,7 +20,7 @@
 from Orange.misc.wrapper_meta import WrapperMeta
 
 __all__ = ["CA", "Precision", "Recall", "F1", "PrecisionRecallFSupport", "AUC",
-           "MSE", "RMSE", "MAE", "R2", "LogLoss"]
+           "MSE", "RMSE", "MAE", "R2", "LogLoss", "MatthewsCorrCoefficient"]
 
 
 class ScoreMetaType(WrapperMeta):
@@ -344,6 +344,13 @@ def compute_score(self, results, target=None, average="binary"):
         elif target is not None:
             return self.single_class_specificity(results, target)
 
+
+class MatthewsCorrCoefficient(ClassificationScore):
+    __wraps__ = skl_metrics.matthews_corrcoef
+    name = "MCC"
+    long_name = "Matthews correlation coefficient"
+
+
 
 # Regression scores
 
diff --git a/Orange/tests/test_evaluation_scoring.py b/Orange/tests/test_evaluation_scoring.py
index 291ed7d2e2a..64d1ed18a20 100644
--- a/Orange/tests/test_evaluation_scoring.py
+++ b/Orange/tests/test_evaluation_scoring.py
@@ -7,12 +7,14 @@
 
 from Orange.data import DiscreteVariable, ContinuousVariable, Domain
 from Orange.data import Table
-from Orange.classification import LogisticRegressionLearner, SklTreeLearner, NaiveBayesLearner,\
-    MajorityLearner
+from Orange.classification import LogisticRegressionLearner, SklTreeLearner, \
+    NaiveBayesLearner, MajorityLearner, RandomForestLearner
 from Orange.evaluation import AUC, CA, Results, Recall, \
-    Precision, TestOnTrainingData, scoring, LogLoss, F1, CrossValidation
+    Precision, TestOnTrainingData, scoring, LogLoss, F1, CrossValidation, \
+    MatthewsCorrCoefficient, TestOnTestData
 from Orange.evaluation.scoring import Specificity
 from Orange.preprocess import discretize, Discretize
+from Orange.regression import MeanLearner
 from Orange.tests import test_filename
 
 
@@ -346,6 +348,48 @@ def test_log_loss_calc(self):
         self.assertAlmostEqual(ll_calc, ll_orange[0])
 
 
+class TestMatthewsCorrCoefficient(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.heart = Table("heart_disease")
+        cls.iris = Table("iris")
+        cls.housing = Table("housing")
+        cls.scorer = MatthewsCorrCoefficient()
+
+    def test_mcc_binary(self):
+        rf = RandomForestLearner(random_state=0)
+        results = TestOnTrainingData()(self.heart, [rf])
+        mcc = self.scorer(results)
+        self.assertGreater(mcc, 0.95)
+
+    def test_mcc_multiclass(self):
+        rf = RandomForestLearner(random_state=0)
+        results = TestOnTrainingData()(self.iris, [rf])
+        mcc = self.scorer(results)
+        self.assertGreater(mcc, 0.95)
+
+    def test_mcc_random(self):
+        majority = MajorityLearner()
+        results = TestOnTrainingData()(self.iris, [majority])
+        mcc = self.scorer(results)
+        self.assertEqual(mcc, 0)
+
+    def test_mcc_neg(self):
+        rf = RandomForestLearner(random_state=0)
+        test_data = self.heart.copy()
+        mask = test_data.Y == 0
+        test_data.Y[mask] = 1
+        test_data.Y[~mask] = 0
+        results = TestOnTestData()(self.heart, test_data, [rf])
+        mcc = self.scorer(results)
+        self.assertLess(mcc, -0.95)
+
+    def test_mcc_continuous(self):
+        majority = MeanLearner()
+        results = TestOnTrainingData()(self.housing, [majority])
+        self.assertRaises(ValueError, self.scorer, results)
+
 
 class TestSpecificity(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
diff --git a/doc/visual-programming/source/widgets/evaluate/testandscore.md b/doc/visual-programming/source/widgets/evaluate/testandscore.md
index 5b5e9d2e0e4..d33a92afc76 100644
--- a/doc/visual-programming/source/widgets/evaluate/testandscore.md
+++ b/doc/visual-programming/source/widgets/evaluate/testandscore.md
@@ -37,6 +37,7 @@ The *Learner* signal has an uncommon property: it can be connected to more than
   - [Recall](https://en.wikipedia.org/wiki/Precision_and_recall) is the proportion of true positives among all positive instances in the data, e.g. the number of sick among all diagnosed as sick.
   - [Specificity](https://en.wikipedia.org/wiki/Sensitivity_and_specificity) is the proportion of true negatives among all negative instances, e.g. the number of non-sick among all diagnosed as non-sick.
   - [LogLoss](https://en.wikipedia.org/wiki/Cross_entropy) or cross-entropy loss takes into account the uncertainty of your prediction based on how much it varies from the actual label.
+  - [Matthews correlation coefficient](https://en.wikipedia.org/wiki/Phi_coefficient) takes into account true and false positives and negatives and is generally regarded as a balanced measure which can be used even if the classes are of very different sizes.
 - Train time - cumulative time in seconds used for training models.
 - Test time - cumulative time in seconds used for testing models.
 - Regression
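
For reference, the new MatthewsCorrCoefficient score wraps
sklearn.metrics.matthews_corrcoef, which for binary problems computes
MCC = (TP*TN - FP*FN) / sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)), ranging from
-1 (total disagreement) through 0 (chance-level prediction) to +1 (perfect
prediction); those are the bounds the tests above assert against. Below is a
minimal usage sketch, not part of the patch, following the call pattern used
in the tests; the dataset, learner and fold count are illustrative choices
only:

    from Orange.data import Table
    from Orange.classification import LogisticRegressionLearner
    from Orange.evaluation import CrossValidation, MatthewsCorrCoefficient

    # Evaluate a learner and score the results with the new MCC scorer.
    data = Table("heart_disease")
    results = CrossValidation(k=10)(data, [LogisticRegressionLearner()])
    mcc = MatthewsCorrCoefficient()(results)  # one MCC value per learner
    print(mcc)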