Skip to content

Commit

Permalink
Merge pull request #6264 from VesnaT/mcc
Browse files Browse the repository at this point in the history
[ENH] MCC: Add Matthews correlation coefficient score
  • Loading branch information
lanzagar authored Jan 11, 2023
2 parents 4348f1e + 03fb07e commit d56940c
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 4 deletions.
9 changes: 8 additions & 1 deletion Orange/evaluation/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from Orange.misc.wrapper_meta import WrapperMeta

__all__ = ["CA", "Precision", "Recall", "F1", "PrecisionRecallFSupport", "AUC",
"MSE", "RMSE", "MAE", "R2", "LogLoss"]
"MSE", "RMSE", "MAE", "R2", "LogLoss", "MatthewsCorrCoefficient"]


class ScoreMetaType(WrapperMeta):
Expand Down Expand Up @@ -367,6 +367,13 @@ def compute_score(self, results, target=None, average="binary"):
elif target is not None:
return self.single_class_specificity(results, target)


# Classification score exposing scikit-learn's Matthews correlation
# coefficient under the short name "MCC".
#
# NOTE(review): documented with comments rather than a docstring because a
# wrapper metaclass may derive the class docstring from `__wraps__` --
# confirm against WrapperMeta before adding one.
class MatthewsCorrCoefficient(ClassificationScore):
    # scikit-learn function this score wraps.
    __wraps__ = skl_metrics.matthews_corrcoef

    # Short identifier of the score.
    name = "MCC"

    # Full, human-readable name of the score.
    long_name = "Matthews correlation coefficient"


# Regression scores


Expand Down
50 changes: 47 additions & 3 deletions Orange/tests/test_evaluation_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@

from Orange.data import DiscreteVariable, ContinuousVariable, Domain
from Orange.data import Table
from Orange.classification import LogisticRegressionLearner, SklTreeLearner, NaiveBayesLearner,\
MajorityLearner
from Orange.classification import LogisticRegressionLearner, SklTreeLearner, \
NaiveBayesLearner, MajorityLearner, RandomForestLearner
from Orange.evaluation import AUC, CA, Results, Recall, \
Precision, TestOnTrainingData, scoring, LogLoss, F1, CrossValidation
Precision, TestOnTrainingData, scoring, LogLoss, F1, CrossValidation, \
MatthewsCorrCoefficient, TestOnTestData
from Orange.evaluation.scoring import Specificity
from Orange.preprocess import discretize, Discretize
from Orange.regression import MeanLearner
from Orange.tests import test_filename


Expand Down Expand Up @@ -346,6 +348,48 @@ def test_log_loss_calc(self):
self.assertAlmostEqual(ll_calc, ll_orange[0])


class TestMatthewsCorrCoefficient(unittest.TestCase):
    """Tests of the MatthewsCorrCoefficient scorer on bundled datasets."""

    @classmethod
    def setUpClass(cls):
        # Load every dataset once and share a single scorer instance
        # across all tests in this class.
        cls.heart = Table("heart_disease")
        cls.iris = Table("iris")
        cls.housing = Table("housing")
        cls.scorer = MatthewsCorrCoefficient()

    def test_mcc_binary(self):
        # A seeded random forest evaluated on its own training data
        # (heart_disease) is expected to score close to +1.
        forest = RandomForestLearner(random_state=0)
        evaluated = TestOnTrainingData()(self.heart, [forest])
        score = self.scorer(evaluated)
        self.assertGreater(score, 0.95)

    def test_mcc_multiclass(self):
        # Same expectation as the binary case, on the iris data.
        forest = RandomForestLearner(random_state=0)
        evaluated = TestOnTrainingData()(self.iris, [forest])
        score = self.scorer(evaluated)
        self.assertGreater(score, 0.95)

    def test_mcc_random(self):
        # Predictions of MajorityLearner on iris must yield a score of
        # exactly 0.
        baseline = MajorityLearner()
        evaluated = TestOnTrainingData()(self.iris, [baseline])
        score = self.scorer(evaluated)
        self.assertEqual(score, 0)

    def test_mcc_neg(self):
        # Swap the 0/1 labels in a copy of the data used for testing so
        # that a model trained on the original labels is scored against
        # inverted targets; the score is then expected to be close to -1.
        forest = RandomForestLearner(random_state=0)
        flipped = self.heart.copy()
        was_zero = flipped.Y == 0
        flipped.Y[was_zero] = 1
        flipped.Y[~was_zero] = 0
        evaluated = TestOnTestData()(self.heart, flipped, [forest])
        score = self.scorer(evaluated)
        self.assertLess(score, -0.95)

    def test_mcc_continuous(self):
        # Scoring results obtained with a regression learner on the
        # housing data must raise ValueError.
        mean_learner = MeanLearner()
        evaluated = TestOnTrainingData()(self.housing, [mean_learner])
        with self.assertRaises(ValueError):
            self.scorer(evaluated)


class TestSpecificity(unittest.TestCase):
@classmethod
def setUpClass(cls):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ The *Learner* signal has an uncommon property: it can be connected to more than
- [Recall](https://en.wikipedia.org/wiki/Precision_and_recall) is the proportion of true positives among all positive instances in the data, e.g. the proportion of actually sick people who were correctly diagnosed as sick.
- [Specificity](https://en.wikipedia.org/wiki/Sensitivity_and_specificity) is the proportion of true negatives among all negative instances, e.g. the proportion of non-sick people who were correctly identified as non-sick.
- [LogLoss](https://en.wikipedia.org/wiki/Cross_entropy) or cross-entropy loss takes into account the uncertainty of your prediction based on how much it varies from the actual label.
- [Matthews correlation coefficient](https://en.wikipedia.org/wiki/Phi_coefficient) takes into account true and false positives and negatives and is generally regarded as a balanced measure which can be used even if the classes are of very different sizes.
- Train time - cumulative time in seconds used for training models.
- Test time - cumulative time in seconds used for testing models.
- Regression
Expand Down

0 comments on commit d56940c

Please sign in to comment.