Log-normal log-likelihood #1378

Merged · 15 commits · Aug 11, 2022
4 changes: 3 additions & 1 deletion docs/source/log_likelihoods.rst
@@ -24,6 +24,7 @@ Overview:
- :class:`GaussianKnownSigmaLogLikelihood`
- :class:`GaussianLogLikelihood`
- :class:`KnownNoiseLogLikelihood`
- :class:`LogNormalLogLikelihood`
- :class:`MultiplicativeGaussianLogLikelihood`
- :class:`ScaledLogLikelihood`
- :class:`StudentTLogLikelihood`
@@ -46,11 +47,12 @@ Overview:

.. autoclass:: KnownNoiseLogLikelihood

.. autoclass:: LogNormalLogLikelihood

.. autoclass:: MultiplicativeGaussianLogLikelihood

.. autoclass:: ScaledLogLikelihood

.. autoclass:: StudentTLogLikelihood

.. autoclass:: UnknownNoiseLogLikelihood
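
For orientation, a minimal usage sketch of the newly documented class (not part of the PR; it assumes the toy ConstantModel and the data used in the tests below):

import pints
import pints.toy

# A constant model with a single output; a log-normal likelihood
# requires strictly positive data
model = pints.toy.ConstantModel(1)
times = [1, 2, 3, 4]
values = [3, 4, 5.5, 7.2]
problem = pints.SingleOutputProblem(model, times, values)

# One extra parameter (sigma, the noise level on the log scale) is
# appended to the model parameters
log_likelihood = pints.LogNormalLogLikelihood(problem)
print(log_likelihood([3.7, 1.0]))  # model parameter first, then sigma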

1 change: 1 addition & 0 deletions pints/__init__.py
@@ -119,6 +119,7 @@ def version(formatted=False):
GaussianKnownSigmaLogLikelihood,
GaussianLogLikelihood,
KnownNoiseLogLikelihood,
LogNormalLogLikelihood,
MultiplicativeGaussianLogLikelihood,
ScaledLogLikelihood,
StudentTLogLikelihood,
117 changes: 117 additions & 0 deletions pints/_log_likelihoods.py
@@ -777,6 +777,123 @@ def __init__(self, problem, sigma):
super(KnownNoiseLogLikelihood, self).__init__(problem, sigma)


class LogNormalLogLikelihood(pints.ProblemLogLikelihood):
r"""
Calculates a log-likelihood assuming independent log-normal noise at each
time point, and adds a parameter representing the standard deviation
(sigma) of the noise on the log scale for each output.

Specifically, the sampling distribution takes the form:

.. math::
y(t) \sim \text{log-Normal}(\log f(\theta, t), \sigma)

which can alternatively be written:

.. math::
\log y(t) \sim \text{Normal}(\log f(\theta, t), \sigma)

An important property of this distribution is:

.. math::
\mathbb{E}(y(t)) = f(\theta, t) \exp(\sigma^2 / 2)

As such, the optional parameter ``mean_adjust`` (if true) adjusts the
mean of the distribution so that its expectation is :math:`f(\theta, t)`.
This shifted distribution is of the form:

.. math::
y(t) \sim \text{log-Normal}(\log f(\theta, t) - \sigma^2/2, \sigma)

Extends :class:`ProblemLogLikelihood`.

Parameters
----------
problem
A :class:`SingleOutputProblem` or :class:`MultiOutputProblem`. For a
single-output problem a single parameter is added, for a multi-output
problem ``n_outputs`` parameters are added.
mean_adjust
A boolean. If ``True``, adjusts the distribution so that its mean
corresponds to the function value. By default, this parameter is
``False``.
"""

def __init__(self, problem, mean_adjust=False):
super(LogNormalLogLikelihood, self).__init__(problem)

# Get number of times, number of outputs
self._nt = len(self._times)
self._no = problem.n_outputs()

# Add parameters to problem
self._n_parameters = problem.n_parameters() + self._no

# Pre-calculate parts
self._logn = 0.5 * self._nt * np.log(2 * np.pi)

# For a log-normal sampling distribution, any data point at or below
# zero makes the log-likelihood -infinity for all parameter values
vals = np.asarray(self._values)
if np.any(vals <= 0):
raise ValueError('All data points must exceed zero.')
self._log_values = np.log(self._values)

self._mean_adjust = mean_adjust

def __call__(self, x):
sigma = np.asarray(x[-self._no:])
if np.any(sigma <= 0):
return -np.inf

soln = self._problem.evaluate(x[:-self._no])
if np.any(soln <= 0):
return -np.inf

error = self._log_values - np.log(soln)
if self._mean_adjust:
error += 0.5 * sigma**2
return np.sum(- self._logn - self._nt * np.log(sigma)
- np.sum(self._log_values, axis=0)
- np.sum(error**2, axis=0) / (2 * sigma**2))

def evaluateS1(self, x):
""" See :meth:`LogPDF.evaluateS1()`. """
# Calculate the log-likelihood; if it is -infinity, the gradients are
# not defined
L = self.__call__(x)
if L == -np.inf:
return L, np.tile(np.nan, self._n_parameters)

sigma = np.asarray(x[-self._no:])

# Evaluate, and get residuals
y, dy = self._problem.evaluateS1(x[:-self._no])

# Reshape dy, in case we're working with a single-output problem
dy = dy.reshape(self._nt, self._no, self._n_parameters - self._no)

# Note: the sign matters here; the error must be
# (np.log(data) - np.log(simulation)) to match the partial
# derivatives below
error = self._log_values - np.log(y)
if self._mean_adjust:
error += 0.5 * sigma**2

# Calculate derivatives in the model parameters
dL = np.sum(
(sigma**(-2.0) * np.sum((error.T * dy.T / y.T).T, axis=0).T).T,
axis=0)

# Calculate derivative wrt sigma
dsigma = -self._nt / sigma + sigma**(-3.0) * np.sum(error**2, axis=0)
if self._mean_adjust:
dsigma -= 1 / sigma * np.sum(error, axis=0)

dL = np.concatenate((dL, dsigma))

# Return
return L, dL
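
As a sanity check (not part of the PR), the closed form in __call__ can be reproduced with scipy.stats.lognorm, which parameterises a log-normal by s = sigma and scale = exp(mu); for a constant model the scale is simply the model parameter:

import numpy as np
from scipy.stats import lognorm

mu, sigma = 3.7, 1.0
values = np.array([3, 4, 5.5, 7.2])

# Plain log-normal: the summed per-point log-densities
print(np.sum(lognorm(s=sigma, scale=mu).logpdf(values)))  # approx -10.1647

# Mean-adjusted variant: shift the log-scale mean by -sigma^2 / 2 so
# that the expectation of y equals the model output
scale_adj = mu * np.exp(-0.5 * sigma ** 2)
print(np.sum(lognorm(s=sigma, scale=scale_adj).logpdf(values)))  # approx -11.1299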


class MultiplicativeGaussianLogLikelihood(pints.ProblemLogLikelihood):
r"""
Calculates the log-likelihood for a time-series model assuming a
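The analytic sensitivities in evaluateS1 can be spot-checked with central finite differences; a sketch (not part of the PR; log_likelihood is assumed to be the instance built in the usage sketch above):

import numpy as np

def fd_gradient(log_pdf, x, eps=1e-6):
    # Central finite differences, one parameter at a time
    x = np.asarray(x, dtype=float)
    grad = np.empty_like(x)
    for i in range(len(x)):
        up, down = x.copy(), x.copy()
        up[i] += eps
        down[i] -= eps
        grad[i] = (log_pdf(up) - log_pdf(down)) / (2 * eps)
    return grad

# L, dL = log_likelihood.evaluateS1([3.7, 1.0])
# assert np.allclose(dL, fd_gradient(log_likelihood, [3.7, 1.0]), atol=1e-4)
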
144 changes: 144 additions & 0 deletions pints/tests/test_log_likelihoods.py
@@ -1405,6 +1405,150 @@ def simulate(self, x, times):
self.assertAlmostEqual(log1(0) + log2(0), log3(0))


class TestLogNormalLogLikelihood(unittest.TestCase):

@classmethod
def setUpClass(cls):
# create single-output and multi-output test models
cls.model_single = pints.toy.ConstantModel(1)
cls.model_multiple = pints.toy.ConstantModel(2)
cls.times = [1, 2, 3, 4]
cls.data_single = [3, 4, 5.5, 7.2]
cls.data_multiple = [[3, 1.1],
[4, 3.2],
[5.5, 4.5],
[7.2, 10.1]]
cls.problem_single = pints.SingleOutputProblem(
cls.model_single, cls.times, cls.data_single)
cls.problem_multiple = pints.MultiOutputProblem(
cls.model_multiple, cls.times, cls.data_multiple)
cls.log_likelihood = pints.LogNormalLogLikelihood(cls.problem_single)
cls.log_likelihood_adj = pints.LogNormalLogLikelihood(
cls.problem_single, mean_adjust=True)
cls.log_likelihood_multiple = pints.LogNormalLogLikelihood(
cls.problem_multiple)
cls.log_likelihood_multiple_adj = pints.LogNormalLogLikelihood(
cls.problem_multiple, mean_adjust=True)

def test_bad_constructor(self):
# tests that non-positive data values raise an error
data = [0, 4, 5.5, 7.2]
problem = pints.SingleOutputProblem(
self.model_single, self.times, data)
self.assertRaises(ValueError, pints.LogNormalLogLikelihood,
problem)

def test_call(self):
# test calls of log-likelihood

# single output problem
sigma = 1
mu = 3.7
log_like = self.log_likelihood([mu, sigma])
self.assertAlmostEqual(log_like, -10.164703123713256)
log_like_adj = self.log_likelihood_adj([mu, sigma])
self.assertAlmostEqual(log_like_adj, -11.129905368437115)

sigma = -1
log_like = self.log_likelihood([mu, sigma])
self.assertEqual(log_like, -np.inf)

mu = -1
sigma = 1
log_like = self.log_likelihood([-1, sigma])
self.assertEqual(log_like, -np.inf)

# two dim output problem
mu1 = 1.5
mu2 = 3.4 / 2
sigma1 = 3
sigma2 = 1.2
log_like = self.log_likelihood_multiple([mu1, mu2, sigma1, sigma2])
self.assertAlmostEqual(log_like, -24.906992140695426)

# adjusts mean
log_like = self.log_likelihood_multiple_adj([mu1, mu2, sigma1, sigma2])
self.assertAlmostEqual(log_like, -32.48791585037583)

sigma1 = -1
log_like = self.log_likelihood_multiple([mu1, mu2, sigma1, sigma2])
self.assertEqual(log_like, -np.inf)

def test_evaluateS1(self):
# tests sensitivity

# single output problem
sigma = 1
mu = 3.7
y, dL = self.log_likelihood.evaluateS1([mu, sigma])
self.assertEqual(len(dL), 2)
y_call = self.log_likelihood([mu, sigma])
self.assertEqual(y, y_call)
correct_vals = [0.2514606728237081, -3.3495735543077423]
for i in range(len(dL)):
self.assertAlmostEqual(dL[i], correct_vals[i])

# mean-adjustment
y, dL = self.log_likelihood_adj.evaluateS1([mu, sigma])
self.assertEqual(len(dL), 2)
y_call = self.log_likelihood_adj([mu, sigma])
self.assertEqual(y, y_call)
correct_vals = [0.7920012133642484, -4.349573554307744]
for i in range(len(dL)):
self.assertAlmostEqual(dL[i], correct_vals[i])

sigma = -1
y, dL = self.log_likelihood.evaluateS1([mu, sigma])
self.assertEqual(y, -np.inf)
for dl in dL:
self.assertTrue(np.isnan(dl))

mu = -1
sigma = 1
y, dL = self.log_likelihood.evaluateS1([mu, sigma])
self.assertEqual(y, -np.inf)
for dl in dL:
self.assertTrue(np.isnan(dl))

# two dim output problem
mu1 = 1.5
mu2 = 3.4 / 2
sigma1 = 3
sigma2 = 1.2
y, dL = self.log_likelihood_multiple.evaluateS1(
[mu1, mu2, sigma1, sigma2])
self.assertEqual(len(dL), 4)
y_call = self.log_likelihood_multiple([mu1, mu2, sigma1, sigma2])
self.assertEqual(y, y_call)
# note: the factor of 2 for the second output comes from
# df / dtheta = 2 for the constant model's second output
correct_vals = [0.33643521004561316, 0.03675900403289047 * 2,
-1.1262529182124121, -1.8628028462558714]
for i in range(len(dL)):
self.assertAlmostEqual(dL[i], correct_vals[i])

# mean-adjustment
y, dL = self.log_likelihood_multiple_adj.evaluateS1(
[mu1, mu2, sigma1, sigma2])
self.assertEqual(len(dL), 4)
y_call = self.log_likelihood_multiple_adj([mu1, mu2, sigma1, sigma2])
self.assertEqual(y, y_call)
# note: the factor of 2 for the second output comes from
# df / dtheta = 2 for the constant model's second output
correct_vals = [1.6697685433789466, 0.6249942981505375 * 2,
-4.126252918212412, -3.062802846255874]
for i in range(len(dL)):
self.assertAlmostEqual(dL[i], correct_vals[i])

sigma2 = -2
y, dL = self.log_likelihood_multiple.evaluateS1(
[mu1, mu2, sigma1, sigma2])
self.assertEqual(y, -np.inf)
for dl in dL:
self.assertTrue(np.isnan(dl))
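
The factor-of-two comments in the expected values above can be verified from the toy model's own sensitivities (a sketch, assuming pints.toy.ConstantModel(n) scales its i-th output by i, so df_i / dtheta_i = i):

import pints.toy

model = pints.toy.ConstantModel(2)
y, dy = model.simulateS1([1.5, 1.7], [1, 2, 3, 4])
# dy has shape (n_times, n_outputs, n_parameters); the sensitivity of
# the second output with respect to its parameter is 2
print(dy[0])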


class TestMultiplicativeGaussianLogLikelihood(unittest.TestCase):

@classmethod