Extend docstrings of loss functions (#64)
This PR revises the documentation of the loss functions.

Co-authored-by: Manoel Marques <Manoel.Marques@ibm.com>
Co-authored-by: Anton Dekusar <62334182+adekusar-drl@users.noreply.github.com>
Co-authored-by: Anton Dekusar <adekusar@ie.ibm.com>
4 people authored Jun 16, 2021
1 parent 07c416c commit df5d9f8
Showing 4 changed files with 105 additions and 42 deletions.
1 change: 1 addition & 0 deletions .pylintdict
@@ -22,6 +22,7 @@ codec
config
contravariance
creg
crossentropyloss
csr
ctrl
ctx
14 changes: 7 additions & 7 deletions qiskit_machine_learning/algorithms/objective_functions.py
@@ -111,8 +111,8 @@ def objective(self, weights: np.ndarray) -> float:
# predict is of shape (N, 1), where N is a number of samples
predict = self._neural_network_forward(weights)
target = np.array(self._y).reshape(predict.shape)
value = np.sum(self._loss(predict, target))
return value
# float(...) is for mypy compliance
return float(np.sum(self._loss(predict, target)))

def gradient(self, weights: np.ndarray) -> np.ndarray:
# check that we have supported output shape
@@ -125,14 +125,15 @@ def gradient(self, weights: np.ndarray) -> np.ndarray:
# weight grad is of shape (N, 1, num_weights)
_, weight_grad = self._neural_network.backward(self._X, weights)

grad = np.zeros((1, self._neural_network.num_weights))
# we reshape _y since the output has the shape (N, 1) and _y has (N,)
# loss_gradient is of shape (N, 1)
loss_gradient = self._loss.gradient(output, self._y.reshape(-1, 1))

# for the output we compute a dot product(matmul) of loss gradient for this output
# and weights for this output.
grad += loss_gradient[:, 0] @ weight_grad[:, 0, :]
grad = loss_gradient[:, 0] @ weight_grad[:, 0, :]
# we keep the shape of (1, num_weights)
grad = grad.reshape(1, -1)

return grad
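
A minimal numpy sketch of the shape bookkeeping in this hunk (the arrays are illustrative stand-ins, not real network outputs; N and num_weights are hypothetical values):

import numpy as np

N, num_weights = 5, 3
loss_gradient = np.ones((N, 1))                  # stand-in for self._loss.gradient(...), shape (N, 1)
weight_grad = np.full((N, 1, num_weights), 0.5)  # stand-in for the backward() output, shape (N, 1, num_weights)

# (N,) @ (N, num_weights) contracts over the samples and yields (num_weights,)
grad = loss_gradient[:, 0] @ weight_grad[:, 0, :]
grad = grad.reshape(1, -1)                       # keep the shape (1, num_weights)
print(grad.shape)                                # (1, 3)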

@@ -183,9 +184,8 @@ class OneHotObjectiveFunction(ObjectiveFunction):
def objective(self, weights: np.ndarray) -> float:
# probabilities is of shape (N, num_outputs)
probs = self._neural_network_forward(weights)
val = np.sum(self._loss(probs, self._y))

return val
# float(...) is for mypy compliance
return float(np.sum(self._loss(probs, self._y)))

def gradient(self, weights: np.ndarray) -> np.ndarray:
# predict is of shape (N, num_outputs)
130 changes: 97 additions & 33 deletions qiskit_machine_learning/utils/loss_functions/loss_functions.py
@@ -13,79 +13,141 @@
""" Loss utilities """

from abc import ABC, abstractmethod

import numpy as np

from ...exceptions import QiskitMachineLearningError


class Loss(ABC):
"""
Abstract base class for Loss.
Abstract base class for computing Loss.
"""

def __call__(self, predict, target):
def __call__(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
"""
This method calls the ``evaluate`` method as a convenience for computing the loss.
"""
return self.evaluate(predict, target)

@abstractmethod
def evaluate(self, predict, target):
"""evaluate"""
raise NotImplementedError

@abstractmethod
def gradient(self, predict, target):
"""gradient"""
def evaluate(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
"""
An abstract method for evaluating the loss function. Inputs are expected in a shape
of ``(N, *)``, where ``N`` is the number of samples. The loss is computed for each
sample individually.
Args:
predict: an array of predicted values using the model.
target: an array of the true values.
Returns:
An array with values of the loss function of the shape ``(N, 1)``.
Raises:
QiskitMachineLearningError: shapes of predict and target do not match.
"""
raise NotImplementedError

@staticmethod
def _validate(predict, target):
predict = np.asarray(predict)
target = np.asarray(target)
def _validate_shapes(predict: np.ndarray, target: np.ndarray) -> None:
"""
Validates that shapes of both parameters are identical.
Args:
predict: an array of predicted values using the model
target: an array of the true values
Raises:
QiskitMachineLearningError: shapes of predict and target do not match.
"""

if predict.shape != target.shape:
raise QiskitMachineLearningError(
f"Shapes don't match, predict: {predict.shape}, target: {target.shape}!"
)
return predict, target
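
A quick illustration of the check above (hypothetical arrays; ``L1Loss`` is defined further down in this file):

import numpy as np
from qiskit_machine_learning.exceptions import QiskitMachineLearningError
from qiskit_machine_learning.utils.loss_functions import L1Loss

try:
    L1Loss()(np.zeros((5, 2)), np.zeros((5,)))  # shapes (5, 2) and (5,) differ
except QiskitMachineLearningError as exc:
    print(exc)  # prints the shape-mismatch message from above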

@abstractmethod
def gradient(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
"""
An abstract method for computing the gradient. Inputs are expected in a shape
of ``(N, *)``, where ``N`` is the number of samples. The gradient is computed for
each sample individually.
Args:
predict: an array of predicted values using the model.
target: an array of the true values.
Returns:
An array with gradient values of the shape ``(N, *)``. The output shape depends on
the loss function.
Raises:
QiskitMachineLearningError: shapes of predict and target do not match.
"""
raise NotImplementedError
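
To illustrate the contract the two abstract methods define, here is a hedged sketch of a hypothetical subclass (a Huber-style loss for 1-D inputs; it is not part of this PR and assumes ``Loss`` is importable from ``qiskit_machine_learning.utils.loss_functions``):

import numpy as np
from qiskit_machine_learning.utils.loss_functions import Loss

class HuberLoss(Loss):
    """Hypothetical per-sample Huber loss with delta = 1, for 1-D inputs."""

    def evaluate(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        self._validate_shapes(predict, target)
        diff = predict - target
        # quadratic near zero, linear in the tails
        return np.where(np.abs(diff) <= 1.0, 0.5 * diff ** 2, np.abs(diff) - 0.5)

    def gradient(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
        self._validate_shapes(predict, target)
        diff = predict - target
        return np.where(np.abs(diff) <= 1.0, diff, np.sign(diff))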


class L1Loss(Loss):
"""L1Loss"""
r"""
This class computes the L1 loss for each sample as:
.. math::
def evaluate(self, predict, target):
predict, target = self._validate(predict, target)
\text{L1Loss}(predict, target) = \sum_{i=0}^{N_{\text{elements}}} \left| predict_i -
target_i \right|.
"""

def evaluate(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
self._validate_shapes(predict, target)

if len(predict.shape) <= 1:
return np.abs(predict - target)
else:
return np.linalg.norm(predict - target, ord=1, axis=tuple(range(1, len(predict.shape))))

def gradient(self, predict, target):
predict, target = self._validate(predict, target)
def gradient(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
self._validate_shapes(predict, target)

return np.sign(predict - target)
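
A usage sketch for the 2-D branch above (assuming ``L1Loss`` is importable from ``qiskit_machine_learning.utils.loss_functions``; values are illustrative):

import numpy as np
from qiskit_machine_learning.utils.loss_functions import L1Loss

l1 = L1Loss()
predict = np.array([[0.2, 0.8], [0.6, 0.4]])  # shape (N, 2) with N = 2
target = np.array([[0.0, 1.0], [1.0, 0.0]])

print(l1(predict, target))           # per-sample L1 values: [0.4 0.8]
print(l1.gradient(predict, target))  # sign(predict - target): [[ 1. -1.] [-1.  1.]]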


class L2Loss(Loss):
"""L2Loss"""
r"""
This class computes the L2 loss for each sample as:
.. math::
\text{L2Loss}(predict, target) = \sum_{i=0}^{N_{\text{elements}}} (predict_i - target_i)^2.
"""

def evaluate(self, predict, target):
predict, target = self._validate(predict, target)
def evaluate(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
self._validate_shapes(predict, target)

if len(predict.shape) <= 1:
return (predict - target) ** 2
else:
return np.linalg.norm(predict - target, axis=tuple(range(1, len(predict.shape)))) ** 2

def gradient(self, predict, target):
predict, target = self._validate(predict, target)
def gradient(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
self._validate_shapes(predict, target)

return 2 * (predict - target)
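
An analogous sketch for the 1-D branch (illustrative values):

import numpy as np
from qiskit_machine_learning.utils.loss_functions import L2Loss

l2 = L2Loss()
predict = np.array([1.0, 2.0, 3.0])  # 1-D input: element-wise squared error
target = np.array([1.5, 2.0, 2.0])

print(l2(predict, target))           # [0.25 0.   1.  ]
print(l2.gradient(predict, target))  # [-1.  0.  2.]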


class CrossEntropyLoss(Loss):
"""CrossEntropyLoss"""
r"""
This class computes the cross entropy loss for each sample as:
def evaluate(self, predict, target):
predict, target = self._validate(predict, target)
.. math::
\text{CrossEntropyLoss}(predict, target) = -\sum_{i=0}^{N_{\text{classes}}}
target_i * log(predict_i).
"""

def evaluate(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
self._validate_shapes(predict, target)
if len(predict.shape) == 1:
predict = predict.reshape(1, -1)
target = target.reshape(1, -1)
@@ -97,10 +159,10 @@ def evaluate(self, predict, target):

return val

def gradient(self, predict, target):
def gradient(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
"""Assume softmax is used, and target vector may or may not be one-hot encoding"""

predict, target = self._validate(predict, target)
self._validate_shapes(predict, target)
if len(predict.shape) == 1:
predict = predict.reshape(1, -1)
target = target.reshape(1, -1)
@@ -113,10 +175,12 @@ def gradient(self, predict, target):


class CrossEntropySigmoidLoss(Loss):
"""This is used for binary classification"""
"""
This class computes the cross entropy sigmoid loss and should be used for binary classification.
"""

def evaluate(self, predict, target):
predict, target = self._validate(predict, target)
def evaluate(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
self._validate_shapes(predict, target)

if len(set(target)) != 2:
raise QiskitMachineLearningError(
@@ -126,8 +190,8 @@ def evaluate(self, predict, target):
x = CrossEntropyLoss()
return 1.0 / (1.0 + np.exp(-x.evaluate(predict, target)))

def gradient(self, predict, target):
predict, target = self._validate(predict, target)
def gradient(self, predict: np.ndarray, target: np.ndarray) -> np.ndarray:
self._validate_shapes(predict, target)

return target * (1.0 / (1.0 + np.exp(-predict)) - 1) + (1 - target) * (
1.0 / (1.0 + np.exp(-predict))
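A hedged usage sketch for the cross-entropy losses documented above (assuming the classes are importable from ``qiskit_machine_learning.utils.loss_functions``; the probabilities below are illustrative softmax outputs):

import numpy as np
from qiskit_machine_learning.utils.loss_functions import CrossEntropyLoss

ce = CrossEntropyLoss()
predict = np.array([[0.7, 0.2, 0.1]])  # softmax-like probabilities, shape (1, 3)
target = np.array([[1.0, 0.0, 0.0]])   # one-hot encoded label

print(ce(predict, target))           # per-sample cross entropy, shape (1,)
print(ce.gradient(predict, target))  # per-sample gradient, shape (1, 3)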
2 changes: 0 additions & 2 deletions test/utils/loss_functions/test_loss_functions.py
@@ -27,10 +27,8 @@ class TestLossFunctions(QiskitMachineLearningTestCase):

@data(
# input shape, loss shape
(None, (), "l1"),
((5,), (5,), "l1"),
((5, 2), (5,), "l1"),
(None, (), "l2"),
((5,), (5,), "l2"),
((5, 2), (5,), "l2"),
)
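A hedged sketch of what the remaining parameterized cases exercise (hypothetical code, not the PR's test body; it mirrors the (input shape, loss shape, name) tuples above):

import numpy as np
from qiskit_machine_learning.utils.loss_functions import L1Loss, L2Loss

losses = {"l1": L1Loss(), "l2": L2Loss()}
for input_shape, loss_shape, name in [((5,), (5,), "l1"), ((5, 2), (5,), "l2")]:
    predict = np.random.rand(*input_shape)
    target = np.random.rand(*input_shape)
    # the loss is computed per sample, so the output shape drops the feature axis
    assert losses[name](predict, target).shape == loss_shape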
