Source code for optiml.ml.neural_network.losses

from abc import ABC

import autograd.numpy as np
from scipy.special import xlogy

from .activations import Linear
from .layers import ParamLayer
from .regularizers import L2
from ...opti import OptimizationFunction



[docs]
class NeuralNetworkLoss(OptimizationFunction, ABC):
    """
    Abstract base of the loss functions optimized while training a neural
    network.

    The objective couples a data term (``loss``) with the regularization
    terms of the parametric layers of the attached network, and exposes the
    matching jacobian obtained from the network's backward pass.

    Concrete losses have to provide ``loss``; ``delta`` may be overridden
    when ``y_pred - y_true`` is not the appropriate error signal.
    """

    def __init__(self, neural_net, X, y):
        """
        Parameters
        ----------

        neural_net : `NeuralNetwork` instance
            Estimator whose layers and forward/backward passes are used to
            evaluate the objective and its jacobian.

        X : ndarray of shape (n_samples, n_features)
            Training samples the loss is evaluated on by default.

        y : ndarray of shape (n_samples, n_outputs)
            Targets paired with ``X``.
        """
        # the parent class is initialized with the number of columns of X
        n_features = X.shape[1]
        super().__init__(n_features)
        self.neural_net = neural_net
        self.X, self.y = X, y


[docs]
    def args(self):
        """
        Return the data the loss was built with.

        Returns
        -------

        tuple
            The pair ``(X, y)`` stored on the instance.
        """
        data = (self.X, self.y)
        return data



[docs]
    def loss(self, y_pred, y_true):
        """
        Data term of the objective; must be provided by subclasses.

        Parameters
        ----------

        y_pred : ndarray
            Output of the network's forward pass.

        y_true : ndarray
            Target values.

        Raises
        ------

        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError()



[docs]
    def delta(self, y_pred, y_true):
        """
        Error signal handed (after scaling) to the network's backward pass
        by ``jacobian``.

        Parameters
        ----------

        y_pred : ndarray
            Output of the network's forward pass.

        y_true : ndarray
            Target values.

        Returns
        -------

        ndarray
            The element-wise difference ``y_pred - y_true``.
        """
        residual = y_pred - y_true
        return residual



[docs]
    def function(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the regularized objective at the given packed parameters.

        Parameters
        ----------

        packed_coef_inter : ndarray
            Packed parameters; they are loaded into the network through
            ``neural_net._unpack`` before the evaluation.

        X_batch : ndarray, optional
            Samples to evaluate on. Defaults to the stored ``X``.

        y_batch : ndarray, optional
            Targets to evaluate on. Defaults to the stored ``y``.

        Returns
        -------

        float
            ``loss(forward(X_batch), y_batch) / (2 * n_samples)`` plus the
            coefficient and intercept regularization terms of every
            `ParamLayer`, each divided by ``2 * n_samples`` as well.
        """
        X_eval = self.X if X_batch is None else X_batch
        y_eval = self.y if y_batch is None else y_batch

        net = self.neural_net
        net._unpack(packed_coef_inter)

        # every term of the objective shares the same 2 * n_samples divisor
        denom = 2 * X_eval.shape[0]

        param_layers = [layer for layer in net.layers if isinstance(layer, ParamLayer)]
        coef_regs = sum(layer.coef_reg(layer.coef_) for layer in param_layers) / denom
        # intercept penalties only count for layers that actually fit one
        inter_regs = sum(layer.inter_reg(layer.inter_) for layer in param_layers
                         if layer.fit_intercept) / denom

        data_term = 1 / denom * self.loss(net.forward(X_eval), y_eval)
        return data_term + coef_regs + inter_regs



[docs]
    def jacobian(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the jacobian of the objective at the given packed parameters
        through the network's backward pass.

        Parameters
        ----------

        packed_coef_inter : ndarray
            Packed parameters; they are loaded into the network through
            ``neural_net._unpack`` before the evaluation.

        X_batch : ndarray, optional
            Samples to evaluate on. Defaults to the stored ``X``.

        y_batch : ndarray, optional
            Targets to evaluate on. Defaults to the stored ``y``.

        Returns
        -------

        ndarray
            Whatever ``neural_net._pack`` produces from the gradients returned
            by ``neural_net.backward``.

        Notes
        -----

        NOTE(review): ``delta`` is scaled by ``1 / n_samples`` here while
        ``function`` scales ``loss`` by ``1 / (2 * n_samples)``. The two agree
        for the squared error, whose derivative carries a factor of 2;
        confirm that this scaling is the intended one for the other losses.
        """
        X_eval = self.X if X_batch is None else X_batch
        y_eval = self.y if y_batch is None else y_batch

        net = self.neural_net
        net._unpack(packed_coef_inter)

        inv_n_samples = 1 / X_eval.shape[0]
        y_pred = net.forward(X_eval)
        scaled_delta = inv_n_samples * self.delta(y_pred, y_eval)

        grads = net.backward(scaled_delta)
        return net._pack(*grads)





[docs]
class MeanSquaredError(NeuralNetworkLoss):
    r"""
    Compute the mean squared error loss for regression as:

    .. math::

        L(y_{pred}, y_{true}) = \sum (y_{pred} - y_{true})^2

    ``loss`` returns the plain sum of squared errors; the averaging factor
    ``1 / (2 * n_samples)`` is applied by ``NeuralNetworkLoss.function``.
    """


[docs]
    def x_star(self):
        """
        Return the closed-form minimizer of the objective when one exists.

        A closed form is used only when the network has a single layer with
        `Linear` activation, an `L2` coefficient regularizer and no intercept.
        In that case the (ridge) normal equations

            (X^T X + lmbda * I) w = X^T y

        are solved directly with ``np.linalg.solve`` rather than by forming
        the explicit inverse, which is both slower and less accurate. The
        solution is cached on the instance as ``x_opt``.

        Returns
        -------

        ndarray
            The minimizer, or an array of ``self.ndim`` NaNs when no closed
            form is available for the attached network.

        Raises
        ------

        numpy.linalg.LinAlgError
            If the system matrix is singular.
        """
        layers = self.neural_net.layers
        # short-circuit on the layer count so that layers[-1] is only touched
        # when the network really has exactly one layer
        if not (len(layers) == 1 and
                isinstance(layers[-1].activation, Linear) and
                isinstance(layers[-1].coef_reg, L2) and
                not layers[-1].fit_intercept):
            return np.full(fill_value=np.nan, shape=self.ndim)

        if not hasattr(self, 'x_opt'):
            gram = self.X.T.dot(self.X)
            lmbda = layers[-1].coef_reg.lmbda
            if lmbda != 0.:
                gram = gram + np.eye(self.ndim) * lmbda
            self.x_opt = np.linalg.solve(gram, self.X.T.dot(self.y))
        return self.x_opt



[docs]
    def f_star(self):
        """
        Return the optimal objective value when the minimizer is known.

        Returns
        -------

        float
            ``self.function(self.x_star())`` when ``x_star`` is not entirely
            NaN, ``np.inf`` otherwise.
        """
        optimum = self.x_star()
        # an all-NaN minimizer means no closed-form solution is available
        if np.isnan(optimum).all():
            return np.inf
        return self.function(optimum)



[docs]
    def loss(self, y_pred, y_true):
        """
        Sum of squared errors between predictions and targets.

        Parameters
        ----------

        y_pred : ndarray
            Output of the network's forward pass.

        y_true : ndarray
            Target values.

        Returns
        -------

        float
            ``sum((y_pred - y_true) ** 2)`` over all the entries.
        """
        errors = y_pred - y_true
        squared_errors = np.square(errors)
        return np.sum(squared_errors)





[docs]
class MeanAbsoluteError(NeuralNetworkLoss):
    r"""
    Compute the mean absolute error loss for regression as:

    .. math::

        L(y_{pred}, y_{true}) = \sum \lvert y_{pred} - y_{true} \rvert

    ``loss`` returns the plain sum of absolute errors; the averaging factor
    ``1 / (2 * n_samples)`` is applied by ``NeuralNetworkLoss.function``.
    ``delta`` is overridden to return the sign of the error.
    """


[docs]
    def loss(self, y_pred, y_true):
        """
        Sum of absolute errors between predictions and targets.

        Parameters
        ----------

        y_pred : ndarray
            Output of the network's forward pass.

        y_true : ndarray
            Target values.

        Returns
        -------

        float
            ``sum(|y_pred - y_true|)`` over all the entries.
        """
        errors = y_pred - y_true
        absolute_errors = np.abs(errors)
        return np.sum(absolute_errors)



[docs]
    def delta(self, y_pred, y_true):
        """
        Error signal of the absolute error: the sign of each residual.

        Parameters
        ----------

        y_pred : ndarray
            Output of the network's forward pass.

        y_true : ndarray
            Target values.

        Returns
        -------

        ndarray
            ``sign(y_pred - y_true)``, i.e. -1, 0 or 1 for each entry.
        """
        errors = y_pred - y_true
        return np.sign(errors)





[docs]
class BinaryCrossEntropy(NeuralNetworkLoss):
    r"""Binary Cross-Entropy aka Sigmoid Cross-Entropy loss
    function for binary and multi-label classification
    or regression between 0 and 1 with sigmoid output layer:

    .. math::

        L(y_{pred}, y_{true}) = -\sum \left[ y_{true} \log(y_{pred}) +
        (1 - y_{true}) \log(1 - y_{pred}) \right]

    ``loss`` returns the plain sum; the averaging factor
    ``1 / (2 * n_samples)`` is applied by ``NeuralNetworkLoss.function``.
    ``delta`` is not overridden, so the inherited ``y_pred - y_true`` is used.
    """


[docs]
    def loss(self, y_pred, y_true):
        """
        Summed binary cross-entropy between predictions and targets.

        ``xlogy`` is used in place of ``y * log(p)`` so that terms whose
        weight is exactly 0 contribute 0 even when the probability is 0.

        Parameters
        ----------

        y_pred : ndarray
            Predicted probabilities.

        y_true : ndarray
            Target values.

        Returns
        -------

        float
            ``-sum(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred))``.
        """
        positive_term = xlogy(y_true, y_pred)
        negative_term = xlogy(1. - y_true, 1. - y_pred)
        return -np.sum(positive_term + negative_term)





[docs]
class CategoricalCrossEntropy(NeuralNetworkLoss):
    r"""Categorical Cross-Entropy loss function for multi-class (single-label)
    classification with softmax output layer and one-hot encoded target data:

    .. math::

        L(y_{pred}, y_{true}) = -\sum y_{true} \log(y_{pred})

    ``loss`` returns the plain sum; the averaging factor
    ``1 / (2 * n_samples)`` is applied by ``NeuralNetworkLoss.function``.
    ``delta`` is overridden to subtract 1 from the predictions at the
    positions where ``y_true`` is nonzero.
    """


[docs]
    def loss(self, y_pred, y_true):
        """
        Summed categorical cross-entropy between predictions and targets.

        ``xlogy`` is used in place of ``y * log(p)`` so that entries whose
        target is exactly 0 contribute 0 even when the probability is 0.

        Parameters
        ----------

        y_pred : ndarray
            Predicted class probabilities.

        y_true : ndarray
            One-hot encoded targets, same shape as ``y_pred``.

        Returns
        -------

        float
            ``-sum(y_true * log(y_pred))`` over all the entries.
        """
        weighted_log_probs = xlogy(y_true, y_pred)
        return -np.sum(weighted_log_probs)



[docs]
    def delta(self, y_pred, y_true):
        """
        Error signal of the cross-entropy: the predictions with 1 subtracted
        at the positions of the true classes.

        The result is computed on a copy, so the ``y_pred`` array passed in
        (the output of the network's forward pass) is left untouched.

        Parameters
        ----------

        y_pred : ndarray
            Predicted class probabilities.

        y_true : ndarray
            One-hot encoded targets, same shape as ``y_pred``; every nonzero
            entry marks a true class.

        Returns
        -------

        ndarray
            New array equal to ``y_pred`` except that 1 is subtracted wherever
            ``y_true`` is nonzero.
        """
        # according to: https://deepnotes.io/softmax-crossentropy
        one_hot_mask = y_true.astype(bool)
        # work on a copy: subtracting in place would silently corrupt the
        # caller's predictions
        grad = y_pred.copy()
        grad[one_hot_mask] -= 1.
        return grad





[docs]
class SparseCategoricalCrossEntropy(NeuralNetworkLoss):
    """Sparse Categorical Cross-Entropy loss function for multi-class
    (single-label) classification with softmax output layer.

    Targets are class indices rather than one-hot rows: ``loss`` sums
    ``-log(y_pred[i, y_true[i]])`` over the samples, and ``delta`` subtracts
    1 from the prediction of the true class of each sample. The averaging
    factor ``1 / (2 * n_samples)`` is applied by
    ``NeuralNetworkLoss.function``.
    """


[docs]
    def loss(self, y_pred, y_true):
        """
        Summed negative log-probability assigned to the true class of each
        sample.

        Parameters
        ----------

        y_pred : ndarray of shape (n_samples, n_classes)
            Predicted class probabilities.

        y_true : ndarray with n_samples entries
            Class index of each sample; it is cast to ``int`` and flattened.

        Returns
        -------

        float
            ``-sum(log(y_pred[i, y_true[i]]))`` over the samples.
        """
        n_rows = y_pred.shape[0]
        assert n_rows == y_true.shape[0]
        class_idx = y_true.astype(int).ravel()
        # pick, for every row, the probability of its true class
        true_class_probs = y_pred[np.arange(n_rows), class_idx]
        return -np.sum(np.log(true_class_probs))



[docs]
    def delta(self, y_pred, y_true):
        """
        Error signal of the cross-entropy: the predictions with 1 subtracted
        from the entry of the true class of each sample.

        The result is computed on a copy, so the ``y_pred`` array passed in
        (the output of the network's forward pass) is left untouched.

        Parameters
        ----------

        y_pred : ndarray of shape (n_samples, n_classes)
            Predicted class probabilities.

        y_true : ndarray with n_samples entries
            Class index of each sample; it is cast to ``int`` and flattened.

        Returns
        -------

        ndarray of shape (n_samples, n_classes)
            New array equal to ``y_pred`` except that 1 is subtracted at
            ``[i, y_true[i]]`` for every sample ``i``.
        """
        class_idx = y_true.astype(int).ravel()
        # work on a copy: subtracting in place would silently corrupt the
        # caller's predictions
        grad = y_pred.copy()
        grad[np.arange(grad.shape[0]), class_idx] -= 1.
        return grad




# snake_case aliases bound to the loss classes defined above
mean_squared_error = MeanSquaredError
mean_absolute_error = MeanAbsoluteError
binary_cross_entropy = BinaryCrossEntropy
categorical_cross_entropy = CategoricalCrossEntropy
sparse_categorical_cross_entropy = SparseCategoricalCrossEntropy