Source code for optiml.ml.neural_network.losses

from abc import ABC

import autograd.numpy as np
from scipy.special import xlogy

from .activations import Linear
from .layers import ParamLayer
from .regularizers import L2
from ...opti import OptimizationFunction


class NeuralNetworkLoss(OptimizationFunction, ABC):
    """
    Abstract base class for the loss functions of a neural network.

    The objective returned by ``function`` is the data loss plus the
    regularization terms of the parametric layers, all scaled by
    ``1 / (2 * n_samples)``; ``jacobian`` is obtained by running the
    network's backward pass on the (scaled) output error ``delta``.

    Subclasses must implement ``loss`` and may override ``delta``.
    """

    def __init__(self, neural_net, X, y):
        """
        Parameters
        ----------
        neural_net : `NeuralNetwork` instance
            The estimator this loss is attached to. Its ``layers``,
            ``forward``, ``backward``, ``_pack`` and ``_unpack`` members
            are used to evaluate the objective and its jacobian.

        X : ndarray of shape (n_samples, n_features)
            Training data used when no explicit batch is given.

        y : ndarray of shape (n_samples, n_outputs)
            Target values associated with ``X``.
        """
        # The number of features is handed to the parent class as the
        # dimension of the optimization problem.
        super().__init__(X.shape[1])
        self.neural_net = neural_net
        self.X = X
        self.y = y

    def args(self):
        """Return the stored training data as the tuple ``(X, y)``."""
        return self.X, self.y

    def loss(self, y_pred, y_true):
        """
        Data loss between predictions and targets.

        Must be provided by every concrete subclass; the base
        implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def delta(self, y_pred, y_true):
        """
        Error signal fed to the backward pass.

        The default is the plain residual ``y_pred - y_true``.
        """
        return y_pred - y_true

    def function(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the regularized objective at the given packed parameters.

        Parameters
        ----------
        packed_coef_inter : ndarray
            Parameters in the packed format understood by
            ``neural_net._unpack``.

        X_batch, y_batch : ndarray, optional
            Batch on which to evaluate the objective; each one falls back
            independently to the stored ``X`` / ``y`` when ``None``.

        Returns
        -------
        The value ``(loss + coef penalties + intercept penalties) / (2 * n)``
        where ``n`` is the number of rows of ``X_batch``.
        """
        X_batch = self.X if X_batch is None else X_batch
        y_batch = self.y if y_batch is None else y_batch

        net = self.neural_net
        net._unpack(packed_coef_inter)

        scale = 2 * X_batch.shape[0]
        # Only parametric layers carry regularizers.
        param_layers = [layer for layer in net.layers
                        if isinstance(layer, ParamLayer)]
        coef_penalty = sum(layer.coef_reg(layer.coef_)
                           for layer in param_layers) / scale
        # Intercept penalties only count for layers that fit an intercept.
        inter_penalty = sum(layer.inter_reg(layer.inter_)
                            for layer in param_layers
                            if layer.fit_intercept) / scale

        data_term = 1 / scale * self.loss(net.forward(X_batch), y_batch)
        return data_term + coef_penalty + inter_penalty

    def jacobian(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the jacobian of the objective via the network backward pass.

        Parameters
        ----------
        packed_coef_inter : ndarray
            Parameters in the packed format understood by
            ``neural_net._unpack``.

        X_batch, y_batch : ndarray, optional
            Batch on which to evaluate the jacobian; each one falls back
            independently to the stored ``X`` / ``y`` when ``None``.

        Returns
        -------
        The output of ``neural_net.backward`` re-packed with
        ``neural_net._pack``.
        """
        X_batch = self.X if X_batch is None else X_batch
        y_batch = self.y if y_batch is None else y_batch

        net = self.neural_net
        net._unpack(packed_coef_inter)

        # NOTE(review): the error signal is scaled by 1 / n while ``function``
        # scales the loss by 1 / (2 * n). The two agree for a squared-error
        # loss with the default ``delta``; confirm the intended scaling for
        # the other losses.
        inv_n_samples = 1 / X_batch.shape[0]
        delta = inv_n_samples * self.delta(net.forward(X_batch), y_batch)
        grads = net.backward(delta)
        return net._pack(*grads)
class MeanSquaredError(NeuralNetworkLoss):
    r"""
    Compute the mean squared error loss for regression as:

    .. math::

        L(y_{pred}, y_{true}) = \sum (y_{pred} - y_{true})^2
    """

    def x_star(self):
        """
        Closed-form minimizer, when the network reduces to linear regression.

        A closed form is used only if the network has exactly one layer and
        that layer has a ``Linear`` activation, an ``L2`` coefficient
        regularizer and no intercept. In that case the solution of the
        normal equations ``(X^T X + lmbda * I) w = X^T y`` is computed once
        and cached in ``self.x_opt``.

        Returns
        -------
        ndarray
            The cached solution, or an array of shape ``(ndim,)`` filled
            with NaN when no closed form applies.

        Raises
        ------
        numpy.linalg.LinAlgError
            If ``X^T X + lmbda * I`` is singular.
        """
        layers = self.neural_net.layers
        # ``and`` short-circuits, so ``layers[-1]`` is only touched when the
        # network has exactly one layer.
        has_closed_form = (len(layers) == 1
                           and isinstance(layers[-1].activation, Linear)
                           and isinstance(layers[-1].coef_reg, L2)
                           and not layers[-1].fit_intercept)
        if not has_closed_form:
            return np.full(fill_value=np.nan, shape=self.ndim)

        if not hasattr(self, 'x_opt'):
            lmbda = layers[-1].coef_reg.lmbda
            # With lmbda == 0 the added term vanishes and this is the plain
            # least-squares Gram matrix, so one code path covers both cases.
            gram = self.X.T.dot(self.X) + lmbda * np.eye(self.ndim)
            # Solve the linear system directly instead of forming an explicit
            # inverse: same result, cheaper and numerically better behaved.
            self.x_opt = np.linalg.solve(gram, self.X.T.dot(self.y))
        return self.x_opt

    def f_star(self):
        """
        Objective value at the closed-form minimizer.

        Returns ``np.inf`` when ``x_star`` is entirely NaN (no closed form
        available); otherwise returns ``self.function(x_star)``.

        NOTE(review): ``function`` calls ``neural_net._unpack`` on its
        argument, so this presumably overwrites the network's current
        parameters with the optimal ones -- confirm this is intended.
        """
        x_opt = self.x_star()
        if np.isnan(x_opt).all():
            return np.inf
        return self.function(x_opt)

    def loss(self, y_pred, y_true):
        """Return the sum of squared residuals between ``y_pred`` and ``y_true``."""
        return np.sum(np.square(y_pred - y_true))
class MeanAbsoluteError(NeuralNetworkLoss):
    r"""
    Mean absolute error loss for regression:

    .. math::

        L(y_{pred}, y_{true}) = \sum \lvert y_{pred} - y_{true} \rvert
    """

    def loss(self, y_pred, y_true):
        """Return the sum of the absolute residuals."""
        residuals = y_pred - y_true
        return np.sum(np.abs(residuals))

    def delta(self, y_pred, y_true):
        """Return the element-wise sign of the residuals ``y_pred - y_true``."""
        residuals = y_pred - y_true
        return np.sign(residuals)
class BinaryCrossEntropy(NeuralNetworkLoss):
    r"""
    Binary (sigmoid) cross-entropy loss, for binary and multi-label
    classification or regression between 0 and 1 with a sigmoid output
    layer:

    .. math::

        L(y_{pred}, y_{true}) = -\sum \left[ y_{true} \log(y_{pred})
                                + (1 - y_{true}) \log(1 - y_{pred}) \right]
    """

    def loss(self, y_pred, y_true):
        """
        Return the summed binary cross-entropy.

        ``xlogy`` is used for both terms so that entries whose weight
        (``y_true`` or ``1 - y_true``) is zero contribute exactly zero.
        """
        positive_term = xlogy(y_true, y_pred)
        negative_term = xlogy(1. - y_true, 1. - y_pred)
        return -np.sum(positive_term + negative_term)
class CategoricalCrossEntropy(NeuralNetworkLoss):
    r"""
    Categorical Cross-Entropy loss function for multi-class (single-label)
    classification with softmax output layer and one-hot encoded target
    data:

    .. math::

        L(y_{pred}, y_{true}) = -\sum y_{true} \log(y_{pred})
    """

    def loss(self, y_pred, y_true):
        """
        Return the summed categorical cross-entropy.

        ``xlogy`` makes entries with ``y_true == 0`` contribute exactly zero.
        """
        return -np.sum(xlogy(y_true, y_pred))

    def delta(self, y_pred, y_true):
        """
        Error signal for a softmax output with one-hot targets.

        Returns a new array equal to ``y_pred`` with 1 subtracted at every
        position where ``y_true`` is non-zero. ``y_pred`` itself is left
        untouched, so the caller's predictions are never altered by
        computing the gradient.
        """
        # according to: https://deepnotes.io/softmax-crossentropy
        one_hot_mask = y_true.astype(bool)
        # np.where builds a fresh array instead of updating y_pred in place.
        return np.where(one_hot_mask, y_pred - 1., y_pred)
class SparseCategoricalCrossEntropy(NeuralNetworkLoss):
    """
    Sparse Categorical Cross-Entropy loss function for multi-class
    (single-label) classification with softmax output layer.

    Targets are class indices (cast to ``int`` and flattened) rather than
    one-hot rows.
    """

    def loss(self, y_pred, y_true):
        """
        Return the summed negative log of the predicted value at each
        sample's target class index.

        Raises
        ------
        ValueError
            If ``y_pred`` and ``y_true`` do not have the same number of rows.
        """
        # Explicit check instead of ``assert`` so that the validation is not
        # stripped when Python runs with optimizations enabled.
        if y_pred.shape[0] != y_true.shape[0]:
            raise ValueError(
                'y_pred and y_true must have the same number of samples, '
                'got {} and {}'.format(y_pred.shape[0], y_true.shape[0]))
        rows = np.arange(y_pred.shape[0])
        labels = y_true.astype(int).ravel()
        return -np.sum(np.log(y_pred[rows, labels]))

    def delta(self, y_pred, y_true):
        """
        Error signal for a softmax output with integer class targets.

        Returns a copy of ``y_pred`` with 1 subtracted at each sample's
        target class index; ``y_pred`` itself is not modified.
        """
        rows = np.arange(y_pred.shape[0])
        labels = y_true.astype(int).ravel()
        # Work on a copy so the caller's predictions are left untouched.
        grad = np.array(y_pred)
        grad[rows, labels] -= 1.
        return grad
# Snake-case module-level aliases: each name is bound to the corresponding
# loss class itself (not to an instance).
mean_squared_error = MeanSquaredError
mean_absolute_error = MeanAbsoluteError
binary_cross_entropy = BinaryCrossEntropy
categorical_cross_entropy = CategoricalCrossEntropy
sparse_categorical_cross_entropy = SparseCategoricalCrossEntropy