from abc import ABC
import autograd.numpy as np
from scipy.special import xlogy
from .activations import Linear
from .layers import ParamLayer
from .regularizers import L2
from ...opti import OptimizationFunction
[docs]
class NeuralNetworkLoss(OptimizationFunction, ABC):
    """
    Abstract base class shared by every neural network loss function.

    The objective it exposes is the data loss scaled by
    ``1 / (2 * n_samples)`` plus the coefficient and intercept regularization
    terms of the parametric layers; its jacobian is obtained by feeding the
    scaled output delta to the network backward pass.

    Subclasses must implement ``loss`` and may override ``delta``.
    """

    def __init__(self, neural_net, X, y):
        """
        Parameters
        ----------
        neural_net : `NeuralNetwork` instance
            The neural network estimator this loss is attached to. Its
            ``layers``, ``forward``, ``backward``, ``_pack`` and ``_unpack``
            members are used to evaluate the objective and its jacobian.
        X : ndarray of shape (n_samples, n_features)
            Training data over which the loss is evaluated.
        y : ndarray of shape (n_samples, n_outputs)
            Target values associated with ``X``.
        """
        super().__init__(X.shape[1])
        self.neural_net = neural_net
        self.X = X
        self.y = y

    def args(self):
        """Return the full training set as the tuple ``(X, y)``."""
        return self.X, self.y

    def loss(self, y_pred, y_true):
        """Return the un-normalized data loss; must be provided by subclasses."""
        raise NotImplementedError

    def delta(self, y_pred, y_true):
        """
        Return the output error signal handed to back-propagation.

        The default is the plain residual ``y_pred - y_true``.
        """
        return y_pred - y_true

    def function(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the regularized objective at ``packed_coef_inter``.

        Parameters
        ----------
        packed_coef_inter : packed parameters
            Network parameters in the format accepted by
            ``neural_net._unpack``; they are loaded into the network before
            the evaluation.
        X_batch, y_batch : ndarray, optional
            Mini-batch to evaluate on. Each one independently falls back to
            the stored training data when ``None``.

        Returns
        -------
        The scaled data loss plus the coefficient and intercept penalties.
        """
        X_batch = self.X if X_batch is None else X_batch
        y_batch = self.y if y_batch is None else y_batch

        net = self.neural_net
        net._unpack(packed_coef_inter)

        denom = 2 * X_batch.shape[0]
        param_layers = [layer for layer in net.layers
                        if isinstance(layer, ParamLayer)]

        # Penalties are evaluated before the forward pass, and the three terms
        # are added left to right: data term, coefficients, intercepts.
        coef_penalty = sum(layer.coef_reg(layer.coef_)
                           for layer in param_layers) / denom
        inter_penalty = sum(layer.inter_reg(layer.inter_)
                            for layer in param_layers
                            if layer.fit_intercept) / denom
        data_term = 1 / denom * self.loss(net.forward(X_batch), y_batch)
        return data_term + coef_penalty + inter_penalty

    def jacobian(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the jacobian of the objective at ``packed_coef_inter``.

        The parameters are loaded into the network, the output delta is
        scaled by ``1 / n_samples`` and propagated through
        ``neural_net.backward``; the result is packed with
        ``neural_net._pack``.

        Parameters
        ----------
        packed_coef_inter : packed parameters
            Network parameters in the format accepted by
            ``neural_net._unpack``.
        X_batch, y_batch : ndarray, optional
            Mini-batch to evaluate on. Each one independently falls back to
            the stored training data when ``None``.
        """
        X_batch = self.X if X_batch is None else X_batch
        y_batch = self.y if y_batch is None else y_batch

        net = self.neural_net
        net._unpack(packed_coef_inter)

        inv_n_samples = 1 / X_batch.shape[0]
        scaled_delta = inv_n_samples * self.delta(net.forward(X_batch), y_batch)
        return net._pack(*net.backward(scaled_delta))
[docs]
class MeanSquaredError(NeuralNetworkLoss):
    r"""
    Compute the mean squared error loss for regression as:

    .. math::

        L(y_{pred}, y_{true}) = \sum (y_{pred} - y_{true})^2

    ``loss`` returns the plain sum of the squared residuals, without dividing
    by the number of samples.
    """

    def x_star(self):
        """
        Return the closed-form minimizer when one is available.

        A closed form is used only when the network is a single layer with a
        ``Linear`` activation, an ``L2`` coefficient regularizer and no
        intercept. In that case the solution of

            (X^T X + lmbda * I) w = X^T y

        is computed once and cached in ``self.x_opt``. With ``lmbda == 0`` the
        added term vanishes and this is the ordinary least-squares solution.

        Returns
        -------
        ndarray
            The cached minimizer, or an array of ``self.ndim`` NaNs when the
            network does not match the closed-form setting.

        Raises
        ------
        numpy.linalg.LinAlgError
            If the (regularized) Gram matrix is singular.
        """
        layers = self.neural_net.layers
        # Check the length first so that an empty network never gets indexed.
        if len(layers) != 1:
            return np.full(fill_value=np.nan, shape=self.ndim)

        out_layer = layers[-1]
        if not (isinstance(out_layer.activation, Linear) and
                isinstance(out_layer.coef_reg, L2) and
                not out_layer.fit_intercept):
            return np.full(fill_value=np.nan, shape=self.ndim)

        if not hasattr(self, 'x_opt'):
            gram = (self.X.T.dot(self.X) +
                    np.eye(self.ndim) * out_layer.coef_reg.lmbda)
            # Solve the linear system instead of forming an explicit inverse:
            # same solution, better conditioned and cheaper.
            self.x_opt = np.linalg.solve(gram, self.X.T.dot(self.y))
        return self.x_opt

    def f_star(self):
        """
        Return the objective value at ``x_star()``.

        Returns ``np.inf`` when ``x_star()`` is entirely NaN, i.e. when no
        closed-form minimizer is available.
        """
        x_opt = self.x_star()
        if not np.isnan(x_opt).all():
            return self.function(x_opt)
        return np.inf

    def loss(self, y_pred, y_true):
        """Return the sum of the squared residuals ``y_pred - y_true``."""
        return np.sum(np.square(y_pred - y_true))
[docs]
class MeanAbsoluteError(NeuralNetworkLoss):
    r"""
    Compute the mean absolute error loss for regression as:

    .. math::

        L(y_{pred}, y_{true}) = \sum \lvert y_{pred} - y_{true} \rvert

    ``loss`` returns the plain sum of the absolute residuals, without
    dividing by the number of samples.
    """

    def loss(self, y_pred, y_true):
        """Return the sum of the absolute residuals ``y_pred - y_true``."""
        residuals = y_pred - y_true
        return np.sum(np.abs(residuals))

    def delta(self, y_pred, y_true):
        """Return the element-wise sign of the residuals ``y_pred - y_true``."""
        residuals = y_pred - y_true
        return np.sign(residuals)
[docs]
class BinaryCrossEntropy(NeuralNetworkLoss):
    r"""
    Binary Cross-Entropy aka Sigmoid Cross-Entropy loss function for binary
    and multi-label classification or regression between 0 and 1 with sigmoid
    output layer:

    .. math::

        L(y_{pred}, y_{true}) = -\sum \left[ y_{true} \log(y_{pred}) +
        (1 - y_{true}) \log(1 - y_{pred}) \right]
    """

    def loss(self, y_pred, y_true):
        """
        Return the summed binary cross-entropy between targets and predictions.

        Both terms are evaluated with ``xlogy``.
        """
        positive_term = xlogy(y_true, y_pred)
        negative_term = xlogy(1. - y_true, 1. - y_pred)
        return -np.sum(positive_term + negative_term)
[docs]
class CategoricalCrossEntropy(NeuralNetworkLoss):
    r"""
    Categorical Cross-Entropy loss function for multi-class (single-label)
    classification with softmax output layer and one-hot encoded target data:

    .. math::

        L(y_{pred}, y_{true}) = -\sum y_{true} \log(y_{pred})
    """

    def loss(self, y_pred, y_true):
        """Return the summed cross-entropy, evaluated with ``xlogy``."""
        return -np.sum(xlogy(y_true, y_pred))

    def delta(self, y_pred, y_true):
        """
        Return the output error signal: ``y_pred`` with 1 subtracted at every
        position where ``y_true`` is non-zero.

        The result is a new array; ``y_pred`` is left untouched so the
        network output handed to this method is not altered as a side effect.
        """
        # according to: https://deepnotes.io/softmax-crossentropy
        one_hot_mask = y_true.astype(bool)
        # Work on a copy: subtracting in place would modify the caller's array.
        delta = y_pred.copy()
        delta[one_hot_mask] -= 1.
        return delta
[docs]
class SparseCategoricalCrossEntropy(NeuralNetworkLoss):
    """
    Sparse Categorical Cross-Entropy loss function for multi-class
    (single-label) classification with softmax output layer.

    Targets are class indices (cast to ``int`` and flattened) rather than
    one-hot rows.
    """

    def loss(self, y_pred, y_true):
        """
        Return minus the summed log of the predicted value at the target
        class index of every sample.

        Raises
        ------
        ValueError
            If ``y_pred`` and ``y_true`` do not have the same number of rows.
        """
        # Explicit check instead of `assert`, which is stripped under `-O`.
        if y_pred.shape[0] != y_true.shape[0]:
            raise ValueError(
                'y_pred and y_true must have the same number of samples, '
                'got {} and {}'.format(y_pred.shape[0], y_true.shape[0]))
        rows = np.arange(y_pred.shape[0])
        labels = y_true.astype(int).ravel()
        return -np.sum(np.log(y_pred[rows, labels]))

    def delta(self, y_pred, y_true):
        """
        Return the output error signal: ``y_pred`` with 1 subtracted at the
        target class index of every sample.

        The result is a new array; ``y_pred`` is left untouched so the
        network output handed to this method is not altered as a side effect.
        """
        # Work on a copy: subtracting in place would modify the caller's array.
        delta = y_pred.copy()
        rows = np.arange(delta.shape[0])
        labels = y_true.astype(int).ravel()
        delta[rows, labels] -= 1.
        return delta
# Module-level snake_case aliases bound to the loss classes of this module.
mean_squared_error = MeanSquaredError
mean_absolute_error = MeanAbsoluteError
binary_cross_entropy = BinaryCrossEntropy
categorical_cross_entropy = CategoricalCrossEntropy
sparse_categorical_cross_entropy = SparseCategoricalCrossEntropy