Source code for optiml.opti.unconstrained.stochastic._base

import itertools
import warnings
from abc import ABC
from collections.abc import Iterable

import numpy as np
from sklearn.utils import shuffle

from .schedules import constant
from ... import Optimizer



[docs]
class StochasticOptimizer(Optimizer, ABC):
    """
    Abstract base class for stochastic (mini-batch) first-order optimizers.

    At each iteration the gradient is estimated on a mini batch of the data
    drawn from f.args() rather than on the whole sample, and the point is moved
    by a step controlled by a (possibly adaptive or scheduled) learning rate.
    The mini batches are produced by iterating over the data, optionally shuffled
    once per epoch, where an epoch is a full pass over all the samples.
    """

    def __init__(self,
                 f,
                 x=None,
                 step_size=0.01,
                 batch_size=None,
                 eps=1e-6,
                 tol=1e-8,
                 epochs=1000,
                 callback=None,
                 callback_args=(),
                 shuffle=True,
                 random_state=None,
                 verbose=False):
        """

        :param f:             the objective function.
        :param x:             ([n x 1] real column vector): the point where to start the algorithm from.
        :param step_size:     (real scalar > 0, callable or iterable, optional, default value 0.01): the
                              learning rate, i.e., the size of the step taken along the search direction.
                              It can be a positive scalar (kept constant for all iterations), a callable
                              with signature step_size(X_batch, y_batch) returning an iterator (e.g., one
                              of the schedules in schedules.py) or an iterable yielding the step size to
                              use at each iteration.
        :param batch_size:    (integer scalar or None, optional, default value None): the size of the mini
                              batches used to estimate the gradient. If None the full sample is used at
                              every iteration (i.e., plain batch gradient descent); otherwise it is clipped
                              to lie in [1, n_samples].
        :param eps:           (real scalar, optional, default value 1e-6): the accuracy in the stopping
                              criterion: the algorithm is stopped when the norm of the gradient is less
                              than or equal to eps.
        :param tol:           (real scalar, optional, default value 1e-8): the tolerance used in the
                              optimality conditions of the Lagrangian dual (when f is a Lagrangian dual
                              function), i.e., the algorithm is stopped when the variables or the
                              constraints change by less than tol.
        :param epochs:        (integer scalar, optional, default value 1000): the maximum number of epochs,
                              i.e., full passes over the whole sample, before the algorithm is stopped.
        :param callback:      (callable, optional, default value None): a function called at each iteration
                              with the optimizer instance (and callback_args) as arguments; it can raise
                              StopIteration to interrupt the optimization.
        :param callback_args: (tuple, optional, default value ()): additional positional arguments passed
                              to the callback at each call.
        :param shuffle:       (boolean, optional, default value True): whether to shuffle the order of the
                              mini batches at the beginning of each epoch.
        :param random_state:  (integer scalar or None, optional, default value None): seed for the random
                              number generator used to initialize x (when not provided) and to shuffle the
                              mini batches, for reproducibility.
        :param verbose:       (boolean or integer, optional, default value False): print details about each
                              iteration if True (or every `verbose` epochs if an integer), nothing otherwise.
        """
        super(StochasticOptimizer, self).__init__(f=f,
                                                  x=x,
                                                  eps=eps,
                                                  tol=tol,
                                                  max_iter=epochs,
                                                  callback=callback,
                                                  callback_args=callback_args,
                                                  random_state=random_state,
                                                  verbose=verbose)
        if not callable(step_size) and not isinstance(step_size, Iterable) and not step_size > 0:
            raise ValueError('step_size must be > 0 or a callable or an iterator')
        if isinstance(step_size, Iterable):  # wrap the iterable into a function to deal with mini batches, i.e., *args
            self.step_size = lambda *args: step_size
        elif callable(step_size):  # i.e., step_size(X_batch, y_batch)
            self.step_size = step_size
        else:
            self.step_size = lambda *args: constant(step_size)
        self.epochs = epochs
        self.epoch = 0
        self.shuffle = shuffle
        self.step = 0

        if batch_size is None:
            self.batch_size = None
            self.batches = itertools.repeat(f.args())
        else:
            n_samples = len(f.args()[0])

            if batch_size < 1 or batch_size > n_samples:
                warnings.warn('Got `batch_size` less than 1 or larger than '
                              'sample size. It is going to be clipped.')
            self.batch_size = np.clip(batch_size, 1, n_samples)

            self.n_batches, rest = divmod(len(f.args()[0]), self.batch_size)
            if rest:
                self.n_batches += 1

            self.max_iter *= self.n_batches

            self.batches = (i for i in self.iter_mini_batches())


[docs]
    def iter_mini_batches(self):
        """
        Return an iterator that successively yields tuples of aligned mini batches
        of size ``batch_size`` from the sliceable arrays returned by ``f.args()``,
        in random order (when ``shuffle`` is True) without replacement.

        :return: an infinite iterator of mini batches (one tuple of aligned slices per step).
        """

        while True:
            idx = list(range(self.n_batches))
            while True:
                if self.shuffle:
                    shuffle(idx, random_state=self.random_state)
                for i in idx:
                    start = i * self.batch_size
                    stop = (i + 1) * self.batch_size
                    yield [param[slice(start, stop)] for param in self.f.args()]



[docs]
    def is_batch_end(self):
        return (self.batch_size is None or self.batch_size == len(self.f.args()[0])
                or (self.iter and not self.iter % self.n_batches))



[docs]
    def is_verbose(self):
        return self.verbose and not self.epoch % self.verbose


    def _print_header(self):
        if self.verbose:
            print('epoch\titer\t cost\t', end='')
            if self.f.f_star() < np.inf:
                print('\t gap\t\t rate', end='')
                self.prev_f_x = np.inf

    def _print_info(self):
        if self.is_batch_end() and self.is_verbose():
            print('\n{:4d}\t{:4d}\t{: 1.4e}'.format(self.epoch, self.iter, self.f_x), end='')
            if self.f.f_star() < np.inf:
                print('\t{: 1.4e}'.format((self.f_x - self.f.f_star()) /
                                          max(abs(self.f.f_star()), 1)), end='')
                if self.prev_f_x < np.inf:
                    print('\t{: 1.4e}'.format((self.f_x - self.f.f_star()) /
                                              max(abs(self.prev_f_x - self.f.f_star()), 1)), end='')
                else:
                    print('\t\t', end='')
                self.prev_f_x = self.f_x




[docs]
class StochasticMomentumOptimizer(StochasticOptimizer, ABC):
    """
    Abstract base class for stochastic optimizers that support a momentum term.

    In addition to the plain stochastic step it keeps a velocity that accumulates
    an exponentially decaying fraction of the past steps, which damps oscillations
    and accelerates convergence along consistent descent directions. Both the
    classical heavy-ball (Polyak) momentum and Nesterov's accelerated momentum are
    supported, as selected by momentum_type.
    """

    def __init__(self,
                 f,
                 x=None,
                 step_size=0.01,
                 momentum_type='none',
                 momentum=0.9,
                 batch_size=None,
                 eps=1e-6,
                 tol=1e-8,
                 epochs=1000,
                 callback=None,
                 callback_args=(),
                 shuffle=True,
                 random_state=None,
                 verbose=False):
        """

        :param f:             the objective function.
        :param x:             ([n x 1] real column vector): the point where to start the algorithm from.
        :param step_size:     (real scalar > 0, callable or iterable, optional, default value 0.01): the
                              learning rate, i.e., the size of the step taken along the search direction
                              (see StochasticOptimizer for the accepted forms).
        :param momentum_type: (string in {'none', 'polyak', 'nesterov'}, optional, default value 'none'):
                              the kind of momentum to apply: 'none' disables momentum, 'polyak' uses the
                              classical heavy-ball momentum and 'nesterov' uses Nesterov's accelerated
                              momentum (the gradient is evaluated after the momentum jump).
        :param momentum:      (real scalar in [0, 1) or iterable, optional, default value 0.9): the momentum
                              factor, i.e., the fraction of the previous step retained in the current one.
                              It can be a scalar (kept constant) or an iterable yielding the value to use at
                              each iteration.
        :param batch_size:    (integer scalar or None, optional, default value None): the size of the mini
                              batches used to estimate the gradient; if None the full sample is used.
        :param eps:           (real scalar, optional, default value 1e-6): the accuracy in the stopping
                              criterion: the algorithm is stopped when the norm of the gradient is less
                              than or equal to eps.
        :param tol:           (real scalar, optional, default value 1e-8): the tolerance used in the
                              optimality conditions of the Lagrangian dual (when f is a Lagrangian dual).
        :param epochs:        (integer scalar, optional, default value 1000): the maximum number of epochs
                              before the algorithm is stopped.
        :param callback:      (callable, optional, default value None): a function called at each iteration
                              with the optimizer instance (and callback_args) as arguments; it can raise
                              StopIteration to interrupt the optimization.
        :param callback_args: (tuple, optional, default value ()): additional positional arguments passed
                              to the callback at each call.
        :param shuffle:       (boolean, optional, default value True): whether to shuffle the order of the
                              mini batches at the beginning of each epoch.
        :param random_state:  (integer scalar or None, optional, default value None): seed for the random
                              number generator, for reproducibility.
        :param verbose:       (boolean or integer, optional, default value False): print details about each
                              iteration if True (or every `verbose` epochs if an integer), nothing otherwise.
        """
        super(StochasticMomentumOptimizer, self).__init__(f=f,
                                                          x=x,
                                                          step_size=step_size,
                                                          batch_size=batch_size,
                                                          eps=eps,
                                                          tol=tol,
                                                          epochs=epochs,
                                                          callback=callback,
                                                          callback_args=callback_args,
                                                          shuffle=shuffle,
                                                          random_state=random_state,
                                                          verbose=verbose)
        if momentum_type not in ('polyak', 'nesterov', 'none'):
            raise ValueError(f'unknown momentum type {momentum_type}')
        self.momentum_type = momentum_type
        if not isinstance(momentum, Iterable) and not 0 <= momentum < 1:
            raise ValueError('momentum must be between 0 and 1 or an iterator')
        if isinstance(momentum, Iterable):
            self.momentum = momentum
        else:
            self.momentum = constant(momentum)