// Copyright 2014 Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
//
// GradientEvaluator is a class for computing the value and gradient of a
// loss function on a labeled dataset {(instance_i, label_i)}, given parameters
// 'weights'. Its methods are called by gradient descent algorithms implementing
// the LossMinimizer interface.
//
// By default, the loss function (whose value and gradient are computed) is the
// linear regression objective function:
//
// f(x) == (0.5 / N) * || A * x - b ||_2^2,
//
// where x == 'weights', A == instances_, b == labels_, N == instances_.rows().
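//
// Example usage (a minimal sketch; 'instances' and 'labels' stand for Eigen
// objects matching the InstanceSet and LabelSet typedefs in eigen-types.h,
// and Weights is assumed to be a dense Eigen vector):
//
//   GradientEvaluator evaluator(instances, labels);
//   Weights weights = Weights::Zero(evaluator.NumWeights());
//   Weights gradient = Weights::Zero(evaluator.NumWeights());
//   double loss = evaluator.Loss(weights);
//   evaluator.Gradient(weights, &gradient);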
#ifndef COBALT_UTIL_LOSSMIN_MINIMIZERS_GRADIENT_EVALUATOR_H_
#define COBALT_UTIL_LOSSMIN_MINIMIZERS_GRADIENT_EVALUATOR_H_
#include <algorithm>
#include <mutex>
#include <string>
#include <vector>
#include "util/lossmin/eigen-types.h"
namespace cobalt_lossmin {
class GradientEvaluator {
public:
// Constructor sets up the dataset.
GradientEvaluator(const InstanceSet &instances, const LabelSet &labels)
: instances_(instances),
instances_transposed_(instances.transpose()),
labels_(labels) {}
virtual ~GradientEvaluator() {}
// Returns the residual between the labels predicted at 'weights' and labels_.
// The default implementation returns A * x - b, where A == instances_,
// x == 'weights', b == labels_. It exploits sparsity by iterating over the
// rows of instances_ to compute the matrix-vector product straight from the
// definition.
// Note: This is not as efficient as calling Eigen's matrix-vector multiply
// directly, but it can easily be parallelized (when a parallel implementation
// is desired but OpenMP is not available).
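//
// A sketch of the row-iterator multiplication described above (assuming
// InstanceSet is a row-major Eigen::SparseMatrix<double> and Weights and
// LabelSet are dense Eigen vectors, per eigen-types.h):
//
//   Weights residual = -labels_;
//   for (int i = 0; i < instances_.outerSize(); ++i) {
//     for (InstanceSet::InnerIterator it(instances_, i); it; ++it) {
//       residual(it.row()) += it.value() * weights(it.col());
//     }
//   }
//   return residual;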
virtual Weights Residual(const Weights &weights) const;
// Returns the loss for the given parameters 'weights'.
// The default implementation computes half the normalized squared norm of the
// vector returned by Residual(weights): (0.5 / N) * || A * x - b ||_2^2, where
// A == instances_, b == labels_, x == 'weights', N == instances_.rows().
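//
// A sketch of the default computation in terms of Residual():
//
//   double loss = 0.5 * Residual(weights).squaredNorm() / NumExamples();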
virtual double Loss(const Weights &weights) const;
// The same as Loss() except that, by default, it uses Eigen's efficient
// matrix-vector multiplication directly instead of calling Residual().
// Note: It can be parallelized with OpenMP where OpenMP is supported.
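//
// A sketch of the direct Eigen computation (same assumptions about the
// typedefs as above):
//
//   double loss =
//       0.5 * (instances_ * weights - labels_).squaredNorm() / NumExamples();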
virtual double SparseLoss(const Weights &weights) const;
// Computes the gradient with respect to the given parameters 'weights'.
// 'gradient' is owned by the caller and should be initialized to zero.
// The default implementation computes (1 / N) * A^T * (A * x - b), where
// A == instances_, b == labels_, x == 'weights', N == instances_.rows(). It
// exploits sparsity by iterating over the rows of instances_transposed_ to
// compute the matrix-vector product straight from the definition, in the same
// way as the default implementation of Residual().
// Note: This is not as efficient as calling Eigen's matrix-vector multiply
// directly, but it can easily be parallelized (when a parallel implementation
// is desired but OpenMP is not available).
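//
// A sketch of the default computation (same assumptions as in Residual();
// 'gradient' is zero-initialized by the caller):
//
//   const Weights residual = Residual(weights);
//   for (int j = 0; j < instances_transposed_.outerSize(); ++j) {
//     for (InstanceSet::InnerIterator it(instances_transposed_, j); it; ++it) {
//       (*gradient)(it.row()) += it.value() * residual(it.col());
//     }
//   }
//   *gradient /= NumExamples();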
virtual void Gradient(const Weights &weights, Weights *gradient) const;
// The same as Gradient() except that, by default, it uses Eigen's efficient
// matrix-vector multiplication directly.
// Note: It can be parallelized with OpenMP where OpenMP is supported.
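//
// A sketch of the direct Eigen computation:
//
//   const Weights residual = instances_ * weights - labels_;
//   *gradient = instances_transposed_ * residual;
//   *gradient /= NumExamples();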
virtual void SparseGradient(const Weights &weights, Weights *gradient) const;
// Returns the number of examples in the dataset.
int NumExamples() const { return instances_.rows(); }
// Returns the number of features.
int NumFeatures() const { return instances_.cols(); }
// Alias for the number of features (these are often used interchangeably).
int NumWeights() const { return NumFeatures(); }
// Computes the per-coordinate curvature of the data and stores it in
// *per_coordinate_curvature. Used to set the learning rates of
// ParallelBoostingWithMomentum.
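//
// For the default least-squares objective the Hessian is (1 / N) * A^T * A,
// so its j-th diagonal entry, (1 / N) * sum_i A(i, j)^2, is a natural
// per-coordinate curvature. A sketch using rows of instances_transposed_
// (same typedef assumptions as above):
//
//   for (int j = 0; j < NumFeatures(); ++j) {
//     (*per_coordinate_curvature)(j) =
//         instances_transposed_.row(j).squaredNorm() / NumExamples();
//   }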
void PerCoordinateCurvature(VectorXd *per_coordinate_curvature) const;
// Returns the sparsity of the data, defined as the maximum l0 norm (number of
// nonzero entries) over all instances. Used to help set learning rates in
// ParallelBoostingWithMomentum.
// TODO(bazyli): exploit sparsity in the implementation.
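//
// A sketch of the definition above, counting nonzeros per row of instances_
// (same typedef assumptions as above):
//
//   double max_instance_l0 = 0;
//   for (int i = 0; i < instances_.outerSize(); ++i) {
//     int nnz = 0;
//     for (InstanceSet::InnerIterator it(instances_, i); it; ++it) ++nnz;
//     max_instance_l0 = std::max<double>(max_instance_l0, nnz);
//   }
//   return max_instance_l0;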
double Sparsity() const;
// Returns the instances.
const InstanceSet &instances() const { return instances_; }
// Returns the transpose of the instances.
const InstanceSet &instances_transposed() const {
return instances_transposed_;
}
// Returns the labels.
const LabelSet &labels() const { return labels_; }
private:
// Training instances.
const InstanceSet &instances_;
// The transpose of instances. It is needed for fast gradient computations and
// should be computed only once, so it is initialized at construction (rather
// than each time the gradient is computed).
const InstanceSet instances_transposed_;
// Instance labels.
const LabelSet &labels_;
};
} // namespace cobalt_lossmin
#endif // COBALT_UTIL_LOSSMIN_MINIMIZERS_GRADIENT_EVALUATOR_H_