// Copyright 2014 Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
//
// GradientEvaluator is a class for computing the value and gradient of a
// LossFunction on a labeled dataset {(instance_i, label_i)}, given parameters
// 'weights'. Its methods are called by gradient descent algorithms implementing
// the LossMinimizer interface.

#pragma once

#include <algorithm>
#include <mutex>
#include <string>
#include <vector>

#include "lossmin/eigen-types.h"
#include "lossmin/losses/loss-function.h"

class BlockingCounter;

namespace lossmin {
| class GradientEvaluator { |
| public: |
| // Constructor sets up the dataset and the loss function. |
| GradientEvaluator(const InstanceSet &instances, const LabelSet &labels, |
| const LossFunction *loss_function) |
| : instances_(instances), |
| instances_transposed_(instances.transpose()), |
| labels_(labels), |
| loss_function_(loss_function) {} |
| |
| virtual ~GradientEvaluator() {} |
| |
| // Returns the loss for given parameters 'weights'. Multi-threading is used |
| // if num_threads_ > 1. |
| virtual double Loss(const Weights &weights) const; |
| |
| // Returns the loss for given parameters 'weights' and a subset of examples |
| // 'example_indices'. |
| virtual double Loss(const Weights &weights, |
| const std::vector<int> &example_indices) const; |
| |
| // Returns the loss for given parameters 'weights' and a different |
| // dataset (typically used for validation). |
| virtual double Loss(const Weights &weights, |
| const InstanceSet &validation_instances, |
| const LabelSet &validation_labels) const; |
| |
| // Computes the gradient wrt the given parameters 'weights'. 'gradient' is |
| // owned by the caller and should be initialized to zero. |
| // Multithreading is used if 'num_threads' > 1. The training examples are |
| // divided into 'num_batches' batches; each thread computes the gradient of a |
| // batch, adds it to 'gradient', and takes the next batch. These updates are |
| // asynchronous, and behaviour is non-deterministic. |
| virtual void Gradient(const Weights &weights, Weights *gradient) const; |
| |
| // Adds the gradient wrt 'weight_scale * weights' for 'example' to the vector |
| // 'gradient' in place. The gradient is scaled by 'example_scale'. |
| virtual void AddExampleGradient(const Weights &weights, int example, |
| double weights_scale, double example_scale, |
| Weights *gradient) const { |
| loss_function_->AddExampleGradient(weights, instances_, labels_, example, |
| weights_scale, example_scale, gradient); |
| } |
| |
| // Returns the gradient wrt 'weights' as a vector<pair<int, double>> rather |
| // than Eigen::SparseVector<double>, since Eigen is very inefficient with |
| // sparse vectors. This is only necessary if running SGDAdaGrad. |
| virtual void ExampleGradient( |
| const Weights &weights, int example, double weights_scale, |
| double example_scale, |
| std::vector<std::pair<int, double>> *example_gradient) const { |
| loss_function_->ExampleGradient(weights, instances_, labels_, example, |
| weights_scale, example_scale, |
| example_gradient); |
| } |
| |
| // Returns the number of examples in the dataset. |
| virtual int NumExamples() const { return instances_.rows(); } |
| |
| // Returns the number of features. |
| virtual int NumFeatures() const { return instances_.cols(); } |
| |
| // Returns the number of weights for the given number of features. |
| virtual int NumWeights() const { |
| return loss_function_->NumWeights(NumFeatures()); |
| } |
| |
| // Returns an upper bound on the curvature of the loss function. Used to set |
| // the learning rate of some LossMinimizer algorithms. |
| virtual double LossCurvature() const { |
| return loss_function_->LossCurvature(instances_); |
| } |
| |
| // Returns the per-coordinate curvature of the data. Used to set the learning |
| // rates of ParallelBoostingWithMomentum. |
| virtual void PerCoordinateCurvature( |
| VectorXd *per_coordinate_curvature) const { |
| loss_function_->PerCoordinateCurvature(instances_, |
| per_coordinate_curvature); |
| } |
| |
| // Returns sparsity, defined as the maximum instance l0 norm. Used to help |
| // set learning rates in ParallelBoostingWithMomentum. |
| double Sparsity() const { |
| typename Instance::Index sparsity = 0; |
| for (int i = 0; i < instances_.rows(); ++i) { |
| sparsity = std::max(sparsity, instances_.innerVector(i).nonZeros()); |
| } |
| return static_cast<double>(sparsity); |
| } |
| |
| // Returns the loss function. |
| const LossFunction *loss_function() const { return loss_function_; } |
| |
| // Returns the instances. |
| const InstanceSet &instances() const { return instances_; } |
| |
| // Returns the transpose pf instances. |
| const InstanceSet &instances_transposed() const { |
| return instances_transposed_; |
| } |
| |
| // Returns the labels. |
| const LabelSet &labels() const { return labels_; } |
| |
| private: |
| // Training instances. |
| const InstanceSet &instances_; |
| |
| // The transpose of instances. This is needed for fast gradient computations |
| // and should be computed once so it is computed at construction (not each |
| // time gradient is computed) |
| const InstanceSet instances_transposed_; |
| |
| // Instance labels. |
| const LabelSet &labels_; |
| |
| // Function for computing the loss and gradient of a single training example. |
| // Not owned. |
| const LossFunction *loss_function_; |
| }; |

}  // namespace lossmin