| // Copyright 2014 Google Inc. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // Author: nevena@google.com (Nevena Lazic) |
| // |
| // GradientEvaluator is a class for computing the value and gradient of a loss |
| // LossFunction on a labeled dataset {(instance_i, label_i)}, given parameters |
| // 'weights'. Its methods are called by gradient descent algorithms implementing |
| // the LossMinimizer interface. |
| |
| #pragma once |
| |
| #include <algorithm> |
| #include <mutex> |
| #include <string> |
| #include <vector> |
| |
| #include "lossmin/eigen-types.h" |
| #include "lossmin/losses/loss-function.h" |
| |
| class BlockingCounter; |
| |
| namespace lossmin { |
| |
| class GradientEvaluator { |
| public: |
| // Constructor sets up the dataset and the loss function. |
| GradientEvaluator(const InstanceSet &instances, const LabelSet &labels, |
| const LossFunction *loss_function) |
| : instances_(instances), labels_(labels), |
| loss_function_(loss_function) {} |
| |
| virtual ~GradientEvaluator() {} |
| |
| // Returns the loss for given parameters 'weights'. Multi-threading is used |
| // if num_threads_ > 1. |
| virtual float Loss(const Weights &weights) const; |
| |
| // Returns the loss for given parameters 'weights' and a subset of examples |
| // 'example_indices'. |
| virtual float Loss(const Weights &weights, |
| const std::vector<int> &example_indices) const; |
| |
| // Returns the loss for given parameters 'weights' and a different |
| // dataset (typically used for validation). |
| virtual float Loss( |
| const Weights &weights, const InstanceSet &validation_instances, |
| const LabelSet &validation_labels) const; |
| |
| // Computes the gradient wrt the given parameters 'weights'. 'gradient' is |
| // owned by the caller and should be initialized to zero. |
| // Multithreading is used if 'num_threads' > 1. The training examples are |
| // divided into 'num_batches' batches; each thread computes the gradient of a |
| // batch, adds it to 'gradient', and takes the next batch. These updates are |
| // asynchronous, and behaviour is non-deterministic. |
| virtual void Gradient(const Weights &weights, Weights *gradient) const; |
| |
| // Adds the gradient wrt 'weight_scale * weights' for 'example' to the vector |
| // 'gradient' in place. The gradient is scaled by 'example_scale'. |
| virtual void AddExampleGradient( |
| const Weights &weights, int example, float weights_scale, |
| float example_scale, Weights *gradient) const { |
| loss_function_->AddExampleGradient( |
| weights, instances_, labels_, example, weights_scale, example_scale, |
| gradient); |
| } |
| |
| // Returns the gradient wrt 'weights' as a vector<pair<int, float>> rather |
| // than Eigen::SparseVector<float>, since Eigen is very inefficient with |
| // sparse vectors. This is only necessary if running SGDAdaGrad. |
| virtual void ExampleGradient( |
| const Weights &weights, int example, float weights_scale, |
| float example_scale, |
| std::vector<std::pair<int, float>> *example_gradient) const { |
| loss_function_->ExampleGradient( |
| weights, instances_, labels_, example, weights_scale, example_scale, |
| example_gradient); |
| } |
| |
| // Sets the number of threads and batch size for multithreaded computation. |
| // Assigns examples to 'batch_examples_'. |
| void set_num_threads(int num_threads, int batch_size); |
| |
| // Convenience method that sets the number of batches to a default. |
| void set_num_threads(int num_threads) { |
| int batch_size = static_cast<int>(NumExamples() / num_threads / 2); |
| set_num_threads(num_threads, batch_size); |
| } |
| |
| // Returns the number of examples in the dataset. |
| virtual int NumExamples() const { return instances_.rows(); } |
| |
| // Returns the number of features. |
| virtual int NumFeatures() const { return instances_.cols(); } |
| |
| // Returns the number of weights for the given number of features. |
| virtual int NumWeights() const { |
| return loss_function_->NumWeights(NumFeatures()); |
| } |
| |
| // Returns the number of batches for multithreaded computation. |
| int num_batches() const { return num_batches_; } |
| |
| // Returns the number of threads. |
| int num_threads() const { return num_threads_; } |
| |
| // Returns an upper bound on the curvature of the loss function. Used to set |
| // the learning rate of some LossMinimizer algorithms. |
| virtual float LossCurvature() const { |
| return loss_function_->LossCurvature(instances_); |
| } |
| |
| // Returns the per-coordinate curvature of the data. Used to set the learning |
| // rates of ParallelBoostingWithMomentum. |
| virtual void PerCoordinateCurvature( |
| VectorXf *per_coordinate_curvature) const { |
| loss_function_->PerCoordinateCurvature(instances_, |
| per_coordinate_curvature); |
| } |
| |
| // Returns sparsity, defined as the maximum instance l0 norm. Used to help |
| // set learning rates in ParallelBoostingWithMomentum. |
| float Sparsity() const { |
| typename Instance::Index sparsity = 0; |
| for (int i = 0; i < instances_.rows(); ++i) { |
| sparsity = std::max(sparsity, instances_.innerVector(i).nonZeros()); |
| } |
| return static_cast<float>(sparsity); |
| } |
| |
| // Returns the loss function. |
| const LossFunction *loss_function() const { return loss_function_; } |
| |
| // Returns the instances. |
| const InstanceSet &instances() const { return instances_; } |
| |
| // Returns the labels. |
| const LabelSet &labels() const { return labels_; } |
| |
| // Returns a list of examples in batch 'batch'. |
| const std::vector<int> &batch_examples(int batch) const { |
| return batch_examples_[batch]; |
| } |
| |
| protected: |
| /* |
| // Computes the loss for a single example 'example' and returns it in |
| // 'loss_values[example_index]'. |
| virtual void ThreadLoss( |
| const Weights *weights, int example, VectorXf *loss_values, |
| BlockingCounter *num_jobs_remaining) const; |
| |
| // Computes the gradient for examples in batch 'batch', adds it to 'gradient'. |
| virtual void ThreadGradient( |
| const Weights *weights, int batch, Weights *gradient, |
| BlockingCounter *num_jobs_remaining) const; |
| */ |
| |
| private: |
| // Training instances. |
| const InstanceSet &instances_; |
| |
| // Instance labels. |
| const LabelSet &labels_; |
| |
| // Function for computing the loss and gradient of a single training example. |
| // Not owned. |
| const LossFunction *loss_function_; |
| |
| // Mutex for multithreaded gradient computation. |
| mutable std::mutex gradient_update_mutex_; |
| |
| // Number of threads used to parallelize gradient computation. |
| // If num_threads_ = 1, multithreading is not used. |
| int num_threads_ = 1; |
| |
| // List of examples assigned to each thread. |
| std::vector<std::vector<int>> batch_examples_; |
| |
| // Number of batches. |
| int num_batches_; |
| }; |
| |
| } // namespace lossmin |
| |