// Copyright 2014 Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
//
// GradientEvaluator is a class for computing the value and gradient of a
// LossFunction on a labeled dataset {(instance_i, label_i)}, given parameters
// 'weights'. Its methods are called by gradient descent algorithms
// implementing the LossMinimizer interface.
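//
// Example usage (a minimal sketch; 'instances' and 'labels' are assumed to be
// populated elsewhere, and MyLoss stands in for any concrete LossFunction
// subclass):
//   MyLoss loss_function;
//   GradientEvaluator evaluator(instances, labels, &loss_function);
//   Weights weights = Weights::Zero(evaluator.NumWeights());
//   Weights gradient = Weights::Zero(evaluator.NumWeights());
//   evaluator.Gradient(weights, &gradient);
//   float loss = evaluator.Loss(weights);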
#pragma once
#include <algorithm>
#include <mutex>
#include <string>
#include <vector>
#include "lossmin/eigen-types.h"
#include "lossmin/losses/loss-function.h"
class BlockingCounter;
namespace lossmin {
class GradientEvaluator {
public:
// Constructor sets up the dataset and the loss function.
GradientEvaluator(const InstanceSet &instances, const LabelSet &labels,
const LossFunction *loss_function)
: instances_(instances), labels_(labels),
loss_function_(loss_function) {}
virtual ~GradientEvaluator() {}
// Returns the loss for given parameters 'weights'. Multi-threading is used
// if num_threads_ > 1.
virtual float Loss(const Weights &weights) const;
// Returns the loss for given parameters 'weights' and a subset of examples
// 'example_indices'.
virtual float Loss(const Weights &weights,
const std::vector<int> &example_indices) const;
// Returns the loss for given parameters 'weights' and a different
// dataset (typically used for validation).
virtual float Loss(
const Weights &weights, const InstanceSet &validation_instances,
const LabelSet &validation_labels) const;
// Computes the gradient wrt the given parameters 'weights'. 'gradient' is
// owned by the caller and should be initialized to zero.
// Multithreading is used if num_threads_ > 1. The training examples are
// divided into num_batches_ batches; each thread computes the gradient of a
// batch, adds it to 'gradient', and takes the next batch. These updates are
// asynchronous, so the order in which batch gradients are added is
// non-deterministic.
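//
// Example (a sketch): a full-batch gradient computed with 4 threads, assuming
// 'evaluator' and 'weights' are set up as in the example at the top of this
// file.
//   evaluator.set_num_threads(4);
//   Weights gradient = Weights::Zero(evaluator.NumWeights());
//   evaluator.Gradient(weights, &gradient);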
virtual void Gradient(const Weights &weights, Weights *gradient) const;
// Adds the gradient wrt 'weights_scale * weights' for 'example' to the vector
// 'gradient' in place. The gradient is scaled by 'example_scale'.
virtual void AddExampleGradient(
const Weights &weights, int example, float weights_scale,
float example_scale, Weights *gradient) const {
loss_function_->AddExampleGradient(
weights, instances_, labels_, example, weights_scale, example_scale,
gradient);
}
// Computes the gradient wrt 'weights' for a single example 'example' and
// returns it in 'example_gradient' as a vector<pair<int, float>> rather than
// an Eigen::SparseVector<float>, since Eigen is very inefficient with sparse
// vectors. This is only necessary if running SGDAdaGrad.
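//
// Example (a sketch; each pair is assumed to hold a weight index and the
// corresponding gradient value):
//   std::vector<std::pair<int, float>> example_gradient;
//   evaluator.ExampleGradient(weights, /*example=*/0, /*weights_scale=*/1.0f,
//   /*example_scale=*/1.0f, &example_gradient);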
virtual void ExampleGradient(
const Weights &weights, int example, float weights_scale,
float example_scale,
std::vector<std::pair<int, float>> *example_gradient) const {
loss_function_->ExampleGradient(
weights, instances_, labels_, example, weights_scale, example_scale,
example_gradient);
}
// Sets the number of threads and batch size for multithreaded computation.
// Assigns examples to 'batch_examples_'.
void set_num_threads(int num_threads, int batch_size);
// Convenience overload that computes a default batch size from the number of
// threads, targeting two batches per thread.
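// For example, with 1000 examples and 4 threads the default batch size is
// 1000 / 4 / 2 = 125, i.e. 8 batches, two per thread.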
void set_num_threads(int num_threads) {
// Guard against a zero batch size when the dataset is small.
int batch_size = std::max(1, NumExamples() / num_threads / 2);
set_num_threads(num_threads, batch_size);
}
// Returns the number of examples in the dataset.
virtual int NumExamples() const { return instances_.rows(); }
// Returns the number of features.
virtual int NumFeatures() const { return instances_.cols(); }
// Returns the number of weights for the given number of features.
virtual int NumWeights() const {
return loss_function_->NumWeights(NumFeatures());
}
// Returns the number of batches for multithreaded computation.
int num_batches() const { return num_batches_; }
// Returns the number of threads.
int num_threads() const { return num_threads_; }
// Returns an upper bound on the curvature of the loss function. Used to set
// the learning rate of some LossMinimizer algorithms.
virtual float LossCurvature() const {
return loss_function_->LossCurvature(instances_);
}
// Returns the per-coordinate curvature of the data. Used to set the learning
// rates of ParallelBoostingWithMomentum.
virtual void PerCoordinateCurvature(
VectorXf *per_coordinate_curvature) const {
loss_function_->PerCoordinateCurvature(instances_,
per_coordinate_curvature);
}
// Returns sparsity, defined as the maximum instance l0 norm. Used to help
// set learning rates in ParallelBoostingWithMomentum.
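// For example, if the rows of 'instances_' have 3, 7, and 5 nonzero entries,
// Sparsity() returns 7.0f.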
float Sparsity() const {
Instance::Index sparsity = 0;
for (int i = 0; i < instances_.rows(); ++i) {
sparsity = std::max(sparsity, instances_.innerVector(i).nonZeros());
}
return static_cast<float>(sparsity);
}
// Returns the loss function.
const LossFunction *loss_function() const { return loss_function_; }
// Returns the instances.
const InstanceSet &instances() const { return instances_; }
// Returns the labels.
const LabelSet &labels() const { return labels_; }
// Returns a list of examples in batch 'batch'.
const std::vector<int> &batch_examples(int batch) const {
return batch_examples_[batch];
}
protected:
/*
// Computes the loss for a single example 'example' and returns it in
// 'loss_values[example_index]'.
virtual void ThreadLoss(
const Weights *weights, int example, VectorXf *loss_values,
BlockingCounter *num_jobs_remaining) const;
// Computes the gradient for examples in batch 'batch', adds it to 'gradient'.
virtual void ThreadGradient(
const Weights *weights, int batch, Weights *gradient,
BlockingCounter *num_jobs_remaining) const;
*/
private:
// Training instances.
const InstanceSet &instances_;
// Instance labels.
const LabelSet &labels_;
// Function for computing the loss and gradient of a single training example.
// Not owned.
const LossFunction *loss_function_;
// Mutex for multithreaded gradient computation.
mutable std::mutex gradient_update_mutex_;
// Number of threads used to parallelize gradient computation.
// If num_threads_ = 1, multithreading is not used.
int num_threads_ = 1;
// List of examples assigned to each batch.
std::vector<std::vector<int>> batch_examples_;
// Number of batches.
int num_batches_;
};
} // namespace lossmin