// Copyright 2014 Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
//
// GradientEvaluator is a class for computing the value and gradient of a
// LossFunction on a labeled dataset {(instance_i, label_i)}, given parameters
// 'weights'. Its methods are called by gradient descent algorithms
// implementing the LossMinimizer interface.
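//
// Example usage (a minimal sketch; 'SomeLossFunction' stands in for a
// concrete LossFunction subclass and the dataset construction is elided):
//
//   SomeLossFunction loss;
//   GradientEvaluator evaluator(instances, labels, &loss);
//   Weights weights = Weights::Zero(evaluator.NumWeights());
//   float loss_value = evaluator.Loss(weights);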
#pragma once

#include <algorithm>
#include <mutex>
#include <string>
#include <vector>

#include "lossmin/eigen-types.h"
#include "lossmin/losses/loss-function.h"

class BlockingCounter;

namespace lossmin {

class GradientEvaluator {
 public:
  // Constructor sets up the dataset and the loss function. 'loss_function'
  // is not owned and must outlive this object.
  GradientEvaluator(const InstanceSet &instances, const LabelSet &labels,
                    const LossFunction *loss_function)
      : instances_(instances),
        labels_(labels),
        loss_function_(loss_function) {}

  virtual ~GradientEvaluator() {}
  // Returns the loss for the given parameters 'weights'. Implementations may
  // compute the per-example losses in parallel.
  virtual float Loss(const Weights &weights) const;

  // Returns the loss for the given parameters 'weights', restricted to the
  // subset of examples 'example_indices'.
  virtual float Loss(const Weights &weights,
                     const std::vector<int> &example_indices) const;

  // Returns the loss for the given parameters 'weights' on a different
  // dataset (typically used for validation).
  virtual float Loss(
      const Weights &weights, const InstanceSet &validation_instances,
      const LabelSet &validation_labels) const;
  // Computes the gradient wrt the given parameters 'weights' and adds it to
  // 'gradient', which is owned by the caller and should be initialized to
  // zero. A multithreaded implementation divides the training examples into
  // batches; each thread computes the gradient of a batch, adds it to
  // 'gradient', and takes the next batch. These updates are asynchronous, so
  // the order of accumulation, and hence the result, is non-deterministic.
  virtual void Gradient(const Weights &weights, Weights *gradient) const;
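
  // The expected calling convention (a sketch; 'learning_rate' is an
  // illustrative solver parameter, not part of this class):
  //
  //   Weights gradient = Weights::Zero(evaluator.NumWeights());
  //   evaluator.Gradient(weights, &gradient);
  //   weights -= learning_rate * gradient;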
  // Adds the gradient wrt 'weights_scale * weights' for 'example' to the
  // vector 'gradient' in place. The gradient is scaled by 'example_scale'.
  virtual void AddExampleGradient(
      const Weights &weights, int example, float weights_scale,
      float example_scale, Weights *gradient) const {
    loss_function_->AddExampleGradient(
        weights, instances_, labels_, example, weights_scale, example_scale,
        gradient);
  }
  // Computes the gradient wrt 'weights_scale * weights' for 'example' and
  // writes it to 'example_gradient' as a vector<pair<int, float>> rather
  // than an Eigen::SparseVector<float>, since Eigen is very inefficient with
  // sparse vectors. This is only necessary when running SGDAdaGrad.
  virtual void ExampleGradient(
      const Weights &weights, int example, float weights_scale,
      float example_scale,
      std::vector<std::pair<int, float>> *example_gradient) const {
    loss_function_->ExampleGradient(
        weights, instances_, labels_, example, weights_scale, example_scale,
        example_gradient);
  }
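
  // A sketch of how a stochastic solver might consume the (index, value)
  // pairs in an AdaGrad-style update ('eta' and 'accumulated_sq' are
  // hypothetical solver state, not part of this class):
  //
  //   std::vector<std::pair<int, float>> example_gradient;
  //   evaluator.ExampleGradient(weights, example, 1.0f, 1.0f,
  //                             &example_gradient);
  //   for (const auto &entry : example_gradient) {
  //     accumulated_sq(entry.first) += entry.second * entry.second;
  //     weights(entry.first) -=
  //         eta * entry.second / std::sqrt(accumulated_sq(entry.first));
  //   }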
  // Returns the number of examples in the dataset.
  virtual int NumExamples() const { return instances_.rows(); }

  // Returns the number of features.
  virtual int NumFeatures() const { return instances_.cols(); }

  // Returns the number of weights, which the loss function derives from the
  // number of features.
  virtual int NumWeights() const {
    return loss_function_->NumWeights(NumFeatures());
  }

  // Returns an upper bound on the curvature of the loss function. Used to
  // set the learning rate of some LossMinimizer algorithms.
  virtual float LossCurvature() const {
    return loss_function_->LossCurvature(instances_);
  }
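
  // For example (standard practice for gradient descent on smooth losses,
  // not a guarantee documented by this library): a step size of at most
  // 1.0f / evaluator.LossCurvature() keeps plain gradient descent stable.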
  // Computes the per-coordinate curvature of the data and writes it to
  // 'per_coordinate_curvature'. Used to set the learning rates of
  // ParallelBoostingWithMomentum.
  virtual void PerCoordinateCurvature(
      VectorXf *per_coordinate_curvature) const {
    loss_function_->PerCoordinateCurvature(instances_,
                                           per_coordinate_curvature);
  }

  // Returns the sparsity of the dataset, defined as the maximum l0 norm
  // (number of non-zero features) over all instances. Used to help set
  // learning rates in ParallelBoostingWithMomentum.
  float Sparsity() const {
    Instance::Index sparsity = 0;
    for (int i = 0; i < instances_.rows(); ++i) {
      sparsity = std::max(sparsity, instances_.innerVector(i).nonZeros());
    }
    return static_cast<float>(sparsity);
  }
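
  // For example, if the rows of 'instances_' were [1 0 2] and [0 3 0], the
  // per-instance l0 norms would be 2 and 1, so Sparsity() would return 2.0f.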
  // Returns the loss function.
  const LossFunction *loss_function() const { return loss_function_; }

  // Returns the instances.
  const InstanceSet &instances() const { return instances_; }

  // Returns the labels.
  const LabelSet &labels() const { return labels_; }

 private:
  // Training instances.
  const InstanceSet &instances_;

  // Instance labels.
  const LabelSet &labels_;

  // Function for computing the loss and gradient of a single training example.
  // Not owned.
  const LossFunction *loss_function_;
};

}  // namespace lossmin