// Copyright 2014 Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
//
// GradientEvaluator is a class for computing the value and gradient of a
// LossFunction on a labeled dataset {(instance_i, label_i)}, given parameters
// 'weights'. Its methods are called by gradient descent algorithms
// implementing the LossMinimizer interface.
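//
// Example usage (an illustrative sketch: 'MyLoss' stands in for a concrete
// LossFunction subclass and is hypothetical; InstanceSet, LabelSet, and
// Weights are the Eigen typedefs from lossmin/eigen-types.h):
//
//   MyLoss loss;
//   GradientEvaluator evaluator(instances, labels, &loss);
//   Weights weights = Weights::Zero(evaluator.NumWeights());
//   double initial_loss = evaluator.Loss(weights);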
#pragma once
#include <algorithm>
#include <mutex>
#include <string>
#include <vector>
#include "lossmin/eigen-types.h"
#include "lossmin/losses/loss-function.h"
class BlockingCounter;
namespace lossmin {
class GradientEvaluator {
public:
// Constructor sets up the dataset and the loss function.
GradientEvaluator(const InstanceSet &instances, const LabelSet &labels,
const LossFunction *loss_function)
: instances_(instances),
instances_transposed_(instances.transpose()),
labels_(labels),
loss_function_(loss_function) {}
virtual ~GradientEvaluator() {}
// Returns the loss for the given parameters 'weights' over the full dataset.
virtual double Loss(const Weights &weights) const;
// Returns the loss for given parameters 'weights' and a subset of examples
// 'example_indices'.
virtual double Loss(const Weights &weights,
const std::vector<int> &example_indices) const;
// Returns the loss for given parameters 'weights' and a different
// dataset (typically used for validation).
virtual double Loss(const Weights &weights,
const InstanceSet &validation_instances,
const LabelSet &validation_labels) const;
// Computes the gradient wrt the given parameters 'weights' and accumulates it
// into 'gradient'. 'gradient' is owned by the caller and should be
// initialized to zero before the call.
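//
// Example (an illustrative sketch, not part of this library: one batch
// gradient-descent step, where 'evaluator' is a GradientEvaluator and 'eta'
// is a hypothetical learning rate; Weights is assumed to be a dense Eigen
// vector type per eigen-types.h):
//
//   Weights gradient = Weights::Zero(evaluator.NumWeights());
//   evaluator.Gradient(weights, &gradient);
//   weights -= eta * gradient;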
virtual void Gradient(const Weights &weights, Weights *gradient) const;
// Adds the gradient wrt 'weights_scale * weights' for 'example' to the vector
// 'gradient' in place. The added gradient is scaled by 'example_scale'.
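//
// Example (an illustrative sketch: a single-example SGD step on example 'i'
// with a hypothetical learning rate 'eta'):
//
//   Weights gradient = Weights::Zero(evaluator.NumWeights());
//   evaluator.AddExampleGradient(weights, i, /*weights_scale=*/1.0,
//                                /*example_scale=*/1.0, &gradient);
//   weights -= eta * gradient;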
virtual void AddExampleGradient(const Weights &weights, int example,
double weights_scale, double example_scale,
Weights *gradient) const {
loss_function_->AddExampleGradient(weights, instances_, labels_, example,
weights_scale, example_scale, gradient);
}
// Computes the gradient wrt 'weights' for 'example', writing it to
// 'example_gradient' as a vector<pair<int, double>> rather than an
// Eigen::SparseVector<double>, since Eigen is very inefficient with sparse
// vectors. This is only necessary when running SGDAdaGrad.
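//
// Example (an illustrative sketch of an AdaGrad-style per-coordinate update;
// 'g_sum_sq', a VectorXd of accumulated squared gradients, and the learning
// rate 'eta' are hypothetical):
//
//   std::vector<std::pair<int, double>> example_gradient;
//   evaluator.ExampleGradient(weights, i, 1.0, 1.0, &example_gradient);
//   for (const auto &entry : example_gradient) {
//     g_sum_sq(entry.first) += entry.second * entry.second;
//     weights(entry.first) -=
//         eta * entry.second / std::sqrt(g_sum_sq(entry.first));
//   }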
virtual void ExampleGradient(
const Weights &weights, int example, double weights_scale,
double example_scale,
std::vector<std::pair<int, double>> *example_gradient) const {
loss_function_->ExampleGradient(weights, instances_, labels_, example,
weights_scale, example_scale,
example_gradient);
}
// Returns the number of examples in the dataset.
virtual int NumExamples() const { return instances_.rows(); }
// Returns the number of features.
virtual int NumFeatures() const { return instances_.cols(); }
// Returns the number of weights, which the loss function determines from the
// number of features.
virtual int NumWeights() const {
return loss_function_->NumWeights(NumFeatures());
}
// Returns an upper bound on the curvature of the loss function. Used to set
// the learning rate of some LossMinimizer algorithms.
virtual double LossCurvature() const {
return loss_function_->LossCurvature(instances_);
}
// Returns the per-coordinate curvature of the data. Used to set the learning
// rates of ParallelBoostingWithMomentum.
virtual void PerCoordinateCurvature(
VectorXd *per_coordinate_curvature) const {
loss_function_->PerCoordinateCurvature(instances_,
per_coordinate_curvature);
}
// Returns sparsity, defined as the maximum instance l0 norm. Used to help
// set learning rates in ParallelBoostingWithMomentum.
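//
// Example (an illustrative sketch of combining Sparsity() with
// PerCoordinateCurvature() to derive per-coordinate learning rates, in the
// spirit of ParallelBoostingWithMomentum; the exact formula that minimizer
// uses is not shown here, and PerCoordinateCurvature() is assumed to size
// its output):
//
//   VectorXd curvature;
//   evaluator.PerCoordinateCurvature(&curvature);
//   VectorXd learning_rates =
//       (evaluator.Sparsity() * curvature.array()).inverse().matrix();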
double Sparsity() const {
Instance::Index sparsity = 0;
for (int i = 0; i < instances_.rows(); ++i) {
sparsity = std::max(sparsity, instances_.innerVector(i).nonZeros());
}
return static_cast<double>(sparsity);
}
// Returns the loss function.
const LossFunction *loss_function() const { return loss_function_; }
// Returns the instances.
const InstanceSet &instances() const { return instances_; }
// Returns the transpose of the instances.
const InstanceSet &instances_transposed() const {
return instances_transposed_;
}
// Returns the labels.
const LabelSet &labels() const { return labels_; }
private:
// Training instances.
const InstanceSet &instances_;
// The transpose of 'instances_'. It is needed for fast gradient computations,
// so it is computed once at construction rather than each time the gradient
// is computed.
const InstanceSet instances_transposed_;
// Instance labels.
const LabelSet &labels_;
// Function for computing the loss and gradient of a single training example.
// Not owned.
const LossFunction *loss_function_;
};
} // namespace lossmin