// Copyright 2014 Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
//
// GradientEvaluator is a class for computing the value and gradient of a
// loss function on a labeled dataset {(instance_i, label_i)}, given parameters
// 'weights'. Its methods are called by gradient descent algorithms implementing
// the LossMinimizer interface.
//
// By default, the loss function (whose value and gradient are computed) is the
// linear regression objective function:
//
// f(x) == (0.5 / N) * || A * x - b ||_2^2,
//
// where x == 'weights', A == instances_, b == labels_, N == instances_.rows().
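//
// Example usage (a minimal sketch; 'instances' and 'labels' stand for Eigen
// objects matching the InstanceSet and LabelSet typedefs in eigen-types.h,
// and Weights is assumed to be a dense Eigen vector):
//
//   GradientEvaluator evaluator(instances, labels);
//   Weights weights = Weights::Zero(evaluator.NumWeights());
//   Weights gradient = Weights::Zero(evaluator.NumWeights());
//   double loss = evaluator.Loss(weights);
//   evaluator.Gradient(weights, &gradient);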
#ifndef COBALT_UTIL_LOSSMIN_MINIMIZERS_GRADIENT_EVALUATOR_H_
#define COBALT_UTIL_LOSSMIN_MINIMIZERS_GRADIENT_EVALUATOR_H_
#include <algorithm>
#include <mutex>
#include <string>
#include <vector>
#include "util/lossmin/eigen-types.h"
namespace cobalt_lossmin {
class GradientEvaluator {
public:
// Constructor sets up the dataset.
GradientEvaluator(const InstanceSet &instances, const LabelSet &labels)
: instances_(instances),
instances_transposed_(instances.transpose()),
labels_(labels) {}
virtual ~GradientEvaluator() {}
// Returns the residual between the labels predicted at 'weights' and labels_.
// The default implementation returns A * x - b, where A == instances_,
// x == 'weights', b == labels_. It exploits sparsity by iterating over the
// rows of instances_ to compute the matrix-vector product straight from the
// definition.
// Note: This is not as efficient as calling Eigen's matrix-vector multiply
// directly, but it can easily be parallelized (when a parallel implementation
// is desired but OpenMP is not available).
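//
// A sketch of the row-iterator multiplication described above (assuming
// InstanceSet is a row-major Eigen::SparseMatrix<double> and Weights and
// LabelSet are dense Eigen vectors, per eigen-types.h):
//
//   Weights residual = -labels_;
//   for (int i = 0; i < instances_.outerSize(); ++i) {
//     for (InstanceSet::InnerIterator it(instances_, i); it; ++it) {
//       residual(it.row()) += it.value() * weights(it.col());
//     }
//   }
//   return residual;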
virtual Weights Residual(const Weights &weights) const;
// Returns the loss for the given parameters 'weights'.
// The default implementation computes half the normalized squared norm of the
// vector returned by Residual(weights): (0.5 / N) * || A * x - b ||_2^2, where
// A == instances_, b == labels_, x == 'weights', N == instances_.rows().
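//
// A sketch of the default computation in terms of Residual():
//
//   double loss = 0.5 * Residual(weights).squaredNorm() / NumExamples();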
virtual double Loss(const Weights &weights) const;
// The same as Loss() except that, by default, it uses Eigen's efficient
// matrix-vector multiplication directly instead of calling Residual().
// Note: It can be parallelized with OpenMP where OpenMP is supported.
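//
// A sketch of the direct Eigen computation (same assumptions about the
// typedefs as above):
//
//   double loss =
//       0.5 * (instances_ * weights - labels_).squaredNorm() / NumExamples();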
virtual double SparseLoss(const Weights &weights) const;
// Computes the gradient with respect to the given parameters 'weights'.
// 'gradient' is owned by the caller and should be initialized to zero.
// The default implementation computes (1 / N) * A^T * (A * x - b), where
// A == instances_, b == labels_, x == 'weights', N == instances_.rows(). It
// exploits sparsity by iterating over the rows of instances_transposed_ to
// compute the matrix-vector product straight from the definition, in the same
// way as the default implementation of Residual().
// Note: This is not as efficient as calling Eigen's matrix-vector multiply
// directly, but it can easily be parallelized (when a parallel implementation
// is desired but OpenMP is not available).
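//
// A sketch of the default computation (same assumptions as in Residual();
// 'gradient' is zero-initialized by the caller):
//
//   const Weights residual = Residual(weights);
//   for (int j = 0; j < instances_transposed_.outerSize(); ++j) {
//     for (InstanceSet::InnerIterator it(instances_transposed_, j); it; ++it) {
//       (*gradient)(it.row()) += it.value() * residual(it.col());
//     }
//   }
//   *gradient /= NumExamples();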
virtual void Gradient(const Weights &weights, Weights *gradient) const;
// The same as Gradient() except that, by default, it uses Eigen's efficient
// matrix-vector multiplication directly.
// Note: It can be parallelized with OpenMP where OpenMP is supported.
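//
// A sketch of the direct Eigen computation:
//
//   const Weights residual = instances_ * weights - labels_;
//   *gradient = instances_transposed_ * residual;
//   *gradient /= NumExamples();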
virtual void SparseGradient(const Weights &weights, Weights *gradient) const;
// Returns the number of examples in the dataset.
int NumExamples() const { return instances_.rows(); }
// Returns the number of features.
int NumFeatures() const { return instances_.cols(); }
// Alias for the number of features (these are often used interchangeably).
int NumWeights() const { return NumFeatures(); }
// Computes the per-coordinate curvature of the data and stores it in
// *per_coordinate_curvature. Used to set the learning rates of
// ParallelBoostingWithMomentum.
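//
// For the default least-squares objective the Hessian is (1 / N) * A^T * A,
// so its j-th diagonal entry, (1 / N) * sum_i A(i, j)^2, is a natural
// per-coordinate curvature. A sketch using rows of instances_transposed_
// (same typedef assumptions as above):
//
//   for (int j = 0; j < NumFeatures(); ++j) {
//     (*per_coordinate_curvature)(j) =
//         instances_transposed_.row(j).squaredNorm() / NumExamples();
//   }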
void PerCoordinateCurvature(VectorXd *per_coordinate_curvature) const;
// Returns the sparsity of the data, defined as the maximum l0 norm (number of
// nonzero entries) over all instances. Used to help set learning rates in
// ParallelBoostingWithMomentum.
// TODO(bazyli): exploit sparsity in the implementation.
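//
// A sketch of the definition above, counting nonzeros per row of instances_
// (same typedef assumptions as above):
//
//   double max_instance_l0 = 0;
//   for (int i = 0; i < instances_.outerSize(); ++i) {
//     int nnz = 0;
//     for (InstanceSet::InnerIterator it(instances_, i); it; ++it) ++nnz;
//     max_instance_l0 = std::max<double>(max_instance_l0, nnz);
//   }
//   return max_instance_l0;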
double Sparsity() const;
// Returns the instances.
const InstanceSet &instances() const { return instances_; }
// Returns the transpose of the instances.
const InstanceSet &instances_transposed() const {
return instances_transposed_;
}
// Returns the labels.
const LabelSet &labels() const { return labels_; }
private:
// Training instances.
const InstanceSet &instances_;
// The transpose of instances. It is needed for fast gradient computations and
// should be computed only once, so it is initialized at construction (rather
// than each time the gradient is computed).
const InstanceSet instances_transposed_;
// Instance labels.
const LabelSet &labels_;
};
} // namespace cobalt_lossmin
#endif // COBALT_UTIL_LOSSMIN_MINIMIZERS_GRADIENT_EVALUATOR_H_