// Copyright 2014 Google Inc. All Rights Reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
// TODO(bazyli) update license header
#include "lossmin/minimizers/parallel-boosting-with-momentum.h"
#include <algorithm>
#include <cmath>
#include <functional>
#include "lossmin/minimizers/gradient-evaluator.h"
#include "third_party/eigen/Eigen/Core"
namespace lossmin {
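// Initializes the minimizer: computes the per-coordinate learning rates and
// sets the initial momentum parameters alpha_ and beta_ and the approximation
// center phi_center_.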
void ParallelBoostingWithMomentum::Setup() {
compute_and_set_learning_rates();
alpha_ = 0.5;
beta_ = 1.0 - alpha_;
phi_center_ = Weights::Zero(gradient_evaluator().NumWeights());
}
void ParallelBoostingWithMomentum::compute_and_set_learning_rates() {
  // Per-coordinate learning rates are
  // learning_rates[j] = 1 / (sparsity * (Lj + l2)), where sparsity is the
  // maximum instance l0 norm, Lj is an upper bound on the loss curvature
  // along coordinate j, and l2 is the l2 regularization strength.
double sparsity = gradient_evaluator().Sparsity();
gradient_evaluator().PerCoordinateCurvature(&learning_rates_);
learning_rates_ =
(learning_rates_.array() + l2()).inverse().matrix() / sparsity;
}
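// Computes the gradient of the (unregularized) least-squares loss at
// |weights|: gradient = instances^T * (instances * weights - labels) /
// NumExamples().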
void ParallelBoostingWithMomentum::SparseInnerProductGradient(
const Weights &weights, Weights *gradient) const {
  // The Eigen library recommends performing computations step-by-step for the
  // best performance.
// TODO(bazyli) parallel matrix vector multiply with open mp
Weights residual = gradient_evaluator().instances() * weights;
residual -= gradient_evaluator().labels();
*gradient = gradient_evaluator().instances_transposed() * residual;
*gradient /= gradient_evaluator().NumExamples();
}
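// Returns the total objective at |weights|:
// 0.5 * ||instances * weights - labels||^2 / NumExamples()
// + 0.5 * l2 * ||weights||^2 + l1 * ||weights||_1.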
double ParallelBoostingWithMomentum::Loss(const Weights &weights) const {
  // The Eigen library recommends performing computations step-by-step for the
  // best performance.
// TODO(bazyli) parallel matrix vector multiply with open mp
Weights residual = gradient_evaluator().instances() * weights;
residual -= gradient_evaluator().labels();
double loss =
0.5 * residual.squaredNorm() / gradient_evaluator().NumExamples();
if (l2() > 0.0) loss += 0.5 * l2() * weights.squaredNorm();
if (l1() > 0.0) loss += l1() * weights.cwiseAbs().sum();
return loss;
}
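// Checks the optimality (KKT) conditions of the l1-regularized objective,
// given |gradient|, the gradient of the objective without the l1 term: at
// nonzero weights the gradient should cancel the l1 subgradient, and at zero
// weights it should lie in [-l1, l1]. The converged flags are set when the
// accumulated violation, measured as sqrt(error_squared) / weights.size(),
// drops below convergence_threshold().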
void ParallelBoostingWithMomentum::ConvergenceCheck(const Weights &weights,
const Weights &gradient) {
double error_squared = 0.0;
for (int i = 0; i < gradient.size(); i++) {
    if (weights(i) > zero_threshold()) {
      // For weights > 0 the gradient should be == -l1.
      error_squared += (gradient(i) + l1()) * (gradient(i) + l1());
    } else if (weights(i) < -zero_threshold()) {
      // For weights < 0 the gradient should be == l1.
      error_squared += (gradient(i) - l1()) * (gradient(i) - l1());
    } else {
      // For weights == 0 the gradient should be between -l1 and l1.
      double err = std::max(std::abs(gradient(i)) - l1(), 0.0);
      error_squared += err * err;
    }
}
if (std::sqrt(error_squared) / weights.size() < convergence_threshold()) {
set_reached_solution(true);
set_converged(true);
}
}
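// Performs one iteration of parallel boosting with momentum: takes a gradient
// step from the intermediate point y = (1 - alpha_) * weights + alpha_ *
// phi_center_, applies the l1 proximal (shrinkage) step, and then updates
// phi_center_ and the momentum parameters alpha_ and beta_. Optionally checks
// convergence.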
void ParallelBoostingWithMomentum::EpochUpdate(Weights *weights, int epoch,
bool check_convergence) {
// Compute the intermediate weight vector y.
Weights y = (1.0 - alpha_) * *weights + alpha_ * phi_center_;
// Compute the gradient of the loss (except l1 penalty) wrt y.
Weights gradient_wrt_y = Weights::Zero(y.size());
SparseInnerProductGradient(y, &gradient_wrt_y);
if (l2() > 0.0) gradient_wrt_y += l2() * y;
// Gradient step.
*weights -= gradient_wrt_y.cwiseProduct(learning_rates_);
// l1 shrinkage.
if (l1() > 0.0) {
L1Prox(l1() * learning_rates_, weights);
}
// Update the approximation function.
phi_center_ -= (1.0 - alpha_) / alpha_ * (y - *weights);
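  // Update the momentum parameters: alpha_ is set to the positive root of
  // alpha^2 = beta_ * (1 - alpha), and beta_ is rescaled accordingly.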
  alpha_ = -beta_ / 2.0 + std::sqrt(beta_ + beta_ * beta_ / 4.0);
beta_ *= (1.0 - alpha_);
  // Compute the gradient of the objective excluding the l1 part and check
  // for convergence.
if (check_convergence) {
Weights gradient_wrt_weights = Weights::Zero(weights->size());
SparseInnerProductGradient(*weights, &gradient_wrt_weights);
    if (l2() > 0.0) {
gradient_wrt_weights += l2() * *weights;
}
ConvergenceCheck(*weights, gradient_wrt_weights);
}
}
} // namespace lossmin