// Copyright 2014 Google Inc. All Rights Reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)

#include "src/lib/lossmin/minimizers/parallel-boosting-with-momentum.h"

#include <algorithm>
#include <cmath>
#include <functional>

#include "src/lib/lossmin/minimizers/gradient-evaluator.h"

#include "third_party/eigen/Eigen/Core"

namespace cobalt_lossmin {
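
// Implementation notes: this minimizer runs accelerated (momentum) parallel
// gradient descent on the objective
//   F(w) = loss(w) + 0.5 * l2 * ||w||^2 + l1 * ||w||_1,
// taking per-coordinate gradient steps on the smooth part (the loss and the
// l2 penalty) and handling the l1 term with a proximal shrinkage step.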

void ParallelBoostingWithMomentum::Setup() {
  compute_and_set_learning_rates();
  set_converged(false);
  set_reached_solution(false);
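  // Initialize the momentum parameters: alpha_ is the mixing weight used to
  // form the intermediate iterate y in EpochUpdate(), and beta_ accumulates
  // the product of the (1 - alpha_) factors across epochs.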
  alpha_ = 0.5;  // NOLINT readability-magic-numbers
  beta_ = 1.0 - alpha_;
  phi_center_ = Weights::Zero(gradient_evaluator().NumWeights());
}

void ParallelBoostingWithMomentum::compute_and_set_learning_rates() {
  // Per-coordinate learning rates are
  // learning_rates[j] = 1 / (sparsity * (Lj + l2)), where sparsity is the
  // maximum l0 norm of the rows of the instance matrix, and Lj is an upper
  // bound on the loss curvature along coordinate j.
  double sparsity = gradient_evaluator().Sparsity();
  gradient_evaluator().PerCoordinateCurvature(&learning_rates_);
  learning_rates_ =
      (learning_rates_.array() + l2()).inverse().matrix() / sparsity;
}

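// Returns the value of the full objective at |weights|: the loss computed by
// the gradient evaluator plus the l2 and l1 regularization penalties.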
double ParallelBoostingWithMomentum::Loss(const Weights &weights) const {
  double loss = gradient_evaluator().SparseLoss(weights);
  if (l2() > 0.0) {
    // NOLINTNEXTLINE readability-magic-numbers
    loss += 0.5 * l2() * weights.squaredNorm();
  }
  if (l1() > 0.0) {
    loss += l1() * weights.cwiseAbs().sum();
  }
  return loss;
}

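// Checks how far |weights| is from satisfying the first-order (KKT)
// optimality conditions of the objective
//   F(w) = loss(w) + 0.5 * l2 * ||w||^2 + l1 * ||w||_1,
// where |gradient| holds the gradient of the smooth part (everything except
// the l1 term). At a minimizer, each coordinate i must satisfy:
//   gradient(i) == -l1   if weights(i) > 0,
//   gradient(i) ==  l1   if weights(i) < 0,
//   |gradient(i)| <= l1  if weights(i) == 0.
// The l2 norm of the per-coordinate violations, divided by the number of
// weights, is compared against convergence_threshold().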
void ParallelBoostingWithMomentum::ConvergenceCheck(const Weights &weights,
                                                    const Weights &gradient) {
  double error_squared = 0.0;
  for (int i = 0; i < gradient.size(); i++) {
    if (weights(i) > zero_threshold()) {
      // For weights > 0, the gradient should be equal to -l1.
      error_squared += (gradient(i) + l1()) * (gradient(i) + l1());
    } else if (weights(i) < -zero_threshold()) {
      // For weights < 0, the gradient should be equal to l1.
      error_squared += (gradient(i) - l1()) * (gradient(i) - l1());
    } else {
      // For weights == 0, the gradient should lie between -l1 and l1.
      double err = std::max(std::abs(gradient(i)) - l1(), 0.0);
      error_squared += err * err;
    }
  }

  if (std::sqrt(error_squared) / weights.size() < convergence_threshold()) {
    set_reached_solution(true);
    set_converged(true);
  }
}

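// Performs one epoch of the accelerated parallel boosting update:
//   1. Form the intermediate point y by interpolating between the current
//      weights and phi_center_.
//   2. Take a per-coordinate gradient step at y, followed by the l1
//      proximal (shrinkage) step.
//   3. Update the quadratic approximation: shift phi_center_ and recompute
//      the momentum parameters alpha_ and beta_.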
void ParallelBoostingWithMomentum::EpochUpdate(Weights *weights, int /*epoch*/,
                                               bool check_convergence) {
  // Compute the intermediate weight vector y.
  Weights y = (1.0 - alpha_) * *weights + alpha_ * phi_center_;

  // Compute the gradient at y, excluding the l1 penalty (which is handled
  // by the proximal step below).
  Weights gradient_wrt_y = Weights::Zero(y.size());
  gradient_evaluator().SparseGradient(y, &gradient_wrt_y);
  if (l2() > 0.0) {
    gradient_wrt_y += l2() * y;
  }

  // Take the gradient descent step.
  *weights -= gradient_wrt_y.cwiseProduct(learning_rates_);

  // Apply the l1 shrinkage.
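  // L1Prox (declared with the minimizer interface) is expected to apply
  // coordinate-wise soft-thresholding, the proximal operator of the l1 norm:
  //   w_i <- sign(w_i) * max(|w_i| - l1 * learning_rates_[i], 0).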
  if (l1() > 0.0) {
    L1Prox(l1() * learning_rates_, weights);
  }

  // Update the approximation function.
  phi_center_ -= (1.0 - alpha_) / alpha_ * (y - *weights);
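  // Set alpha_ to the positive root of the quadratic
  // x^2 + beta_ * x - beta_ = 0 (equivalently, x^2 = beta_ * (1 - x)),
  // written out below via the quadratic formula.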
  // NOLINTNEXTLINE readability-magic-numbers
  alpha_ = -beta_ / 2.0 + std::sqrt(beta_ + beta_ * beta_ / 4.0);
  beta_ *= (1.0 - alpha_);

  if (check_convergence) {
    // Compute the gradient of the objective, excluding the l1 part, and
    // check for convergence.
    Weights gradient_wrt_weights = Weights::Zero(weights->size());
    gradient_evaluator().SparseGradient(*weights, &gradient_wrt_weights);
    if (l2() > 0.0) {
      gradient_wrt_weights += l2() * *weights;
    }
    ConvergenceCheck(*weights, gradient_wrt_weights);
  }
}

}  // namespace cobalt_lossmin