| // Copyright 2014 Google Inc. All Rights Reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // Author: nevena@google.com (Nevena Lazic) |
| |
#include "lossmin/minimizers/parallel-boosting-with-momentum.h"

#include <math.h>

#include <algorithm>
#include <cmath>
#include <functional>

#include "lossmin/minimizers/gradient-evaluator.h"
#include "third_party/eigen/Eigen/Core"
| |
| namespace lossmin { |
| |
| void ParallelBoostingWithMomentum::Setup() { |
| // Initialize the approximating function parameters. |
| compute_and_set_learning_rates(); |
| alpha_ = 0.5f; |
| beta_ = 1.0f - alpha_; |
| phi_center_ = Weights::Zero(gradient_evaluator().NumWeights()); |
| } |
| |
| void ParallelBoostingWithMomentum::compute_and_set_learning_rates() { |
| // Per-coordinate learning rates are learning_rates[j] = 1 / sparsity * Lj, |
| // where sparsity is the maximum instance l0 norm and Lj is upper bound on |
| // the loss curvature along coordinate j. |
| float sparsity = gradient_evaluator().Sparsity(); |
| gradient_evaluator().PerCoordinateCurvature(&learning_rates_); |
| learning_rates_ = |
| (learning_rates_.array() + l2()).inverse().matrix() / sparsity; |
| } |
| |
| void ParallelBoostingWithMomentum::SparseInnerProductGradient( |
| const Weights &weights, Weights *gradient) const { |
| // Eigen library recommends computations step-by-step for best perfomance |
| Weights residual = gradient_evaluator().instances() * weights; |
| residual -= gradient_evaluator().labels(); |
| *gradient = instances_transpose_ * residual; |
| *gradient /= gradient_evaluator().NumExamples(); |
| } |
| |
| float ParallelBoostingWithMomentum::Loss(const Weights &weights) const { |
| // Eigen library recommends computations step-by-step for best perfomance |
| Weights residual = gradient_evaluator().instances() * weights; |
| residual -= gradient_evaluator().labels(); |
| float loss = 0.5 * residual.squaredNorm(); |
| if (l2() > 0.0f) loss += 0.5 * l2() * weights.squaredNorm(); |
| if (l1() > 0.0f) loss += l1() * weights.cwiseAbs().sum(); |
| return loss; |
| } |
| |
| void ParallelBoostingWithMomentum::ConvergenceCheck(const Weights &weights, |
| const Weights &gradient) { |
| float error_squared = 0.0f; |
| for (int i = 0; i < gradient.size(); i++) { |
| // for weights > 0 the gradient should be == -l1 |
| if (weights(i) > zero_threshold()) { |
| error_squared += (gradient(i) + l1()) * (gradient(i) + l1()); |
| } |
| // for weights < 0 the gradient should be == l1 |
| else if (weights(i) < -zero_threshold()) { |
| error_squared += (gradient(i) - l1()) * (gradient(i) - l1()); |
| } |
| // for weights == 0 the gradient should be between -l1 and l1 |
| else { |
| float err = std::max(std::abs(gradient(i)) - l1(), 0.0f); |
| error_squared += err * err; |
| } |
| } |
| |
| if (std::sqrt(error_squared) / weights.size() < convergence_threshold()) { |
| set_converged(true); |
| } |
| } |
| |
| void ParallelBoostingWithMomentum::EpochUpdate(Weights *weights, int epoch, |
| bool check_convergence) { |
| // Compute the intermediate weight vector y. |
| Weights y = (1.0f - alpha_) * *weights + alpha_ * phi_center_; |
| |
| // Compute the gradient of the loss wrt y. |
| Weights gradient_wrt_y = Weights::Zero(y.size()); |
| SparseInnerProductGradient(y, &gradient_wrt_y); |
| |
| if (l2() > 0.0f) gradient_wrt_y += l2() * y; |
| |
| // Gradient step. |
| *weights -= gradient_wrt_y.cwiseProduct(learning_rates_); |
| |
| // l1 shrinkage. |
| if (l1() > 0.0f) { |
| L1Prox(l1() * learning_rates_, weights); |
| } |
| |
| // Update the approximation function. |
| phi_center_ -= (1.0 - alpha_) / alpha_ * (y - *weights); |
| alpha_ = |
| -beta_ / 2.0 + pow(beta_ + beta_ * beta_ / 4.0, static_cast<float>(0.5)); |
| beta_ *= (1.0 - alpha_); |
| |
| // Compute the gradient of the objective except the l1 part and check |
| // convergence |
| if (check_convergence) { |
| Weights gradient_wrt_weights = Weights::Zero(weights->size()); |
| SparseInnerProductGradient(*weights, &gradient_wrt_weights); |
| if (l2() > 0.0f) { |
| gradient_wrt_weights += l2() * *weights; |
| } |
| ConvergenceCheck(*weights, gradient_wrt_weights); |
| } |
| } |
| |
| } // namespace lossmin |