// Copyright 2014 Google Inc. All Rights Reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
//
// An implementation of:
// I. Mukherjee, K. Canini, R. Frongillo, and Y. Singer, "Parallel Boosting with
// Momentum", ECML PKDD 2013.
// Variable names follow the notation in the paper.

#pragma once

#include "lossmin/eigen-types.h"
#include "lossmin/minimizers/gradient-evaluator.h"
#include "lossmin/minimizers/loss-minimizer.h"

namespace lossmin {

class GradientEvaluator;

| class ParallelBoostingWithMomentum : public LossMinimizer { |
| public: |
| ParallelBoostingWithMomentum( |
| float l1, float l2, const GradientEvaluator &gradient_evaluator) |
| : LossMinimizer(l1, l2, gradient_evaluator) { |
| Setup(); |
| } |
| |
| // Sets learning rates and other parameters. |
| void Setup() override; |
| |
| private: |
| // Updates 'weights' and the quadratic approximation function phi(w), such |
| // that at iteration k, loss(weights_k) <= min_w phi_k(w). |
| // y = (1 - alpha) * weights + alpha * phi_center |
| // grad_y = loss_grad(y) + l2 * y |
| // weights[j] = weights[j] - grad_y[j] / learning_rates[j] |
| // weights[j] = |
| // sign(weights[j]) * max(0, weights[j], l1 / learning_rates[j]) |
| void EpochUpdate(Weights *weights, int epoch, |
| bool check_convergence) override; |
| |
| // Per-coordinate learning rates. |
| VectorXf learning_rates_; |
| |
| // Center of the approximating quadratic function phi. |
| VectorXf phi_center_; |
| |
| // Parameter for updating the approximation function phi. At each iteration, |
| // 'alpha_' is updated to the solution of the quadratic equation |
| // alpha_^2 = beta_ * (1.0 - alpha_) |
| float alpha_; |
| |
| // Parameter used to update alpha, defined as |
| // beta_{epoch} = \prod_{i=1}^{epoch} (1 - alpha_i). |
| float beta_; |
| }; |

}  // namespace lossmin