// blob: 619dfe89d13254ebd0b43c224025b7ac552dd44e [file] [log] [blame]
// Copyright 2014 Google Inc. All Rights Reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
//
// An implementation of:
// I. Mukherjee, K. Canini, R. Frongillo, and Y. Singer, "Parallel Boosting with
// Momentum", ECML PKDD 2013.
// Variable names follow the notation in the paper.
#pragma once
#include "lossmin/eigen-types.h"
#include "lossmin/minimizers/gradient-evaluator.h"
#include "lossmin/minimizers/loss-minimizer.h"
namespace lossmin {
// NOTE(review): this forward declaration is redundant — gradient-evaluator.h
// is included above — but harmless; kept for existing translation units.
class GradientEvaluator;

// Minimizes an l1/l2-regularized loss via accelerated parallel coordinate
// descent with momentum (Mukherjee et al., ECML PKDD 2013). Maintains a
// quadratic approximation phi of the loss whose center is mixed with the
// current iterate through the momentum parameter alpha_.
class ParallelBoostingWithMomentum : public LossMinimizer {
 public:
  // 'l1' and 'l2' are the regularization strengths; 'gradient_evaluator'
  // supplies the (unregularized) loss gradient.
  // NOTE(review): Setup() is virtual and invoked from this constructor, so it
  // dispatches to this class's Setup(), never to a further-derived override —
  // confirm no subclass relies on its own Setup() running here.
  ParallelBoostingWithMomentum(
      float l1, float l2, const GradientEvaluator &gradient_evaluator)
      : LossMinimizer(l1, l2, gradient_evaluator) {
    Setup();
  }

  // Sets learning rates and other parameters (implemented in the .cc file).
  void Setup() override;

 private:
  // Runs one epoch of the algorithm. Updates 'weights' and the quadratic
  // approximation function phi(w), such that at iteration k,
  // loss(weights_k) <= min_w phi_k(w):
  //   y = (1 - alpha) * weights + alpha * phi_center
  //   grad_y = loss_grad(y) + l2 * y
  //   weights[j] = weights[j] - grad_y[j] / learning_rates[j]
  // followed by the per-coordinate l1 soft-thresholding (shrinkage) step:
  //   weights[j] =
  //       sign(weights[j]) * max(0, |weights[j]| - l1 / learning_rates[j])
  // 'check_convergence' presumably gates a convergence test for this epoch —
  // verify against the implementation in the .cc file.
  void EpochUpdate(Weights *weights, int epoch,
                   bool check_convergence) override;

  // Per-coordinate learning rates.
  VectorXf learning_rates_;

  // Center of the approximating quadratic function phi.
  VectorXf phi_center_;

  // Parameter for updating the approximation function phi. At each iteration,
  // 'alpha_' is updated to the positive root of the quadratic equation
  //   alpha_^2 = beta_ * (1.0 - alpha_)
  float alpha_;

  // Parameter used to update alpha, defined as
  //   beta_{epoch} = \prod_{i=1}^{epoch} (1 - alpha_i).
  float beta_;
};
} // namespace lossmin