// Copyright 2014 Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
#include "lossmin/minimizers/loss-minimizer.h"
#include <algorithm>
#include <random>
namespace lossmin {
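// Minimizes the loss for up to 'max_epochs' epochs, recording the training
// loss in 'loss' every 'loss_epochs' epochs and checking for convergence
// every 'convergence_epochs' epochs. Returns true if the run converged.
//
// Usage sketch ('minimizer' stands for an instance of a concrete
// LossMinimizer subclass and the parameter values are illustrative):
//
//   std::vector<float> loss;
//   Weights weights = Weights::Zero(num_weights);
//   bool converged = minimizer.Run(/*max_epochs=*/100, /*loss_epochs=*/1,
//                                  /*convergence_epochs=*/5, &weights, &loss);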
bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
Weights *weights, std::vector<float> *loss) {
// Run for up to 'max_epochs' epochs.
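  // 'epoch' is declared outside the loop so that the number of epochs
  // actually run can be recorded after the loop exits.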
int epoch;
for (epoch = 0; epoch < max_epochs; ++epoch) {
// Compute the loss.
if (epoch % loss_epochs == 0) {
loss->push_back(Loss(*weights));
// LOG(INFO) << epoch << ": " << loss->at(loss->size() - 1);
}
// Set the 'check_convergence' flag.
bool check_convergence = (epoch > 0) && (epoch % convergence_epochs == 0);
// Run for one epoch to update the parameters.
EpochUpdate(weights, epoch, check_convergence);
    // Also periodically run a simple loss-based convergence check. Relying on
    // ConvergenceCheck() alone can miss the case where the algorithm stalls
    // for any reason (e.g. numerical problems) before solving the problem to
    // the requested accuracy.
if (check_convergence) {
SimpleConvergenceCheck(*loss);
}
// Check the convergence flag.
if (converged_) {
break;
}
}
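  // Compute the final loss.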
loss->push_back(Loss(*weights));
// LOG(INFO) << "final loss: " << loss->at(loss->size() - 1);
num_epochs_run_ = std::min(epoch + 1, max_epochs);
return converged_;
}
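// Same as Run() above, but additionally records the loss on the provided
// validation instances and labels whenever the training loss is recorded,
// and once more after the final epoch.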
bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
const InstanceSet &validation_instances,
const LabelSet &validation_labels, Weights *weights,
std::vector<float> *training_loss,
std::vector<float> *validation_loss) {
// Run for up to 'max_epochs' epochs.
int epoch;
for (epoch = 0; epoch < max_epochs; ++epoch) {
// Compute the loss.
if (epoch % loss_epochs == 0) {
training_loss->push_back(Loss(*weights));
validation_loss->push_back(gradient_evaluator_.Loss(
*weights, validation_instances, validation_labels));
      // LOG(INFO) << epoch << ": "
      //           << training_loss->at(training_loss->size() - 1) << " "
      //           << validation_loss->at(validation_loss->size() - 1);
}
// Set the 'check_convergence' flag.
bool check_convergence = (epoch > 0) && (epoch % convergence_epochs == 0);
// Run for one epoch to update the parameters.
EpochUpdate(weights, epoch, check_convergence);
// Optionally do a simple convergence check.
if (check_convergence && use_simple_convergence_check_) {
SimpleConvergenceCheck(*training_loss);
}
// Check the convergence flag.
if (converged_) {
break;
}
}
// Compute final loss.
training_loss->push_back(Loss(*weights));
validation_loss->push_back(gradient_evaluator_.Loss(
*weights, validation_instances, validation_labels));
// LOG(INFO) << "final loss: "
//<< training_loss->at(training_loss->size() - 1);
// LOG(INFO) << "final validation loss: "
//<< validation_loss->at(validation_loss->size() - 1);
num_epochs_run_ = std::min(epoch + 1, max_epochs);
return converged_;
}
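// Selects an initial learning rate: for each candidate rate in
// 'initial_rates_', starts from zero weights, takes one gradient step per
// sampled example, and returns the rate that yields the smallest loss on the
// sampled examples.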
float LossMinimizer::BestLearningRate() {
// Sample a subset of the data.
std::vector<int> examples(num_search_examples_);
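  // The random engine is default-seeded, so the same subset of examples is
  // sampled on every call.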
std::mt19937 rnd;
  // Note: uniform_int_distribution bounds are inclusive, so the upper bound
  // must be NumExamples() - 1 to yield valid example indices.
  std::uniform_int_distribution<> dis(0,
                                      gradient_evaluator_.NumExamples() - 1);
for (int i = 0; i < num_search_examples_; ++i) {
examples[i] = dis(rnd);
}
float min_loss_rate = initial_rates_[0];
float min_loss = HUGE_VAL;
for (int i = 0; i < initial_rates_.size(); ++i) {
// Initialize parameters to zero.
Weights weights = Weights::Zero(gradient_evaluator_.NumWeights());
// Make updates with the current learning rate.
Weights gradient(weights.size());
for (int example : examples) {
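      // Start from the gradient of the L2 regularization term, then add this
      // example's gradient contribution.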
gradient = l2() * weights;
gradient_evaluator_.AddExampleGradient(weights, example, 1.0f, 1.0f,
&gradient);
weights -= initial_rates_[i] * gradient;
}
// Compute the corresponding loss.
float loss = gradient_evaluator_.Loss(weights, examples);
if (i == 0) {
min_loss = loss;
} else if (loss < min_loss) {
min_loss = loss;
min_loss_rate = initial_rates_[i];
}
}
// Return the learning rate corresponding to the smallest loss.
// LOG(INFO) << "best initial learning rate: " << min_loss_rate;
return min_loss_rate;
}
// Default convergence check: declares convergence when the squared norm of
// the gradient, divided by the number of weights, falls below
// 'convergence_threshold_'.
void LossMinimizer::ConvergenceCheck(const Weights &weights,
const Weights &gradient) {
if (gradient.squaredNorm() / weights.size() < convergence_threshold_) {
set_converged(true);
}
}
void LossMinimizer::SimpleConvergenceCheck(const std::vector<float> &loss) {
  // Check convergence by verifying that the maximum relative loss decrease
  // (loss[t-1] - loss[t]) / loss[t-1] over the last 'num_convergence_epochs_'
  // recorded losses is below 'simple_convergence_threshold_'.
if (loss.size() > num_convergence_epochs_) {
float loss_difference = 0.0f;
for (int i = loss.size() - num_convergence_epochs_; i < loss.size(); ++i) {
if (loss[i - 1] > 0) {
loss_difference = std::max(loss_difference, 1 - loss[i] / loss[i - 1]);
} else {
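        // A non-positive loss means the problem has been solved, so mark the
        // solution as reached and stop.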
set_reached_solution(true);
set_converged(true);
}
}
if (loss_difference < simple_convergence_threshold_) set_converged(true);
}
}
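// Splits the examples into ceil(NumExamples() / batch_size) batches by
// assigning example i to batch (i % num_batches), i.e. round robin.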
void LossMinimizer::set_batch_size(int batch_size) {
int num_batches =
(gradient_evaluator_.NumExamples() + batch_size - 1) / batch_size;
// Assign examples to each batch.
batch_examples_.assign(num_batches, {});
for (auto &batch : batch_examples_) batch.reserve(batch_size);
for (int i = 0; i < gradient_evaluator().NumExamples(); ++i) {
batch_examples_[i % num_batches].push_back(i);
}
}
} // namespace lossmin