// Copyright 2014 Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)

#include "lossmin/minimizers/loss-minimizer.h"

#include <algorithm>
#include <cmath>  // for HUGE_VAL
#include <random>
#include <vector>

namespace lossmin {

bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
                        Weights *weights, std::vector<float> *loss) {
  // Run for up to 'max_epochs' epochs.
  int epoch;
  for (epoch = 0; epoch < max_epochs; ++epoch) {
    // Record the loss every 'loss_epochs' epochs.
    if (epoch % loss_epochs == 0) {
      loss->push_back(Loss(*weights));
      // LOG(INFO) << epoch << ": " << loss->back();
    }

    // Set the 'check_convergence' flag.
    bool check_convergence = (epoch > 0) && (epoch % convergence_epochs == 0);

    // Run for one epoch to update the parameters.
    EpochUpdate(weights, epoch, check_convergence);

    // Periodically also verify that the loss is still decreasing. Relying on
    // CheckConvergence() alone can run into numerical problems: the algorithm
    // may effectively stall before solving the problem to the requested
    // accuracy, in which case the gradient-based check never fires.
    if (check_convergence) {
      SimpleConvergenceCheck(*loss);
    }

    // Check the convergence flag.
    if (converged_) {
      break;
    }
  }

  // Record the final loss.
  loss->push_back(Loss(*weights));
  // LOG(INFO) << "final loss: " << loss->back();
  num_epochs_run_ = std::min(epoch + 1, max_epochs);
  return converged_;
}

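// Example usage (a sketch, not code from this library): a caller holding a
// concrete LossMinimizer subclass might drive training as follows, where
// 'minimizer' and 'num_weights' are hypothetical placeholders:
//
//   Weights weights = Weights::Zero(num_weights);
//   std::vector<float> loss;
//   bool converged = minimizer.Run(/*max_epochs=*/100, /*loss_epochs=*/10,
//                                  /*convergence_epochs=*/10, &weights, &loss);
//
// On return, 'loss' holds the loss recorded every 'loss_epochs' epochs plus
// the final loss, and num_epochs_run_ records how many epochs were executed.
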
bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
                        const InstanceSet &validation_instances,
                        const LabelSet &validation_labels, Weights *weights,
                        std::vector<float> *training_loss,
                        std::vector<float> *validation_loss) {
  // Run for up to 'max_epochs' epochs.
  int epoch;
  for (epoch = 0; epoch < max_epochs; ++epoch) {
    // Record the training and validation loss every 'loss_epochs' epochs.
    if (epoch % loss_epochs == 0) {
      training_loss->push_back(Loss(*weights));
      validation_loss->push_back(gradient_evaluator_.Loss(
          *weights, validation_instances, validation_labels));
      // LOG(INFO) << epoch << ": " << training_loss->back() << " "
      //           << validation_loss->back();
    }

    // Set the 'check_convergence' flag.
    bool check_convergence = (epoch > 0) && (epoch % convergence_epochs == 0);

    // Run for one epoch to update the parameters.
    EpochUpdate(weights, epoch, check_convergence);

    // Optionally do a simple convergence check on the training loss.
    if (check_convergence && use_simple_convergence_check_) {
      SimpleConvergenceCheck(*training_loss);
    }

    // Check the convergence flag.
    if (converged_) {
      break;
    }
  }

  // Compute the final training and validation loss.
  training_loss->push_back(Loss(*weights));
  validation_loss->push_back(gradient_evaluator_.Loss(
      *weights, validation_instances, validation_labels));
  // LOG(INFO) << "final loss: " << training_loss->back();
  // LOG(INFO) << "final validation loss: " << validation_loss->back();
  num_epochs_run_ = std::min(epoch + 1, max_epochs);
  return converged_;
}

float LossMinimizer::BestLearningRate() {
  // Sample a random subset of the data. The generator is default-seeded, so
  // the same subset is drawn on every call.
  std::vector<int> examples(num_search_examples_);
  std::mt19937 rnd;
  // uniform_int_distribution includes both bounds, so the upper bound must
  // be NumExamples() - 1 to produce valid example indices.
  std::uniform_int_distribution<> dis(0,
                                      gradient_evaluator_.NumExamples() - 1);
  for (int i = 0; i < num_search_examples_; ++i) {
    examples[i] = dis(rnd);
  }

  float min_loss_rate = initial_rates_[0];
  float min_loss = HUGE_VAL;
  for (int i = 0; i < static_cast<int>(initial_rates_.size()); ++i) {
    // Initialize parameters to zero.
    Weights weights = Weights::Zero(gradient_evaluator_.NumWeights());

    // Make single-example updates with the current learning rate.
    Weights gradient(weights.size());
    for (int example : examples) {
      // Gradient of the l2 regularization term, plus the example's gradient.
      gradient = l2() * weights;
      gradient_evaluator_.AddExampleGradient(weights, example, 1.0f, 1.0f,
                                             &gradient);
      weights -= initial_rates_[i] * gradient;
    }

    // Compute the corresponding loss; keep the rate with the smallest loss.
    float loss = gradient_evaluator_.Loss(weights, examples);
    if (loss < min_loss) {
      min_loss = loss;
      min_loss_rate = initial_rates_[i];
    }
  }

  // Return the learning rate corresponding to the smallest loss.
  // LOG(INFO) << "best initial learning rate: " << min_loss_rate;
  return min_loss_rate;
}
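
// The loop above is a one-dimensional sweep over candidate rates (described
// here for illustration): each rate in initial_rates_ is tried on the same
// sampled examples, starting from zero weights, and the rate achieving the
// smallest sampled loss wins. Because the generator is default-seeded, the
// sweep is deterministic across calls.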

// By default, declare convergence when the squared norm of the gradient,
// averaged over the number of weights, falls below 'convergence_threshold_'.
void LossMinimizer::ConvergenceCheck(const Weights &weights,
                                     const Weights &gradient) {
  if (gradient.squaredNorm() / weights.size() < convergence_threshold_) {
    set_converged(true);
  }
}
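
// Worked example (illustrative numbers): for a gradient with squaredNorm()
// of 1e-8 over 100 weights, the tested value is 1e-10, so convergence is
// declared whenever convergence_threshold_ exceeds 1e-10.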

void LossMinimizer::SimpleConvergenceCheck(const std::vector<float> &loss) {
  // Check convergence by verifying that the maximum relative loss decrease
  // (loss[t-1] - loss[t]) / loss[t-1] over the last 'num_convergence_epochs_'
  // recorded losses is below 'simple_convergence_threshold_'.
  if (loss.size() > num_convergence_epochs_) {
    float loss_difference = 0.0f;
    for (int i = loss.size() - num_convergence_epochs_; i < loss.size(); ++i) {
      if (loss[i - 1] > 0) {
        loss_difference = std::max(loss_difference, 1 - loss[i] / loss[i - 1]);
      } else {
        // A non-positive loss cannot decrease further in relative terms;
        // treat the problem as solved.
        set_reached_solution(true);
        set_converged(true);
      }
    }
    if (loss_difference < simple_convergence_threshold_) set_converged(true);
  }
}
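
// Worked example (illustrative numbers): with num_convergence_epochs_ = 2
// and recorded losses {2.0, 1.9, 1.89}, the relative decreases are
// (2.0 - 1.9) / 2.0 = 0.05 and (1.9 - 1.89) / 1.9 ~= 0.0053, so
// loss_difference = 0.05 and convergence is declared only if
// 0.05 < simple_convergence_threshold_.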

void LossMinimizer::set_batch_size(int batch_size) {
  // Number of batches needed to cover all examples, rounding up.
  int num_batches =
      (gradient_evaluator_.NumExamples() + batch_size - 1) / batch_size;

  // Assign examples to batches round-robin, so batch sizes differ by at
  // most one.
  batch_examples_.assign(num_batches, {});
  for (auto &batch : batch_examples_) batch.reserve(batch_size);
  for (int i = 0; i < gradient_evaluator().NumExamples(); ++i) {
    batch_examples_[i % num_batches].push_back(i);
  }
}
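
// Worked example: with 10 examples and batch_size = 4, num_batches =
// (10 + 4 - 1) / 4 = 3, and the round-robin assignment yields batches
// {0, 3, 6, 9}, {1, 4, 7}, and {2, 5, 8}.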

}  // namespace lossmin