// Copyright 2014 Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
#include "lossmin/minimizers/loss-minimizer.h"
#include <algorithm>
#include <random>
namespace lossmin {
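// Minimizes the loss for up to 'max_epochs' epochs, recording the training
// loss in 'loss' every 'loss_epochs' epochs and checking for convergence
// every 'convergence_epochs' epochs. Returns true if the run converged.
//
// Usage sketch ('minimizer' stands for an instance of a concrete
// LossMinimizer subclass and the parameter values are illustrative):
//
//   std::vector<float> loss;
//   Weights weights = Weights::Zero(num_weights);
//   bool converged = minimizer.Run(/*max_epochs=*/100, /*loss_epochs=*/1,
//                                  /*convergence_epochs=*/5, &weights, &loss);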
bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
Weights *weights, std::vector<float> *loss) {
// Run for up to 'max_epochs' epochs.
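  // 'epoch' is declared outside the loop so that the number of epochs
  // actually run can be recorded after the loop exits.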
int epoch;
for (epoch = 0; epoch < max_epochs; ++epoch) {
// Compute the loss.
if (epoch % loss_epochs == 0) {
loss->push_back(Loss(*weights));
// LOG(INFO) << epoch << ": " << loss->at(loss->size() - 1);
}
// Set the 'check_convergence' flag.
bool check_convergence = (epoch > 0) && (epoch % convergence_epochs == 0);
// Run for one epoch to update the parameters.
EpochUpdate(weights, epoch, check_convergence);
    // Also periodically run a simple loss-based convergence check. Relying on
    // ConvergenceCheck() alone can miss the case where the algorithm stalls
    // for any reason (e.g. numerical problems) before solving the problem to
    // the requested accuracy.
if (check_convergence) {
SimpleConvergenceCheck(*loss);
}
// Check the convergence flag.
if (converged_) {
break;
}
}
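  // Compute the final loss.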
loss->push_back(Loss(*weights));
// LOG(INFO) << "final loss: " << loss->at(loss->size() - 1);
num_epochs_run_ = std::min(epoch + 1, max_epochs);
return converged_;
}
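// Same as Run() above, but additionally records the loss on the provided
// validation instances and labels whenever the training loss is recorded,
// and once more after the final epoch.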
bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
const InstanceSet &validation_instances,
const LabelSet &validation_labels, Weights *weights,
std::vector<float> *training_loss,
std::vector<float> *validation_loss) {
// Run for up to 'max_epochs' epochs.
int epoch;
for (epoch = 0; epoch < max_epochs; ++epoch) {
// Compute the loss.
if (epoch % loss_epochs == 0) {
training_loss->push_back(Loss(*weights));
validation_loss->push_back(gradient_evaluator_.Loss(
*weights, validation_instances, validation_labels));
      // LOG(INFO) << epoch << ": "
      //           << training_loss->at(training_loss->size() - 1) << " "
      //           << validation_loss->at(validation_loss->size() - 1);
}
// Set the 'check_convergence' flag.
bool check_convergence = (epoch > 0) && (epoch % convergence_epochs == 0);
// Run for one epoch to update the parameters.
EpochUpdate(weights, epoch, check_convergence);
// Optionally do a simple convergence check.
if (check_convergence && use_simple_convergence_check_) {
SimpleConvergenceCheck(*training_loss);
}
// Check the convergence flag.
if (converged_) {
break;
}
}
// Compute final loss.
training_loss->push_back(Loss(*weights));
validation_loss->push_back(gradient_evaluator_.Loss(
*weights, validation_instances, validation_labels));
// LOG(INFO) << "final loss: "
//<< training_loss->at(training_loss->size() - 1);
// LOG(INFO) << "final validation loss: "
//<< validation_loss->at(validation_loss->size() - 1);
num_epochs_run_ = std::min(epoch + 1, max_epochs);
return converged_;
}
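// Selects an initial learning rate: for each candidate rate in
// 'initial_rates_', starts from zero weights, takes one gradient step per
// sampled example, and returns the rate that yields the smallest loss on the
// sampled examples.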
float LossMinimizer::BestLearningRate() {
// Sample a subset of the data.
std::vector<int> examples(num_search_examples_);
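  // The random engine is default-seeded, so the same subset of examples is
  // sampled on every call.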
std::mt19937 rnd;
  // Note: uniform_int_distribution bounds are inclusive, so the upper bound
  // must be NumExamples() - 1 to yield valid example indices.
  std::uniform_int_distribution<> dis(0,
                                      gradient_evaluator_.NumExamples() - 1);
for (int i = 0; i < num_search_examples_; ++i) {
examples[i] = dis(rnd);
}
float min_loss_rate = initial_rates_[0];
float min_loss = HUGE_VAL;
for (int i = 0; i < initial_rates_.size(); ++i) {
// Initialize parameters to zero.
Weights weights = Weights::Zero(gradient_evaluator_.NumWeights());
// Make updates with the current learning rate.
Weights gradient(weights.size());
for (int example : examples) {
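      // Start from the gradient of the L2 regularization term, then add this
      // example's gradient contribution.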
gradient = l2() * weights;
gradient_evaluator_.AddExampleGradient(weights, example, 1.0f, 1.0f,
&gradient);
weights -= initial_rates_[i] * gradient;
}
// Compute the corresponding loss.
float loss = gradient_evaluator_.Loss(weights, examples);
if (i == 0) {
min_loss = loss;
} else if (loss < min_loss) {
min_loss = loss;
min_loss_rate = initial_rates_[i];
}
}
// Return the learning rate corresponding to the smallest loss.
// LOG(INFO) << "best initial learning rate: " << min_loss_rate;
return min_loss_rate;
}
// Default convergence check: declares convergence when the squared norm of
// the gradient, divided by the number of weights, falls below
// 'convergence_threshold_'.
void LossMinimizer::ConvergenceCheck(const Weights &weights,
const Weights &gradient) {
if (gradient.squaredNorm() / weights.size() < convergence_threshold_) {
set_converged(true);
}
}
void LossMinimizer::SimpleConvergenceCheck(const std::vector<float> &loss) {
  // Check convergence by verifying that the maximum relative loss decrease
  // (loss[t-1] - loss[t]) / loss[t-1] over the last 'num_convergence_epochs_'
  // recorded losses is below 'simple_convergence_threshold_'.
if (loss.size() > num_convergence_epochs_) {
float loss_difference = 0.0f;
for (int i = loss.size() - num_convergence_epochs_; i < loss.size(); ++i) {
if (loss[i - 1] > 0) {
loss_difference = std::max(loss_difference, 1 - loss[i] / loss[i - 1]);
} else {
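        // A non-positive loss means the problem has been solved, so mark the
        // solution as reached and stop.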
set_reached_solution(true);
set_converged(true);
}
}
if (loss_difference < simple_convergence_threshold_) set_converged(true);
}
}
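// Splits the examples into ceil(NumExamples() / batch_size) batches by
// assigning example i to batch (i % num_batches), i.e. round robin.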
void LossMinimizer::set_batch_size(int batch_size) {
int num_batches =
(gradient_evaluator_.NumExamples() + batch_size - 1) / batch_size;
// Assign examples to each batch.
batch_examples_.assign(num_batches, {});
for (auto &batch : batch_examples_) batch.reserve(batch_size);
for (int i = 0; i < gradient_evaluator().NumExamples(); ++i) {
batch_examples_[i % num_batches].push_back(i);
}
}
} // namespace lossmin