Add counting of epochs run in loss minimizer
Change-Id: I664b37b64c6f2a10a97bd3e8569229971616adb2
diff --git a/lossmin/minimizers/loss-minimizer.cc b/lossmin/minimizers/loss-minimizer.cc
index d6610f8..427c4ac 100644
--- a/lossmin/minimizers/loss-minimizer.cc
+++ b/lossmin/minimizers/loss-minimizer.cc
@@ -5,20 +5,20 @@
#include "lossmin/minimizers/loss-minimizer.h"
-#include <random>
#include <algorithm>
+#include <random>
namespace lossmin {
-bool LossMinimizer::Run(
- int max_epochs, int loss_epochs, int convergence_epochs,
- Weights *weights, std::vector<float> *loss) {
+bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
+ Weights *weights, std::vector<float> *loss) {
// Run for up to 'max_epochs' epochs.
- for (int epoch = 0; epoch < max_epochs; ++epoch) {
+ int epoch;
+ for (epoch = 0; epoch < max_epochs; ++epoch) {
// Compute the loss.
if (epoch % loss_epochs == 0) {
loss->push_back(Loss(*weights));
- //LOG(INFO) << epoch << ": " << loss->at(loss->size() - 1);
+ // LOG(INFO) << epoch << ": " << loss->at(loss->size() - 1);
}
// Set the 'check_convergence' flag.
@@ -33,29 +33,33 @@
}
// Check the convergence flag.
- if (converged_) break;
+ if (converged_) {
+ break;
+ }
}
loss->push_back(Loss(*weights));
- //LOG(INFO) << "final loss: " << loss->at(loss->size() - 1);
+ // LOG(INFO) << "final loss: " << loss->at(loss->size() - 1);
+ num_epochs_run_ = std::min(epoch + 1, max_epochs);
return converged_;
}
-bool LossMinimizer::Run(
- int max_epochs, int loss_epochs, int convergence_epochs,
- const InstanceSet &validation_instances,
- const LabelSet &validation_labels, Weights *weights,
- std::vector<float> *training_loss, std::vector<float> *validation_loss) {
+bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
+ const InstanceSet &validation_instances,
+ const LabelSet &validation_labels, Weights *weights,
+ std::vector<float> *training_loss,
+ std::vector<float> *validation_loss) {
// Run for up to 'max_epochs' epochs.
- for (int epoch = 0; epoch < max_epochs; ++epoch) {
+ int epoch;
+ for (epoch = 0; epoch < max_epochs; ++epoch) {
// Compute the loss.
if (epoch % loss_epochs == 0) {
training_loss->push_back(Loss(*weights));
- validation_loss->push_back(
- gradient_evaluator_.Loss(*weights, validation_instances,
- validation_labels));
- //LOG(INFO) << epoch << ": " << training_loss->at(training_loss->size() - 1)
- //<< " " << validation_loss->at(validation_loss->size() - 1);
+ validation_loss->push_back(gradient_evaluator_.Loss(
+ *weights, validation_instances, validation_labels));
+ // LOG(INFO) << epoch << ": " << training_loss->at(training_loss->size() -
+ // 1)
+      // << " " << validation_loss->at(validation_loss->size() - 1);
}
// Set the 'check_convergence' flag.
@@ -70,18 +74,20 @@
}
// Check the convergence flag.
- if (converged_) break;
+ if (converged_) {
+ break;
+ }
}
// Compute final loss.
training_loss->push_back(Loss(*weights));
- validation_loss->push_back(
- gradient_evaluator_.Loss(*weights, validation_instances,
- validation_labels));
- //LOG(INFO) << "final loss: "
- //<< training_loss->at(training_loss->size() - 1);
- //LOG(INFO) << "final validation loss: "
- //<< validation_loss->at(validation_loss->size() - 1);
+ validation_loss->push_back(gradient_evaluator_.Loss(
+ *weights, validation_instances, validation_labels));
+ // LOG(INFO) << "final loss: "
+  // << training_loss->at(training_loss->size() - 1);
+ // LOG(INFO) << "final validation loss: "
+  // << validation_loss->at(validation_loss->size() - 1);
+ num_epochs_run_ = std::min(epoch + 1, max_epochs);
return converged_;
}
@@ -120,12 +126,12 @@
}
// Return the learning rate corresponding to the smallest loss.
- //LOG(INFO) << "best initial learning rate: " << min_loss_rate;
+ // LOG(INFO) << "best initial learning rate: " << min_loss_rate;
return min_loss_rate;
}
-void LossMinimizer::ConvergenceCheck(
- const Weights &weights, const Weights &gradient) {
+void LossMinimizer::ConvergenceCheck(const Weights &weights,
+ const Weights &gradient) {
Weights gradient_for_convergence =
(weights.array() == 0.0f).select(abs(gradient.array()) - l1_, gradient);
if (gradient_for_convergence.norm() / weights.size() <
diff --git a/lossmin/minimizers/loss-minimizer.h b/lossmin/minimizers/loss-minimizer.h
index 066aba9..48df05a 100644
--- a/lossmin/minimizers/loss-minimizer.h
+++ b/lossmin/minimizers/loss-minimizer.h
@@ -45,8 +45,8 @@
#include <vector>
#include "lossmin/eigen-types.h"
-#include "lossmin/minimizers/gradient-evaluator.h"
#include "lossmin/losses/loss-function.h"
+#include "lossmin/minimizers/gradient-evaluator.h"
#include "third_party/eigen/Eigen/Core"
namespace lossmin {
@@ -59,9 +59,10 @@
public:
// Constructor sets the l1 and l2 regularization parameters and
// 'gradient_evalutor_'.
- LossMinimizer(
- float l1, float l2, const GradientEvaluator &gradient_evaluator)
- : l1_(l1), l2_(l2), gradient_evaluator_(gradient_evaluator),
+ LossMinimizer(float l1, float l2, const GradientEvaluator &gradient_evaluator)
+ : l1_(l1),
+ l2_(l2),
+ gradient_evaluator_(gradient_evaluator),
converged_(false) {
std::random_device rd;
rnd_.seed(rd());
@@ -83,11 +84,11 @@
Weights *weights, std::vector<float> *loss);
// Runs minimization, evaluating loss on both training and validation data.
- bool Run(
- int max_epochs, int loss_epochs, int convergence_epochs,
- const InstanceSet &validation_instances,
- const LabelSet &validation_labels, Weights *weights,
- std::vector<float> *training_loss, std::vector<float> *validation_loss);
+ bool Run(int max_epochs, int loss_epochs, int convergence_epochs,
+ const InstanceSet &validation_instances,
+ const LabelSet &validation_labels, Weights *weights,
+ std::vector<float> *training_loss,
+ std::vector<float> *validation_loss);
// Convenience Run method that evaluates the loss and checks for convergence
// at every iteration.
@@ -141,7 +142,7 @@
// stochastic methods.
virtual void set_initial_learning_rate(float initial_learning_rate) {
initial_learning_rate_ = initial_learning_rate;
- //LOG(INFO) << "initial_learning_rate set to " << initial_learning_rate_;
+ // LOG(INFO) << "initial_learning_rate set to " << initial_learning_rate_;
}
float initial_learning_rate() const { return initial_learning_rate_; }
void set_initial_rates(const std::vector<float> &initial_rates) {
@@ -169,9 +170,9 @@
// Shuffles and returns the examples in 'batch_examples_[batch]'.
const std::vector<int> &batch_examples(int batch) {
- //DCHECK(batch < batch_examples_.size());
- std::shuffle(batch_examples_[batch].begin(),
- batch_examples_[batch].end(), rnd_);
+ // DCHECK(batch < batch_examples_.size());
+ std::shuffle(batch_examples_[batch].begin(), batch_examples_[batch].end(),
+ rnd_);
return batch_examples_[batch];
}
@@ -195,9 +196,7 @@
// Getter/setter for 'num_iterations_per_stage_', used in
// StochasticVarianceReducedGradient.
- int num_iterations_per_stage() const {
- return num_iterations_per_stage_;
- }
+ int num_iterations_per_stage() const { return num_iterations_per_stage_; }
void set_num_iterations_per_stage(int num_iterations_per_stage) {
num_iterations_per_stage_ = num_iterations_per_stage;
}
@@ -209,6 +208,9 @@
project_weights_ = project_weights;
}
+  // Returns the number of epochs (iterations) run the last time Run() was executed.
+ int num_epochs_run() const { return num_epochs_run_; }
+
// Applies L1Prox coefficientwise to 'weights' and 'threshold'.
static void L1Prox(float threshold, Weights *weights) {
for (int i = 0; i < weights->size(); ++i) {
@@ -248,10 +250,21 @@
const GradientEvaluator &gradient_evaluator_;
// Convergence parameters.
+ // Convergence threshold should be strict but not too strict.
+  // This will depend on the precision used. As float gives only about 1e-7
+  // relative accuracy, 1e-6 is probably the strictest one should use (but this
+  // also depends on the implementation of convergence checks).
+ // This can also be updated during initialization of the minimizer so the
+ // default value should be less strict (e.g. 1e-5).
bool converged_ = false; // convergence flag set by convergence checks
float convergence_threshold_ = 1e-5; // threshold for assessing convergence
bool use_simple_convergence_check_ = false; // which convergence check to use
- int num_convergence_epochs_ = 5; // used in SimpleConvergenceCheck
+ int num_convergence_epochs_ = 5; // used in SimpleConvergenceCheck
+
+  // The number of epochs (iterations) run the last time Run() was executed.
+  // In other words, each epoch is one step towards the minimum taken during
+  // minimization. This variable is updated each time Run() is called.
+ int num_epochs_run_ = 0;
// Initial learning rate, used in stochastic methods.
float initial_learning_rate_ = 0.01;
@@ -285,4 +298,3 @@
};
} // namespace lossmin
-