Add counting of epochs run in loss minimizer

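LossMinimizer::Run() now records the number of epochs it executed in a
new member, num_epochs_run_, exposed via the num_epochs_run() accessor.
The count is clamped with std::min(epoch + 1, max_epochs), so it equals
max_epochs when the loop runs to completion and epoch + 1 when a
convergence check breaks out early.

Hypothetical usage sketch ('minimizer' stands for any concrete
LossMinimizer subclass; 'weights' is assumed to be initialized):

  std::vector<float> loss;
  bool converged = minimizer.Run(/*max_epochs=*/100, /*loss_epochs=*/1,
                                 /*convergence_epochs=*/1, &weights, &loss);
  if (converged) {
    // num_epochs_run() distinguishes early convergence from running out
    // of the epoch budget.
    LOG(INFO) << "converged after " << minimizer.num_epochs_run()
              << " epochs";
  }
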
Change-Id: I664b37b64c6f2a10a97bd3e8569229971616adb2
diff --git a/lossmin/minimizers/loss-minimizer.cc b/lossmin/minimizers/loss-minimizer.cc
index d6610f8..427c4ac 100644
--- a/lossmin/minimizers/loss-minimizer.cc
+++ b/lossmin/minimizers/loss-minimizer.cc
@@ -5,20 +5,20 @@
 
 #include "lossmin/minimizers/loss-minimizer.h"
 
-#include <random>
 #include <algorithm>
+#include <random>
 
 namespace lossmin {
 
-bool LossMinimizer::Run(
-    int max_epochs, int loss_epochs, int convergence_epochs,
-    Weights *weights, std::vector<float> *loss) {
+bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
+                        Weights *weights, std::vector<float> *loss) {
   // Run for up to 'max_epochs' epochs.
-  for (int epoch = 0; epoch < max_epochs; ++epoch) {
+  int epoch;
+  for (epoch = 0; epoch < max_epochs; ++epoch) {
     // Compute the loss.
     if (epoch % loss_epochs == 0) {
       loss->push_back(Loss(*weights));
-      //LOG(INFO) << epoch << ": " << loss->at(loss->size() - 1);
+      // LOG(INFO) << epoch << ": " << loss->at(loss->size() - 1);
     }
 
     // Set the 'check_convergence' flag.
@@ -33,29 +33,33 @@
     }
 
     // Check the convergence flag.
-    if (converged_) break;
+    if (converged_) {
+      break;
+    }
   }
 
   loss->push_back(Loss(*weights));
-  //LOG(INFO) << "final loss: " << loss->at(loss->size() - 1);
+  // LOG(INFO) << "final loss: " << loss->at(loss->size() - 1);
+  num_epochs_run_ = std::min(epoch + 1, max_epochs);
   return converged_;
 }
 
-bool LossMinimizer::Run(
-    int max_epochs, int loss_epochs, int convergence_epochs,
-    const InstanceSet &validation_instances,
-    const LabelSet &validation_labels, Weights *weights,
-    std::vector<float> *training_loss, std::vector<float> *validation_loss) {
+bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
+                        const InstanceSet &validation_instances,
+                        const LabelSet &validation_labels, Weights *weights,
+                        std::vector<float> *training_loss,
+                        std::vector<float> *validation_loss) {
   // Run for up to 'max_epochs' epochs.
-  for (int epoch = 0; epoch < max_epochs; ++epoch) {
+  int epoch;
+  for (epoch = 0; epoch < max_epochs; ++epoch) {
     // Compute the loss.
     if (epoch % loss_epochs == 0) {
       training_loss->push_back(Loss(*weights));
-      validation_loss->push_back(
-          gradient_evaluator_.Loss(*weights, validation_instances,
-                                    validation_labels));
-      //LOG(INFO) << epoch << ": " << training_loss->at(training_loss->size() - 1)
-                //<< "  " << validation_loss->at(validation_loss->size() - 1);
+      validation_loss->push_back(gradient_evaluator_.Loss(
+          *weights, validation_instances, validation_labels));
+      // LOG(INFO) << epoch << ": "
+      //           << training_loss->at(training_loss->size() - 1) << "  "
+      //           << validation_loss->at(validation_loss->size() - 1);
     }
 
     // Set the 'check_convergence' flag.
@@ -70,18 +74,20 @@
     }
 
     // Check the convergence flag.
-    if (converged_) break;
+    if (converged_) {
+      break;
+    }
   }
 
   // Compute final loss.
   training_loss->push_back(Loss(*weights));
-  validation_loss->push_back(
-      gradient_evaluator_.Loss(*weights, validation_instances,
-                               validation_labels));
-  //LOG(INFO) << "final loss: "
-            //<< training_loss->at(training_loss->size() - 1);
-  //LOG(INFO) << "final validation loss: "
-            //<< validation_loss->at(validation_loss->size() - 1);
+  validation_loss->push_back(gradient_evaluator_.Loss(
+      *weights, validation_instances, validation_labels));
+  // LOG(INFO) << "final loss: "
+  //           << training_loss->at(training_loss->size() - 1);
+  // LOG(INFO) << "final validation loss: "
+  //           << validation_loss->at(validation_loss->size() - 1);
+  num_epochs_run_ = std::min(epoch + 1, max_epochs);
   return converged_;
 }
 
@@ -120,12 +126,12 @@
   }
 
   // Return the learning rate corresponding to the smallest loss.
-  //LOG(INFO) << "best initial learning rate: " << min_loss_rate;
+  // LOG(INFO) << "best initial learning rate: " << min_loss_rate;
   return min_loss_rate;
 }
 
-void LossMinimizer::ConvergenceCheck(
-    const Weights &weights, const Weights &gradient) {
+void LossMinimizer::ConvergenceCheck(const Weights &weights,
+                                     const Weights &gradient) {
   Weights gradient_for_convergence =
       (weights.array() == 0.0f).select(abs(gradient.array()) - l1_, gradient);
   if (gradient_for_convergence.norm() / weights.size() <
diff --git a/lossmin/minimizers/loss-minimizer.h b/lossmin/minimizers/loss-minimizer.h
index 066aba9..48df05a 100644
--- a/lossmin/minimizers/loss-minimizer.h
+++ b/lossmin/minimizers/loss-minimizer.h
@@ -45,8 +45,8 @@
 #include <vector>
 
 #include "lossmin/eigen-types.h"
-#include "lossmin/minimizers/gradient-evaluator.h"
 #include "lossmin/losses/loss-function.h"
+#include "lossmin/minimizers/gradient-evaluator.h"
 #include "third_party/eigen/Eigen/Core"
 
 namespace lossmin {
@@ -59,9 +59,10 @@
  public:
   // Constructor sets the l1 and l2 regularization parameters and
  // 'gradient_evaluator_'.
-  LossMinimizer(
-      float l1, float l2, const GradientEvaluator &gradient_evaluator)
-      : l1_(l1), l2_(l2), gradient_evaluator_(gradient_evaluator),
+  LossMinimizer(float l1, float l2, const GradientEvaluator &gradient_evaluator)
+      : l1_(l1),
+        l2_(l2),
+        gradient_evaluator_(gradient_evaluator),
         converged_(false) {
     std::random_device rd;
     rnd_.seed(rd());
@@ -83,11 +84,11 @@
            Weights *weights, std::vector<float> *loss);
 
   // Runs minimization, evaluating loss on both training and validation data.
-  bool Run(
-      int max_epochs, int loss_epochs, int convergence_epochs,
-      const InstanceSet &validation_instances,
-      const LabelSet &validation_labels, Weights *weights,
-      std::vector<float> *training_loss, std::vector<float> *validation_loss);
+  bool Run(int max_epochs, int loss_epochs, int convergence_epochs,
+           const InstanceSet &validation_instances,
+           const LabelSet &validation_labels, Weights *weights,
+           std::vector<float> *training_loss,
+           std::vector<float> *validation_loss);
 
   // Convenience Run method that evaluates the loss and checks for convergence
   // at every iteration.
@@ -141,7 +142,7 @@
   // stochastic methods.
   virtual void set_initial_learning_rate(float initial_learning_rate) {
     initial_learning_rate_ = initial_learning_rate;
-    //LOG(INFO) << "initial_learning_rate set to " << initial_learning_rate_;
+    // LOG(INFO) << "initial_learning_rate set to " << initial_learning_rate_;
   }
   float initial_learning_rate() const { return initial_learning_rate_; }
   void set_initial_rates(const std::vector<float> &initial_rates) {
@@ -169,9 +170,9 @@
 
   // Shuffles and returns the examples in 'batch_examples_[batch]'.
   const std::vector<int> &batch_examples(int batch) {
-    //DCHECK(batch < batch_examples_.size());
-    std::shuffle(batch_examples_[batch].begin(),
-        batch_examples_[batch].end(), rnd_);
+    // DCHECK(batch < batch_examples_.size());
+    std::shuffle(batch_examples_[batch].begin(), batch_examples_[batch].end(),
+                 rnd_);
     return batch_examples_[batch];
   }
 
@@ -195,9 +196,7 @@
 
   // Getter/setter for 'num_iterations_per_stage_', used in
   // StochasticVarianceReducedGradient.
-  int num_iterations_per_stage() const {
-    return num_iterations_per_stage_;
-  }
+  int num_iterations_per_stage() const { return num_iterations_per_stage_; }
   void set_num_iterations_per_stage(int num_iterations_per_stage) {
     num_iterations_per_stage_ = num_iterations_per_stage;
   }
@@ -209,6 +208,9 @@
     project_weights_ = project_weights;
   }
 
+  // Returns the number of epochs executed during the last call to Run().
+  int num_epochs_run() const { return num_epochs_run_; }
+
   // Applies L1Prox coefficientwise to 'weights' and 'threshold'.
   static void L1Prox(float threshold, Weights *weights) {
     for (int i = 0; i < weights->size(); ++i) {
@@ -248,10 +250,21 @@
   const GradientEvaluator &gradient_evaluator_;
 
   // Convergence parameters.
+  // The convergence threshold should be strict but not too strict; how
+  // strict depends on the precision used. Since single-precision float
+  // provides roughly 1e-7 relative accuracy, 1e-6 or 1e-7 is about the
+  // strictest threshold one should use (this also depends on how the
+  // convergence checks are implemented). The threshold can be tightened when
+  // the minimizer is initialized, so the default should be laxer (e.g. 1e-5).
   bool converged_ = false;  // convergence flag set by convergence checks
   float convergence_threshold_ = 1e-5;  // threshold for assessing convergence
   bool use_simple_convergence_check_ = false;  // which convergence check to use
-  int num_convergence_epochs_ = 5;  // used in SimpleConvergenceCheck
+  int num_convergence_epochs_ = 5;             // used in SimpleConvergenceCheck
+
+  // The number of epochs (iterations) executed during the last call to Run().
+  // Each epoch is one step toward the minimum during minimization.
+  // Updated every time Run() is called.
+  int num_epochs_run_ = 0;
 
   // Initial learning rate, used in stochastic methods.
   float initial_learning_rate_ = 0.01;
@@ -285,4 +298,3 @@
 };
 
 }  // namespace lossmin
-