Improving convergence checking in the minimizer.

Splitting into two checks:
1. That the solver reached the solution.
2. That the solver converged (either reached the solution or stopped).

This is because check 2 may be satisfied before check 1, which may lead to numerical problems.

Change-Id: Ide3ad6ee0fb97b96694d40ae9c59a86ffff78d8b
diff --git a/lossmin/minimizers/loss-minimizer.cc b/lossmin/minimizers/loss-minimizer.cc
index 9e8d6d6..fef8e6b 100644
--- a/lossmin/minimizers/loss-minimizer.cc
+++ b/lossmin/minimizers/loss-minimizer.cc
@@ -27,8 +27,12 @@
     // Run for one epoch to update the parameters.
     EpochUpdate(weights, epoch, check_convergence);
 
-    // Optionally do a simple convergence check.
-    if (check_convergence && use_simple_convergence_check_) {
+    // We should also periodically check if the algorithm has not stopped;
+    // numerical problems can be encountered if
+    // the convergence is checked only by CheckConvergence():
+    // if the algorithm "stops" for any reason before solving the problem
+    // up to the given accuracy.
+    if (check_convergence) {
       SimpleConvergenceCheck(*loss);
     }
 
@@ -147,10 +151,11 @@
       if (loss[i - 1] > 0) {
         loss_difference = std::max(loss_difference, 1 - loss[i] / loss[i - 1]);
       } else {
+        set_reached_solution(true);
         set_converged(true);
       }
     }
-    if (loss_difference < convergence_threshold_) set_converged(true);
+    if (loss_difference < simple_convergence_threshold_) set_converged(true);
   }
 }
 
diff --git a/lossmin/minimizers/loss-minimizer.h b/lossmin/minimizers/loss-minimizer.h
index 01003ad..0f05d10 100644
--- a/lossmin/minimizers/loss-minimizer.h
+++ b/lossmin/minimizers/loss-minimizer.h
@@ -122,6 +122,10 @@
   // Setters and getters for convergence criteria parameters.
   bool converged() const { return converged_; }
   void set_converged(bool converged) { converged_ = converged; }
+  bool reached_solution() const { return reached_solution_; }
+  void set_reached_solution(bool reached_solution) {
+    reached_solution_ = reached_solution;
+  }
   void set_use_simple_convergence_check(bool use_simple_convergence_check) {
     use_simple_convergence_check_ = use_simple_convergence_check;
   }
@@ -129,6 +133,12 @@
   void set_convergence_threshold(float convergence_threshold) {
     convergence_threshold_ = convergence_threshold;
   }
+  float simple_convergence_threshold() const {
+    return simple_convergence_threshold_;
+  }
+  void set_simple_convergence_threshold(float simple_convergence_threshold) {
+    simple_convergence_threshold_ = simple_convergence_threshold;
+  }
   void set_num_convergence_epochs(int num_convergence_epochs) {
     num_convergence_epochs_ = num_convergence_epochs;
   }
@@ -263,7 +273,13 @@
   // This can also be updated during initialization of the minimizer so the
   // default value should be less strict (e.g. 1e-5).
   bool converged_ = false;  // convergence flag set by convergence checks
-  float convergence_threshold_ = 1e-5;  // threshold for assessing convergence
+  bool reached_solution_ =
+      false;  // flag indicating whether the algorithm
+              // actually reached the solution as determined by ConvergenceCheck
+  float convergence_threshold_ =
+      1e-5;  // threshold for assessing convergence by ConvergenceCheck
+  float simple_convergence_threshold_ =
      1e-5;  // threshold for assessing convergence by SimpleConvergenceCheck
   bool use_simple_convergence_check_ = false;  // which convergence check to use
   int num_convergence_epochs_ = 5;             // used in SimpleConvergenceCheck
 
diff --git a/lossmin/minimizers/parallel-boosting-with-momentum.cc b/lossmin/minimizers/parallel-boosting-with-momentum.cc
index f197594..127842c 100644
--- a/lossmin/minimizers/parallel-boosting-with-momentum.cc
+++ b/lossmin/minimizers/parallel-boosting-with-momentum.cc
@@ -44,7 +44,8 @@
   // Eigen library recommends computations step-by-step for best perfomance
   Weights residual = gradient_evaluator().instances() * weights;
   residual -= gradient_evaluator().labels();
-  float loss = 0.5 * residual.squaredNorm();
+  float loss =
+      0.5 * residual.squaredNorm() / gradient_evaluator().NumExamples();
   if (l2() > 0.0f) loss += 0.5 * l2() * weights.squaredNorm();
   if (l1() > 0.0f) loss += l1() * weights.cwiseAbs().sum();
   return loss;
@@ -70,6 +71,7 @@
   }
 
   if (std::sqrt(error_squared) / weights.size() < convergence_threshold()) {
+    set_reached_solution(true);
     set_converged(true);
   }
 }