Removing most of the unused code from the linear algebra library (lossmin).
This involves mostly code that was specific to solvers that we do not use.
Change-Id: Id61ebc04f77209cbadb549e4b1cb9779c33543cc
diff --git a/lossmin/losses/inner-product-loss-function.h b/lossmin/losses/inner-product-loss-function.h
index a5c8d22..d00cef2 100644
--- a/lossmin/losses/inner-product-loss-function.h
+++ b/lossmin/losses/inner-product-loss-function.h
@@ -86,43 +86,6 @@
float curvature_;
};
-// Binary logistic regression with labels in {-1, 1}.
-class LogisticRegressionLossFunction : public InnerProductLossFunction {
- public:
- LogisticRegressionLossFunction() { set_curvature(0.25); }
-
- // Returns the logistic loss.
- float InnerProductExampleLoss(float inner_product, float label)
- const override {
- return log1pf(exp(-label * inner_product));
- }
-
- // Returns the gradient of the logistic loss wrt 'inner_product'.
- float InnerProductExampleGradient(float inner_product, float label)
- const override {
- if (label * inner_product < kInputMin) return -label;
- return -label / (1.0f + exp(label * inner_product));
- }
-
- // Assigns a label in {-1, 1} given 'inner_product'.
- float InnerProductPredictLabel(float inner_product) const override {
- if (inner_product > 0) return 1.0f;
- return -1.0f;
- }
-
- // Returns the magnitude of the second derivative of the loss, evaluated at
- // 'inner_product' and 'label'.
- float InnerProductCurvature(float inner_product, float label)
- const override {
- if (-label * inner_product < kInputMin) return 0.0f;
- float label_prob = 1.0f / (1.0f + exp(-label * inner_product));
- return label_prob * (1.0f - label_prob);
- }
-
- // Min allowed exp input.
- const float kInputMin = log(FLT_EPSILON);
-};
-
// Linear regression with squared error loss.
class LinearRegressionLossFunction : public InnerProductLossFunction {
public:
@@ -146,131 +109,4 @@
}
};
-// Poisson regression, where the distribution mean is the inner product between
-// an instance and parameters.
-class PoissonRegressionLossFunction : public InnerProductLossFunction {
- public:
- PoissonRegressionLossFunction() {}
-
- // Returns the Poisson regression loss.
- float InnerProductExampleLoss(float inner_product, float label)
- const override {
- return exp(inner_product) - label * inner_product;
- }
-
- // Returns the gradient of the Poisson regression loss wrt 'inner_product'.
- float InnerProductExampleGradient(float inner_product, float label)
- const override {
- return exp(inner_product) - label;
- }
-
- // Returns the mode of a Poisson distribution with mean 'inner_product'.
- float InnerProductPredictLabel(float inner_product) const override {
- return floor(inner_product);
- }
-
- // Returns the magnitude of the second derivative of the loss, evaluated at
- // 'inner_product' and 'label'.
- float InnerProductCurvature(float inner_product, float label)
- const override {
- return exp(inner_product);
- }
-};
-
-// Binary logistic regression where 'label' = P(true_label = 1) is in [0, 1],
-// and the loss is averaged over labels:
-// label * logloss(inner_product, 1) + (1 - label) * logloss(inner_product, 0).
-class AveragedLogisticLossFunction : public InnerProductLossFunction {
- public:
- AveragedLogisticLossFunction() { set_curvature(0.25); }
-
- // Returns the averaged logistic loss.
- float InnerProductExampleLoss(float inner_product, float label)
- const override {
- return (1.0 - label) * log1pf(exp(inner_product))
- + label * log1pf(exp(-inner_product));
- }
-
- // Returns the gradient of the averaged logistic loss wrt 'inner_product'.
- float InnerProductExampleGradient(float inner_product, float label)
- const override {
- if (-inner_product < kInputMin) return -label + 1.0f;
- return -label + 1.0f / (1.0f + exp(-inner_product));
- }
-
- // Returns the probability that the label is 1 given the inner product.
- float InnerProductPredictLabel(float inner_product) const override {
- if (-inner_product < kInputMin) return 1.0f;
- return 1.0f / (1.0f + exp(-inner_product));
- }
-
- // Returns the magnitude of the second derivative of the loss, evaluated at
- // 'inner_product' and 'label'.
- float InnerProductCurvature(float inner_product, float label)
- const override {
- float label_prob = 1.0f / (1.0f + exp(-inner_product));
- if (-inner_product < kInputMin) return 0.0f;
- return label_prob * (1.0f - label_prob);
- }
-
- // Min allowed exp input.
- const float kInputMin = log(FLT_EPSILON);
-};
-
-
-// Smooth hinge loss. If a = inner_product * label, the loss is
-// loss = 0.0 a > threshold
-// 0.5 * (a - threshold)^2 0 < a <= threshold
-// 0.5 * threshold^2 - threshold * a a <= 0
-class SmoothHingeLossFunction : public InnerProductLossFunction {
- public:
- SmoothHingeLossFunction() { set_curvature(1.0f); }
-
- /*
- void Setup(const LossFunctionConfigContainer &config) override {
- if (config.has_smooth_hinge_threshold()) {
- threshold_ = config.smooth_hinge_threshold();
- }
- set_curvature(threshold_);
- }
- */
-
- float InnerProductExampleLoss(float inner_product, float label)
- const override {
- float hinge_input = inner_product *= label;
- if (hinge_input >= threshold_) {
- return 0.0;
- } else if (hinge_input <= 0.0f) {
- return (0.5 * threshold_ * threshold_ - threshold_ * hinge_input);
- } else {
- return 0.5 * (hinge_input - threshold_) * (hinge_input - threshold_);
- }
- }
-
- float InnerProductExampleGradient(float inner_product, float label)
- const override {
- float hinge_input = inner_product * label;
- if (hinge_input >= threshold_) {
- return 0.0f;
- } else if (hinge_input <= 0.0f) {
- return -label * threshold_;
- } else {
- return (hinge_input - threshold_) * label;
- }
- }
-
- // Assigns a label in {-1, 1} given 'inner_product'.
- float InnerProductPredictLabel(float inner_product) const override {
- if (inner_product > 0) return 1.0f;
- return -1.0f;
- }
-
- void set_threshold(float threshold) { threshold_ = threshold; }
-
- private:
- float threshold_ = 1.0f;
-};
-
-
-} // namespace lossmin
-
+} // namespace lossmin
\ No newline at end of file
diff --git a/lossmin/minimizers/gradient-evaluator.cc b/lossmin/minimizers/gradient-evaluator.cc
index 270d334..32fafbc 100644
--- a/lossmin/minimizers/gradient-evaluator.cc
+++ b/lossmin/minimizers/gradient-evaluator.cc
@@ -40,45 +40,4 @@
*gradient /= NumExamples();
}
-void GradientEvaluator::set_num_threads(int num_threads,
- int batch_size) {
- num_threads_ = num_threads;
- if (num_threads_ == 1) return;
-
- num_batches_ = (NumExamples() + batch_size - 1) / batch_size;
-
- // Assign examples to each batch.
- batch_examples_.assign(num_batches_, {});
- for (auto &batch : batch_examples_) batch.reserve(batch_size);
- for (int i = 0; i < NumExamples(); ++i) {
- batch_examples_[i % num_batches_].push_back(i);
- }
-}
-
-/*
-void GradientEvaluator::ThreadLoss(
- const Weights *weights, int example, VectorXf *loss_values,
- BlockingCounter *num_jobs_remaining) const {
- // Compute example loss.
- loss_values->coeffRef(example) =
- loss_function_->ExampleLoss(*weights, instances_, labels_, example);
-
- // Decrement thread counter.
- num_jobs_remaining->DecrementCount();
-}
-
-void GradientEvaluator::ThreadGradient(
- const Weights *weights, int batch, Weights *gradient,
- BlockingCounter *num_jobs_remaining) const {
- // Add the example gradient.
- for (int example : batch_examples_[batch]) {
- loss_function_->AddExampleGradient(
- *weights, instances_, labels_, example, 1.0f, 1.0f, gradient);
- }
-
- // Decrement thread counter.
- num_jobs_remaining->DecrementCount();
-}
-*/
-
} // namespace lossmin
diff --git a/lossmin/minimizers/gradient-evaluator.h b/lossmin/minimizers/gradient-evaluator.h
index e63b842..d756759 100644
--- a/lossmin/minimizers/gradient-evaluator.h
+++ b/lossmin/minimizers/gradient-evaluator.h
@@ -77,16 +77,6 @@
example_gradient);
}
- // Sets the number of threads and batch size for multithreaded computation.
- // Assigns examples to 'batch_examples_'.
- void set_num_threads(int num_threads, int batch_size);
-
- // Convenience method that sets the number of batches to a default.
- void set_num_threads(int num_threads) {
- int batch_size = static_cast<int>(NumExamples() / num_threads / 2);
- set_num_threads(num_threads, batch_size);
- }
-
// Returns the number of examples in the dataset.
virtual int NumExamples() const { return instances_.rows(); }
@@ -98,12 +88,6 @@
return loss_function_->NumWeights(NumFeatures());
}
- // Returns the number of batches for multithreaded computation.
- int num_batches() const { return num_batches_; }
-
- // Returns the number of threads.
- int num_threads() const { return num_threads_; }
-
// Returns an upper bound on the curvature of the loss function. Used to set
// the learning rate of some LossMinimizer algorithms.
virtual float LossCurvature() const {
@@ -137,25 +121,6 @@
// Returns the labels.
const LabelSet &labels() const { return labels_; }
- // Returns a list of examples in batch 'batch'.
- const std::vector<int> &batch_examples(int batch) const {
- return batch_examples_[batch];
- }
-
- protected:
- /*
- // Computes the loss for a single example 'example' and returns it in
- // 'loss_values[example_index]'.
- virtual void ThreadLoss(
- const Weights *weights, int example, VectorXf *loss_values,
- BlockingCounter *num_jobs_remaining) const;
-
- // Computes the gradient for examples in batch 'batch', adds it to 'gradient'.
- virtual void ThreadGradient(
- const Weights *weights, int batch, Weights *gradient,
- BlockingCounter *num_jobs_remaining) const;
- */
-
private:
// Training instances.
const InstanceSet &instances_;
@@ -166,19 +131,6 @@
// Function for computing the loss and gradient of a single training example.
// Not owned.
const LossFunction *loss_function_;
-
- // Mutex for multithreaded gradient computation.
- mutable std::mutex gradient_update_mutex_;
-
- // Number of threads used to parallelize gradient computation.
- // If num_threads_ = 1, multithreading is not used.
- int num_threads_ = 1;
-
- // List of examples assigned to each thread.
- std::vector<std::vector<int>> batch_examples_;
-
- // Number of batches.
- int num_batches_;
};
} // namespace lossmin
diff --git a/lossmin/minimizers/loss-minimizer.cc b/lossmin/minimizers/loss-minimizer.cc
index fef8e6b..45ea3d6 100644
--- a/lossmin/minimizers/loss-minimizer.cc
+++ b/lossmin/minimizers/loss-minimizer.cc
@@ -48,103 +48,18 @@
return converged_;
}
-bool LossMinimizer::Run(int max_epochs, int loss_epochs, int convergence_epochs,
- const InstanceSet &validation_instances,
- const LabelSet &validation_labels, Weights *weights,
- std::vector<float> *training_loss,
- std::vector<float> *validation_loss) {
- // Run for up to 'max_epochs' epochs.
- int epoch;
- for (epoch = 0; epoch < max_epochs; ++epoch) {
- // Compute the loss.
- if (epoch % loss_epochs == 0) {
- training_loss->push_back(Loss(*weights));
- validation_loss->push_back(gradient_evaluator_.Loss(
- *weights, validation_instances, validation_labels));
- // LOG(INFO) << epoch << ": " << training_loss->at(training_loss->size() -
- // 1)
- //<< " " << validation_loss->at(validation_loss->size() - 1);
- }
-
- // Set the 'check_convergence' flag.
- bool check_convergence = (epoch > 0) && (epoch % convergence_epochs == 0);
-
- // Run for one epoch to update the parameters.
- EpochUpdate(weights, epoch, check_convergence);
-
- // Optionally do a simple convergence check.
- if (check_convergence && use_simple_convergence_check_) {
- SimpleConvergenceCheck(*training_loss);
- }
-
- // Check the convergence flag.
- if (converged_) {
- break;
- }
- }
-
- // Compute final loss.
- training_loss->push_back(Loss(*weights));
- validation_loss->push_back(gradient_evaluator_.Loss(
- *weights, validation_instances, validation_labels));
- // LOG(INFO) << "final loss: "
- //<< training_loss->at(training_loss->size() - 1);
- // LOG(INFO) << "final validation loss: "
- //<< validation_loss->at(validation_loss->size() - 1);
- num_epochs_run_ = std::min(epoch + 1, max_epochs);
- return converged_;
-}
-
-float LossMinimizer::BestLearningRate() {
- // Sample a subset of the data.
- std::vector<int> examples(num_search_examples_);
- std::mt19937 rnd;
- std::uniform_int_distribution<> dis(0, gradient_evaluator_.NumExamples());
- for (int i = 0; i < num_search_examples_; ++i) {
- examples[i] = dis(rnd);
- }
-
- float min_loss_rate = initial_rates_[0];
- float min_loss = HUGE_VAL;
- for (int i = 0; i < initial_rates_.size(); ++i) {
- // Initialize parameters to zero.
- Weights weights = Weights::Zero(gradient_evaluator_.NumWeights());
-
- // Make updates with the current learning rate.
- Weights gradient(weights.size());
- for (int example : examples) {
- gradient = l2() * weights;
- gradient_evaluator_.AddExampleGradient(weights, example, 1.0f, 1.0f,
- &gradient);
- weights -= initial_rates_[i] * gradient;
- }
-
- // Compute the corresponding loss.
- float loss = gradient_evaluator_.Loss(weights, examples);
- if (i == 0) {
- min_loss = loss;
- } else if (loss < min_loss) {
- min_loss = loss;
- min_loss_rate = initial_rates_[i];
- }
- }
-
- // Return the learning rate corresponding to the smallest loss.
- // LOG(INFO) << "best initial learning rate: " << min_loss_rate;
- return min_loss_rate;
-}
-
// By default only check if gradient is == 0
void LossMinimizer::ConvergenceCheck(const Weights &weights,
const Weights &gradient) {
if (gradient.squaredNorm() / weights.size() < convergence_threshold_) {
+ set_reached_solution(true);
set_converged(true);
}
}
void LossMinimizer::SimpleConvergenceCheck(const std::vector<float> &loss) {
// Check convergence by verifying that the max relative loss decrease
- // (loss[t-1] - loss[t]) / loss[t-1] is below 'convergence_threshold_'.
+ // (loss[t-1] - loss[t]) / loss[t-1] is below 'simple_convergence_threshold_'.
if (loss.size() > num_convergence_epochs_) {
float loss_difference = 0.0f;
for (int i = loss.size() - num_convergence_epochs_; i < loss.size(); ++i) {
@@ -159,16 +74,4 @@
}
}
-void LossMinimizer::set_batch_size(int batch_size) {
- int num_batches =
- (gradient_evaluator_.NumExamples() + batch_size - 1) / batch_size;
-
- // Assign examples to each batch.
- batch_examples_.assign(num_batches, {});
- for (auto &batch : batch_examples_) batch.reserve(batch_size);
- for (int i = 0; i < gradient_evaluator().NumExamples(); ++i) {
- batch_examples_[i % num_batches].push_back(i);
- }
-}
-
} // namespace lossmin
diff --git a/lossmin/minimizers/loss-minimizer.h b/lossmin/minimizers/loss-minimizer.h
index 0f05d10..eb87947 100644
--- a/lossmin/minimizers/loss-minimizer.h
+++ b/lossmin/minimizers/loss-minimizer.h
@@ -33,6 +33,7 @@
// the data and the loss function. If the dataset or the loss pointed to by
// 'gradient_evaluator' change, the user must call loss_minimizer.Setup() method
// before loss_minimizer.Run().
+// TODO(bazyli) Update description.
#pragma once
@@ -63,10 +64,7 @@
: l1_(l1),
l2_(l2),
gradient_evaluator_(gradient_evaluator),
- converged_(false) {
- std::random_device rd;
- rnd_.seed(rd());
- }
+ converged_(false) {}
virtual ~LossMinimizer() {}
// Initializes algorithm-specific parameters such as learning rates, based
@@ -83,13 +81,6 @@
bool Run(int max_epochs, int loss_epochs, int convergence_epochs,
Weights *weights, std::vector<float> *loss);
- // Runs minimization, evaluating loss on both training and validation data.
- bool Run(int max_epochs, int loss_epochs, int convergence_epochs,
- const InstanceSet &validation_instances,
- const LabelSet &validation_labels, Weights *weights,
- std::vector<float> *training_loss,
- std::vector<float> *validation_loss);
-
// Convenience Run method that evaluates the loss and checks for convergence
// at every iteration.
bool Run(int max_epochs, Weights *weights, std::vector<float> *loss) {
@@ -109,8 +100,9 @@
return loss;
}
- // Checks convergence of deterministic methods.
- // If converged, the flag 'converged_' is set to true.
+ // Checks convergence by checking the sufficient and necessary conditions for
+ // minimizer directly. If converged, the flags 'converged_' and
+ // 'reached_solution_' are set to true.
virtual void ConvergenceCheck(const Weights &weights,
const Weights &gradient);
@@ -147,49 +139,6 @@
zero_threshold_ = zero_threshold;
}
- // Returns the best initial learning rate for stochastic methods by evaluating
- // elements of 'initial_rates_' on a subset of 'num_search_examples_' training
- // examples.
- float BestLearningRate();
-
- // Methods that set parameters related to the initial learning rate in
- // stochastic methods.
- virtual void set_initial_learning_rate(float initial_learning_rate) {
- initial_learning_rate_ = initial_learning_rate;
- // LOG(INFO) << "initial_learning_rate set to " << initial_learning_rate_;
- }
- float initial_learning_rate() const { return initial_learning_rate_; }
- void set_initial_rates(const std::vector<float> &initial_rates) {
- initial_rates_ = initial_rates;
- }
- void set_num_search_examples(int num_search_examples) {
- num_search_examples_ = num_search_examples;
- }
-
- // Sets the seed of the random number generator.
- void SetSeed(int32_t seed) { rnd_.seed(seed); }
-
- // Number of threads for stochastic methods.
- void set_num_threads(int num_threads) { num_threads_ = num_threads; }
- int num_threads() const { return num_threads_; }
-
- // Minibatch size for stochastic methods.
- void set_batch_size(int batch_size);
- int NumBatches() const { return batch_examples_.size(); }
-
- // Shuffles the batches with examples.
- void ShuffleBatches() {
- std::shuffle(batch_examples_.begin(), batch_examples_.end(), rnd_);
- }
-
- // Shuffles and returns the examples in 'batch_examples_[batch]'.
- const std::vector<int> &batch_examples(int batch) {
- // DCHECK(batch < batch_examples_.size());
- std::shuffle(batch_examples_[batch].begin(), batch_examples_[batch].end(),
- rnd_);
- return batch_examples_[batch];
- }
-
// Returns a reference to 'gradient_evaluator_'.
const GradientEvaluator &gradient_evaluator() const {
return gradient_evaluator_;
@@ -203,27 +152,6 @@
float l2() const { return l2_; }
void set_l2(float l2) { l2_ = l2; }
- // Getter/setter for the 'num_scale_iterations_', used for sparse updates in
- // stochastic methods.
- int num_scale_iterations() const { return num_scale_iterations_; }
- void set_num_scale_iterations(int num_scale_iterations) {
- num_scale_iterations_ = num_scale_iterations;
- }
-
- // Getter/setter for 'num_iterations_per_stage_', used in
- // StochasticVarianceReducedGradient.
- int num_iterations_per_stage() const { return num_iterations_per_stage_; }
- void set_num_iterations_per_stage(int num_iterations_per_stage) {
- num_iterations_per_stage_ = num_iterations_per_stage;
- }
-
- // Getter/setter for projecting weights onto a ball in
- // StochasticGradientDescent.
- bool project_weights() const { return project_weights_; }
- void set_project_weights(bool project_weights) {
- project_weights_ = project_weights;
- }
-
// Returns the number of iterations the last time Run() was executed
int num_epochs_run() const { return num_epochs_run_; }
@@ -254,9 +182,6 @@
return 0.0f;
}
- // Default initial learning rate for stochastic methods.
- const float kDefaultInitialLearningRate = 0.05;
-
private:
// Regularization parameters.
float l1_;
@@ -291,36 +216,6 @@
// In other words, each epoch is a step towards minimum during minimization.
// This variable gets updated when Run() is called.
int num_epochs_run_ = 0;
-
- // Initial learning rate, used in stochastic methods.
- float initial_learning_rate_ = 0.01;
-
- // Parameters used to search for a good initial learning rate for stochastic
- // methods in the method BestLearningRate(). 'initial_rates_' are evaluated on
- // a data subset of size 'num_search_examples_'.
- std::vector<float> initial_rates_ = {0.001, 0.005, 0.01, 0.05};
- int num_search_examples_ = 500;
-
- // Multithreading parameters, optionally used in stochastic methods.
- int num_threads_;
- std::vector<std::vector<int>> batch_examples_;
-
- // Pseudo-random number generator for shuffling 'batch_examples_'.
- std::mt19937 rnd_;
-
- // In stochastic methods, weights = scale * Weights. For numerical stability,
- // this is reset to scale = 1.0 every 'num_scale_iterations_' iterations.
- int num_scale_iterations_ = 100;
-
- // Number of iterations per stage; used in StochasticVarianceReducedGradient.
- int num_iterations_per_stage_ = 100000;
-
- // For online/stochastic methods, a simple bound on the optimal solution is
- // |weights|^2 <= 2 / L2. Knowing this, if an intermediate solution is
- // outside of a ball of radius sqrt(2 / L2), we can project it onto the ball
- // to potentially speed up convergence. This approach is used in
- // StochasticGradientDescent, and controlled by the flag 'project_weights_'.
- bool project_weights_ = true;
};
} // namespace lossmin
diff --git a/lossmin/minimizers/parallel-boosting-with-momentum.cc b/lossmin/minimizers/parallel-boosting-with-momentum.cc
index 127842c..e3f25f6 100644
--- a/lossmin/minimizers/parallel-boosting-with-momentum.cc
+++ b/lossmin/minimizers/parallel-boosting-with-momentum.cc
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Author: nevena@google.com (Nevena Lazic)
+// TODO(bazyli) update license header
#include "lossmin/minimizers/parallel-boosting-with-momentum.h"
@@ -14,7 +15,6 @@
namespace lossmin {
void ParallelBoostingWithMomentum::Setup() {
- // Initialize the approximating function parameters.
compute_and_set_learning_rates();
alpha_ = 0.5f;
beta_ = 1.0f - alpha_;
@@ -34,6 +34,7 @@
void ParallelBoostingWithMomentum::SparseInnerProductGradient(
const Weights &weights, Weights *gradient) const {
// Eigen library recommends computations step-by-step for best perfomance
+ // TODO(bazyli) parallel matrix vector multiply with open mp
Weights residual = gradient_evaluator().instances() * weights;
residual -= gradient_evaluator().labels();
*gradient = instances_transpose_ * residual;
@@ -42,6 +43,7 @@
float ParallelBoostingWithMomentum::Loss(const Weights &weights) const {
// Eigen library recommends computations step-by-step for best perfomance
+ // TODO(bazyli) parallel matrix vector multiply with open mp
Weights residual = gradient_evaluator().instances() * weights;
residual -= gradient_evaluator().labels();
float loss =
@@ -81,10 +83,9 @@
// Compute the intermediate weight vector y.
Weights y = (1.0f - alpha_) * *weights + alpha_ * phi_center_;
- // Compute the gradient of the loss wrt y.
+ // Compute the gradient of the loss (except l1 penalty) wrt y.
Weights gradient_wrt_y = Weights::Zero(y.size());
SparseInnerProductGradient(y, &gradient_wrt_y);
-
if (l2() > 0.0f) gradient_wrt_y += l2() * y;
// Gradient step.
diff --git a/lossmin/minimizers/parallel-boosting-with-momentum.h b/lossmin/minimizers/parallel-boosting-with-momentum.h
index d8e5bb2..62e75e6 100644
--- a/lossmin/minimizers/parallel-boosting-with-momentum.h
+++ b/lossmin/minimizers/parallel-boosting-with-momentum.h
@@ -7,6 +7,7 @@
// I. Mukherjee, K. Canini, R. Frongillo, and Y. Singer, "Parallel Boosting with
// Momentum", ECML PKDD 2013.
// Variable names follow the notation in the paper.
+// TODO(bazyli) update license
#pragma once
@@ -27,7 +28,7 @@
Setup();
}
- // Sets learning rates and other parameters.
+ // Sets learning rates, initializes alpha_,beta_ and phi_center_.
void Setup() override;
// Checks convergence by verifying the KKT conditions directly.
@@ -42,14 +43,16 @@
// If weights_i == 0 then -l1() <= gradient_i <= l1().
//
// The squared violations at each coordinate are summed and the square
- // root divided by weights.size() is compared to convergence_thresold()
+ // root is divided by weights.size() is compared to convergence_thresold().
+ // If convergence is determined, sets the appropriate flags so that
+ // converged() == true and reached_solution() == true.
void ConvergenceCheck(const Weights &weights,
const Weights &gradient) override;
- // Returns the total loss for given parameters 'weights', including l1 and l2
+ // Returns the total loss for given parameters |weights|, including l1 and l2
// regularization. Uses sparse matrix multiply from Eigen.
// This is more efficient in terms of performed operations than calling
- // gradient_evaluator().Loss(weights) but is not intended for parallelization.
+ // gradient_evaluator().Loss(weights).
float Loss(const Weights &weights) const override;
// Computes the inner product gradient at |weights|, written to |gradient*|;
@@ -57,6 +60,8 @@
// multiply from Eigen. This is much more efficient in terms of performed
// operations than calling gradient_evaluator().gradient(weights) but is not
// intended for parallelization.
+ // TODO(bazyli) parallelize sparse matrix * dense vector multiply using open
+ // mp
void SparseInnerProductGradient(const Weights &weights,
Weights *gradient) const;
@@ -77,6 +82,7 @@
void set_beta(const float beta) { beta_ = beta; }
private:
+ // Run epoch (iteration) of the algorithm.
// Updates 'weights' and the quadratic approximation function phi(w), such
// that at iteration k, loss(weights_k) <= min_w phi_k(w).
// y = (1 - alpha) * weights + alpha * phi_center