// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/floats"
	"gonum.org/v1/gonum/floats/scalar"
)

const (
	initialStepFactor = 1

	quadraticMinimumStepSize = 1e-3
	quadraticMaximumStepSize = 1
	quadraticThreshold       = 1e-12

	firstOrderMinimumStepSize = quadraticMinimumStepSize
	firstOrderMaximumStepSize = quadraticMaximumStepSize
)

var (
	_ StepSizer = ConstantStepSize{}
	_ StepSizer = (*QuadraticStepSize)(nil)
	_ StepSizer = (*FirstOrderStepSize)(nil)
)

// ConstantStepSize is a StepSizer that returns the same step size for
// every iteration.
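// A Size of 1 is a common choice for Newton-type methods, whose search
// directions are typically already well scaled.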
type ConstantStepSize struct {
	Size float64
}

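// Init returns the constant step size for the first iteration; the location
// and descent direction are ignored.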
func (c ConstantStepSize) Init(_ *Location, _ []float64) float64 {
	return c.Size
}

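// StepSize returns the same constant step size for every subsequent iteration;
// the location and descent direction are ignored.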
func (c ConstantStepSize) StepSize(_ *Location, _ []float64) float64 {
	return c.Size
}

// QuadraticStepSize estimates the initial line search step size as the minimum
// of a quadratic that interpolates f(x_{k-1}), f(x_k) and ∇f_{k-1}⋅p_{k-1}.
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
// The step size is bounded away from zero.
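//
// A minimal usage sketch (assuming the GradientDescent method and the Minimize
// function from this package, and a Problem p with Func and Grad set):
//
//	method := &GradientDescent{StepSizer: &QuadraticStepSize{}}
//	result, err := Minimize(p, initX, nil, method)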
type QuadraticStepSize struct {
	// Threshold determines the relative change in the objective function
	// above which the initial step size is estimated by quadratic
	// interpolation. Otherwise the initial step size is set to 2*previous
	// step size.
	// If Threshold is zero, it will be set to 1e-12.
	Threshold float64
	// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
	// If InitialStepFactor is zero, it will be set to one.
	InitialStepFactor float64
	// MinStepSize is the lower bound on the estimated step size.
	// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
	// If MinStepSize is zero, it will be set to 1e-3.
	MinStepSize float64
	// MaxStepSize is the upper bound on the estimated step size.
	// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	fPrev        float64
	dirPrevNorm  float64
	projGradPrev float64
	xPrev        []float64
}

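// Init applies the default settings, estimates the step size for the first
// iteration as InitialStepFactor / |∇f|_∞ bounded to [MinStepSize, MaxStepSize],
// and records the data needed by subsequent calls to StepSize.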
func (q *QuadraticStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
	if q.Threshold == 0 {
		q.Threshold = quadraticThreshold
	}
	if q.InitialStepFactor == 0 {
		q.InitialStepFactor = initialStepFactor
	}
	if q.MinStepSize == 0 {
		q.MinStepSize = quadraticMinimumStepSize
	}
	if q.MaxStepSize == 0 {
		q.MaxStepSize = quadraticMaximumStepSize
	}
	if q.MaxStepSize <= q.MinStepSize {
		panic("optimize: MinStepSize not smaller than MaxStepSize")
	}

	gNorm := floats.Norm(loc.Gradient, math.Inf(1))
	stepSize = math.Max(q.MinStepSize, math.Min(q.InitialStepFactor/gNorm, q.MaxStepSize))

	q.fPrev = loc.F
	q.dirPrevNorm = floats.Norm(dir, 2)
	q.projGradPrev = floats.Dot(loc.Gradient, dir)
	q.xPrev = resize(q.xPrev, len(loc.X))
	copy(q.xPrev, loc.X)
	return stepSize
}

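// StepSize estimates the initial step size for the current iteration, either
// from the minimizer of a quadratic that interpolates the current and previous
// function values and the previous directional derivative, or as twice the
// previous step size when such an interpolant is not sensible. The result is
// bounded to [MinStepSize, MaxStepSize].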
func (q *QuadraticStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
	stepSizePrev := floats.Distance(loc.X, q.xPrev, 2) / q.dirPrevNorm
	projGrad := floats.Dot(loc.Gradient, dir)

	stepSize = 2 * stepSizePrev
	if !scalar.EqualWithinRel(q.fPrev, loc.F, q.Threshold) {
		// Two consecutive function values are not relatively equal, so
		// computing the minimum of a quadratic interpolant might make sense.

		df := (loc.F - q.fPrev) / stepSizePrev
		quadTest := df - q.projGradPrev
		if quadTest > 0 {
			// There is a chance of approximating the function well by a
			// quadratic only if the finite difference (f_k-f_{k-1})/stepSizePrev
			// is larger than ∇f_{k-1}⋅p_{k-1}.

			// Set the step size to the minimizer of the quadratic function that
			// interpolates f_{k-1}, ∇f_{k-1}⋅p_{k-1} and f_k
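			// along the step from x_{k-1} to x_k:
			//  q(s) = f_{k-1} + s ∇f_{k-1}⋅p_{k-1} + (quadTest/stepSizePrev) s²,
			// whose minimizer is s = -∇f_{k-1}⋅p_{k-1} stepSizePrev / (2 quadTest).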
			stepSize = -q.projGradPrev * stepSizePrev / quadTest / 2
		}
	}
	// Bound the step size to lie in [MinStepSize, MaxStepSize]
	stepSize = math.Max(q.MinStepSize, math.Min(stepSize, q.MaxStepSize))

	q.fPrev = loc.F
	q.dirPrevNorm = floats.Norm(dir, 2)
	q.projGradPrev = projGrad
	copy(q.xPrev, loc.X)
	return stepSize
}

// FirstOrderStepSize estimates the initial line search step size based on the
// assumption that the first-order change in the function will be the same as
// that obtained at the previous iteration. That is, the initial step size s^0_k
// is chosen so that
//
//	s^0_k ∇f_k⋅p_k = s_{k-1} ∇f_{k-1}⋅p_{k-1}
//
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
type FirstOrderStepSize struct {
	// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
	// If InitialStepFactor is zero, it will be set to one.
	InitialStepFactor float64
	// MinStepSize is the lower bound on the estimated step size.
	// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
	// If MinStepSize is zero, it will be set to 1e-3.
	MinStepSize float64
	// MaxStepSize is the upper bound on the estimated step size.
	// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	dirPrevNorm  float64
	projGradPrev float64
	xPrev        []float64
}

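// Init applies the default settings, estimates the step size for the first
// iteration as InitialStepFactor / |∇f|_∞ bounded to [MinStepSize, MaxStepSize],
// and records the data needed by subsequent calls to StepSize.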
func (fo *FirstOrderStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
	if fo.InitialStepFactor == 0 {
		fo.InitialStepFactor = initialStepFactor
	}
	if fo.MinStepSize == 0 {
		fo.MinStepSize = firstOrderMinimumStepSize
	}
	if fo.MaxStepSize == 0 {
		fo.MaxStepSize = firstOrderMaximumStepSize
	}
	if fo.MaxStepSize <= fo.MinStepSize {
		panic("optimize: MinStepSize not smaller than MaxStepSize")
	}

	gNorm := floats.Norm(loc.Gradient, math.Inf(1))
	stepSize = math.Max(fo.MinStepSize, math.Min(fo.InitialStepFactor/gNorm, fo.MaxStepSize))

	fo.dirPrevNorm = floats.Norm(dir, 2)
	fo.projGradPrev = floats.Dot(loc.Gradient, dir)
	fo.xPrev = resize(fo.xPrev, len(loc.X))
	copy(fo.xPrev, loc.X)
	return stepSize
}

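// StepSize estimates the initial step size for the current iteration so that
// the predicted first-order change stepSize*∇f_k⋅p_k matches the first-order
// change achieved at the previous iteration. The result is bounded to
// [MinStepSize, MaxStepSize].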
func (fo *FirstOrderStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
	stepSizePrev := floats.Distance(loc.X, fo.xPrev, 2) / fo.dirPrevNorm
	projGrad := floats.Dot(loc.Gradient, dir)

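	// Choose the step size so that the predicted first-order change matches
	// the change at the previous iteration:
	//  stepSize ∇f_k⋅p_k = stepSizePrev ∇f_{k-1}⋅p_{k-1}.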
	stepSize = stepSizePrev * fo.projGradPrev / projGrad
	stepSize = math.Max(fo.MinStepSize, math.Min(stepSize, fo.MaxStepSize))

	fo.dirPrevNorm = floats.Norm(dir, 2)
	fo.projGradPrev = projGrad
	copy(fo.xPrev, loc.X)
	return stepSize
}