// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/floats"
)

const (
initialStepFactor = 1
quadraticMinimumStepSize = 1e-3
quadraticMaximumStepSize = 1
quadraticThreshold = 1e-12
firstOrderMinimumStepSize = quadraticMinimumStepSize
firstOrderMaximumStepSize = quadraticMaximumStepSize
)

// ConstantStepSize is a StepSizer that returns the same step size for
// every iteration.
type ConstantStepSize struct {
Size float64
}

func (c ConstantStepSize) Init(_ *Location, _ []float64) float64 {
	return c.Size
}

func (c ConstantStepSize) StepSize(_ *Location, _ []float64) float64 {
	return c.Size
}
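
// constantStepSizeSketch is an illustrative sketch, not part of the original
// file: it shows the call order a line-search method would follow with a
// StepSizer. The helper name and the Location values are made up for
// demonstration.
func constantStepSizeSketch() {
	loc := &Location{
		X:        []float64{1, 2},
		F:        5,
		Gradient: []float64{2, 4},
	}
	dir := []float64{-2, -4} // a descent direction

	sizer := ConstantStepSize{Size: 0.1}
	step := sizer.Init(loc, dir)    // first iteration
	step = sizer.StepSize(loc, dir) // all later iterations
	_ = step                        // always 0.1, whatever loc and dir are
}
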
// QuadraticStepSize estimates the initial line search step size as the minimum
// of a quadratic that interpolates f(x_{k-1}), f(x_k) and ∇f_{k-1}⋅p_{k-1}.
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
// The step size is bounded away from zero.
type QuadraticStepSize struct {
	// Threshold determines when the initial step size is estimated by
	// quadratic interpolation: interpolation is used when the relative
	// change in the objective function is larger than Threshold, otherwise
	// the initial step size is set to 2*(the previous step size).
	// If Threshold is zero, it will be set to 1e-12.
Threshold float64
// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
// If InitialStepFactor is zero, it will be set to one.
InitialStepFactor float64
// MinStepSize is the lower bound on the estimated step size.
// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
// If MinStepSize is zero, it will be set to 1e-3.
MinStepSize float64
// MaxStepSize is the upper bound on the estimated step size.
// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	fPrev        float64
	dirPrevNorm  float64
	projGradPrev float64
	xPrev        []float64
}

func (q *QuadraticStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
if q.Threshold == 0 {
q.Threshold = quadraticThreshold
}
if q.InitialStepFactor == 0 {
q.InitialStepFactor = initialStepFactor
}
if q.MinStepSize == 0 {
q.MinStepSize = quadraticMinimumStepSize
}
if q.MaxStepSize == 0 {
q.MaxStepSize = quadraticMaximumStepSize
}
if q.MaxStepSize <= q.MinStepSize {
panic("optimize: MinStepSize not smaller than MaxStepSize")
}
gNorm := floats.Norm(loc.Gradient, math.Inf(1))
stepSize = math.Max(q.MinStepSize, math.Min(q.InitialStepFactor/gNorm, q.MaxStepSize))
q.fPrev = loc.F
q.dirPrevNorm = floats.Norm(dir, 2)
q.projGradPrev = floats.Dot(loc.Gradient, dir)
q.xPrev = resize(q.xPrev, len(loc.X))
copy(q.xPrev, loc.X)
return stepSize
}

func (q *QuadraticStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
stepSizePrev := floats.Distance(loc.X, q.xPrev, 2) / q.dirPrevNorm
projGrad := floats.Dot(loc.Gradient, dir)
stepSize = 2 * stepSizePrev
if !floats.EqualWithinRel(q.fPrev, loc.F, q.Threshold) {
// Two consecutive function values are not relatively equal, so
		// computing the minimum of a quadratic interpolant might make sense.
df := (loc.F - q.fPrev) / stepSizePrev
quadTest := df - q.projGradPrev
if quadTest > 0 {
// There is a chance of approximating the function well by a
// quadratic only if the finite difference (f_k-f_{k-1})/stepSizePrev
// is larger than ∇f_{k-1}⋅p_{k-1}
// Set the step size to the minimizer of the quadratic function that
// interpolates f_{k-1}, ∇f_{k-1}⋅p_{k-1} and f_k
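			//
			// Derivation sketch (added for clarity): the interpolant in the
			// step length s is
			//   m(s) = f_{k-1} + s ∇f_{k-1}⋅p_{k-1} + c s²,
			// and matching m(stepSizePrev) = f_k gives
			//   c = quadTest / stepSizePrev.
			// Setting m'(s) = 0 yields the minimizer below; quadTest > 0
			// ensures c > 0, so the stationary point is a minimum.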
stepSize = -q.projGradPrev * stepSizePrev / quadTest / 2
}
}
// Bound the step size to lie in [MinStepSize, MaxStepSize]
stepSize = math.Max(q.MinStepSize, math.Min(stepSize, q.MaxStepSize))
q.fPrev = loc.F
q.dirPrevNorm = floats.Norm(dir, 2)
q.projGradPrev = projGrad
copy(q.xPrev, loc.X)
return stepSize
}
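
// quadraticStepSizeSketch is an illustrative sketch, not part of the original
// file: it runs QuadraticStepSize on f(x) = x⋅x with steepest-descent
// directions to show how Init seeds the state and how StepSize then returns
// the minimizer of the interpolating quadratic. The helper name and the
// hard-coded evaluations are assumptions for demonstration only.
func quadraticStepSizeSketch() {
	// Start at x = (1, 2), where f = 5 and ∇f = (2, 4).
	loc := &Location{
		X:        []float64{1, 2},
		F:        5,
		Gradient: []float64{2, 4},
	}
	dir := []float64{-2, -4} // steepest descent: p = -∇f

	var q QuadraticStepSize
	step := q.Init(loc, dir) // InitialStepFactor/|∇f|_∞ = 1/4, within bounds

	// Take the step to x = (0.5, 1) and re-evaluate: f = 1.25, ∇f = (1, 2).
	floats.AddScaled(loc.X, step, dir)
	loc.F = 1.25
	loc.Gradient[0], loc.Gradient[1] = 1, 2
	dir[0], dir[1] = -1, -2

	// The quadratic through f_{k-1} = 5, ∇f_{k-1}⋅p_{k-1} = -20 and
	// f_k = 1.25 is minimized at 0.5, which lies inside [1e-3, 1].
	step = q.StepSize(loc, dir)
	_ = step // 0.5
}
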
// FirstOrderStepSize estimates the initial line search step size based on the
// assumption that the first-order change in the function will be the same as
// that obtained at the previous iteration. That is, the initial step size s^0_k
// is chosen so that
// s^0_k ∇f_k⋅p_k = s_{k-1} ∇f_{k-1}⋅p_{k-1}
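// or, solved for the new step size,
//  s^0_k = s_{k-1} (∇f_{k-1}⋅p_{k-1}) / (∇f_k⋅p_k).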
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
type FirstOrderStepSize struct {
// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
// If InitialStepFactor is zero, it will be set to one.
InitialStepFactor float64
// MinStepSize is the lower bound on the estimated step size.
// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
// If MinStepSize is zero, it will be set to 1e-3.
MinStepSize float64
// MaxStepSize is the upper bound on the estimated step size.
// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	dirPrevNorm  float64
	projGradPrev float64
	xPrev        []float64
}

func (fo *FirstOrderStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
if fo.InitialStepFactor == 0 {
fo.InitialStepFactor = initialStepFactor
}
if fo.MinStepSize == 0 {
fo.MinStepSize = firstOrderMinimumStepSize
}
if fo.MaxStepSize == 0 {
fo.MaxStepSize = firstOrderMaximumStepSize
}
if fo.MaxStepSize <= fo.MinStepSize {
panic("optimize: MinStepSize not smaller than MaxStepSize")
}
gNorm := floats.Norm(loc.Gradient, math.Inf(1))
stepSize = math.Max(fo.MinStepSize, math.Min(fo.InitialStepFactor/gNorm, fo.MaxStepSize))
fo.dirPrevNorm = floats.Norm(dir, 2)
fo.projGradPrev = floats.Dot(loc.Gradient, dir)
fo.xPrev = resize(fo.xPrev, len(loc.X))
copy(fo.xPrev, loc.X)
return stepSize
}

func (fo *FirstOrderStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
	stepSizePrev := floats.Distance(loc.X, fo.xPrev, 2) / fo.dirPrevNorm
	projGrad := floats.Dot(loc.Gradient, dir)

	stepSize = stepSizePrev * fo.projGradPrev / projGrad
	stepSize = math.Max(fo.MinStepSize, math.Min(stepSize, fo.MaxStepSize))

	fo.dirPrevNorm = floats.Norm(dir, 2)
	fo.projGradPrev = projGrad
	copy(fo.xPrev, loc.X)
return stepSize
}
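
// firstOrderStepSizeSketch is an illustrative sketch, not part of the original
// file: it continues the f(x) = x⋅x example to show FirstOrderStepSize
// matching the first-order change of the previous iteration. The helper name
// and the hard-coded evaluations are assumptions for demonstration only.
func firstOrderStepSizeSketch() {
	loc := &Location{
		X:        []float64{1, 2},
		F:        5,
		Gradient: []float64{2, 4},
	}
	dir := []float64{-2, -4}

	var fo FirstOrderStepSize
	step := fo.Init(loc, dir) // InitialStepFactor/|∇f|_∞ = 1/4

	// Move to x = (0.5, 1), where ∇f = (1, 2), and pick p = -∇f again.
	floats.AddScaled(loc.X, step, dir)
	loc.F = 1.25
	loc.Gradient[0], loc.Gradient[1] = 1, 2
	dir[0], dir[1] = -1, -2

	// s^0_k = s_{k-1} (∇f_{k-1}⋅p_{k-1}) / (∇f_k⋅p_k) = 0.25*(-20)/(-5) = 1,
	// which the clamp to [MinStepSize, MaxStepSize] leaves at 1.
	step = fo.StepSize(loc, dir)
	_ = step // 1
}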