// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

var (
	_ Method          = (*BFGS)(nil)
	_ localMethod     = (*BFGS)(nil)
	_ NextDirectioner = (*BFGS)(nil)
)

// BFGS implements the Broyden–Fletcher–Goldfarb–Shanno optimization method. It
// is a quasi-Newton method that performs successive rank-two updates to an
// estimate of the inverse Hessian of the objective function. It exhibits
// super-linear convergence when in proximity to a local minimum. Its memory
// cost is O(n^2) in the input dimension.
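//
// A minimal usage sketch, assuming the exported gonum/optimize API
// (optimize.Problem, optimize.Minimize); the quadratic objective is only an
// illustration:
//
//	p := optimize.Problem{
//		Func: func(x []float64) float64 { return x[0]*x[0] + x[1]*x[1] },
//		Grad: func(grad, x []float64) {
//			grad[0] = 2 * x[0]
//			grad[1] = 2 * x[1]
//		},
//	}
//	result, err := optimize.Minimize(p, []float64{1, 1}, nil, &BFGS{})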
type BFGS struct {
	// Linesearcher selects suitable steps along the descent direction.
	// Accepted steps should satisfy the strong Wolfe conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher

	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	ls *LinesearchMethod

	status Status
	err    error

	dim  int
	x    mat.VecDense // Location of the last major iteration.
	grad mat.VecDense // Gradient at the last major iteration.
	s    mat.VecDense // Difference between locations in this and the previous iteration.
	y    mat.VecDense // Difference between gradients in this and the previous iteration.
	tmp  mat.VecDense

	invHess *mat.SymDense

	first bool // Indicator of the first iteration.
}

func (b *BFGS) Status() (Status, error) {
	return b.status, b.err
}

func (*BFGS) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

func (b *BFGS) Init(dim, tasks int) int {
	b.status = NotTerminated
	b.err = nil
	return 1
}

func (b *BFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	b.status, b.err = localOptimizer{}.run(b, b.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (b *BFGS) initLocal(loc *Location) (Operation, error) {
	if b.Linesearcher == nil {
		b.Linesearcher = &Bisection{}
	}
	if b.ls == nil {
		b.ls = &LinesearchMethod{}
	}
	b.ls.Linesearcher = b.Linesearcher
	b.ls.NextDirectioner = b
	return b.ls.Init(loc)
}

func (b *BFGS) iterateLocal(loc *Location) (Operation, error) {
	return b.ls.Iterate(loc)
}

func (b *BFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	b.dim = dim
	b.first = true

	x := mat.NewVecDense(dim, loc.X)
	grad := mat.NewVecDense(dim, loc.Gradient)
	b.x.CloneFromVec(x)
	b.grad.CloneFromVec(grad)

	b.y.Reset()
	b.s.Reset()
	b.tmp.Reset()

	if b.invHess == nil || cap(b.invHess.RawSymmetric().Data) < dim*dim {
		b.invHess = mat.NewSymDense(dim, nil)
	} else {
		b.invHess = mat.NewSymDense(dim, b.invHess.RawSymmetric().Data[:dim*dim])
	}

	// The values of the inverse Hessian are initialized in the first call to
	// NextDirection.

	// The initial direction is just the negative of the gradient because the
	// Hessian is an identity matrix.
	d := mat.NewVecDense(dim, dir)
	d.ScaleVec(-1, grad)
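	// Scale the initial step so that the first trial point lies at unit
	// distance from x: stepSize*‖d‖ = 1. The line search adapts the step
	// from this initial guess.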
	return 1 / mat.Norm(d, 2)
}

func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := b.dim
	if len(loc.X) != dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(dir) != dim {
		panic("bfgs: unexpected size mismatch")
	}

	x := mat.NewVecDense(dim, loc.X)
	grad := mat.NewVecDense(dim, loc.Gradient)

	// s = x_{k+1} - x_{k}
	b.s.SubVec(x, &b.x)
	// y = g_{k+1} - g_{k}
	b.y.SubVec(grad, &b.grad)

	sDotY := mat.Dot(&b.s, &b.y)

	if b.first {
		// Rescale the initial Hessian.
		// From: Nocedal, J., Wright, S.: Numerical Optimization (2nd ed).
		// Springer (2006), page 143, eq. 6.20.
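		// Concretely (eq. 6.20): B_0^-1 = (sᵀ y) / (yᵀ y) * I, which matches
		// the scale of the inverse Hessian along the most recent step.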
		yDotY := mat.Dot(&b.y, &b.y)
		scale := sDotY / yDotY
		for i := 0; i < dim; i++ {
			for j := i; j < dim; j++ {
				if i == j {
					b.invHess.SetSym(i, i, scale)
				} else {
					b.invHess.SetSym(i, j, 0)
				}
			}
		}

		b.first = false
	}

	if math.Abs(sDotY) != 0 {
		// Update the inverse Hessian according to the formula
		//
		//  B_{k+1}^-1 = B_k^-1
		//             + (s_kᵀ y_k + y_kᵀ B_k^-1 y_k) / (s_kᵀ y_k)^2 * (s_k s_kᵀ)
		//             - (B_k^-1 y_k s_kᵀ + s_k y_kᵀ B_k^-1) / (s_kᵀ y_k).
		//
		// Note that y_kᵀ B_k^-1 y_k is a scalar, and that the third term is a
		// rank-two update where B_k^-1 y_k is one vector and s_k is the other.
		yBy := mat.Inner(&b.y, b.invHess, &b.y)
		b.tmp.MulVec(b.invHess, &b.y)
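		// The coefficient of the s_k s_kᵀ term above regroups as
		// (sᵀy + yᵀB⁻¹y)/(sᵀy)² = (1 + yᵀB⁻¹y/sᵀy)/(sᵀy), which is the
		// scale used in the rank-one update below.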
		scale := (1 + yBy/sDotY) / sDotY
		b.invHess.SymRankOne(b.invHess, scale, &b.s)
		b.invHess.RankTwo(b.invHess, -1/sDotY, &b.tmp, &b.s)
	}

	// Update the stored BFGS data.
	b.x.CopyVec(x)
	b.grad.CopyVec(grad)

	// New direction is stored in dir.
	d := mat.NewVecDense(dim, dir)
	d.MulVec(b.invHess, grad)
	d.ScaleVec(-1, d)
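
	// A unit step size is returned so the line search tries the full
	// quasi-Newton step first; accepting it near the solution is what
	// yields the super-linear convergence rate (Nocedal & Wright, ch. 6).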
	return 1
}

func (*BFGS) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}