// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

// Compile-time checks that BFGS satisfies the Method, localMethod and
// NextDirectioner interfaces.
var (
	_ Method          = (*BFGS)(nil)
	_ localMethod     = (*BFGS)(nil)
	_ NextDirectioner = (*BFGS)(nil)
)

// BFGS implements the Broyden–Fletcher–Goldfarb–Shanno optimization method. It
// is a quasi-Newton method that maintains an estimate of the inverse Hessian
// of the objective function, applying a rank-two update to the estimate at
// each major iteration. It exhibits super-linear convergence in the proximity
// of a local minimum, and its memory cost is O(n^2) in the problem dimension n.
type BFGS struct {
	// Linesearcher selects suitable steps along the descent direction.
	// Accepted steps should satisfy the strong Wolfe conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	ls *LinesearchMethod

	status Status
	err    error

	dim  int
	x    mat.VecDense // Location of the last major iteration.
	grad mat.VecDense // Gradient at the last major iteration.
	s    mat.VecDense // Difference between locations in this and the previous iteration.
	y    mat.VecDense // Difference between gradients in this and the previous iteration.
	tmp  mat.VecDense // Scratch vector used during the inverse Hessian update.

	invHess *mat.SymDense

	first bool // Indicator of the first iteration.
}
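
// A minimal usage sketch: the two-dimensional Rosenbrock objective, its
// gradient, and the starting point below are assumptions of this example,
// not part of the package. BFGS is supplied to Minimize as the Method:
//
//	problem := optimize.Problem{
//		Func: func(x []float64) float64 {
//			a, b := 1-x[0], x[1]-x[0]*x[0]
//			return a*a + 100*b*b
//		},
//		Grad: func(grad, x []float64) {
//			grad[0] = -2*(1-x[0]) - 400*x[0]*(x[1]-x[0]*x[0])
//			grad[1] = 200 * (x[1] - x[0]*x[0])
//		},
//	}
//	result, err := optimize.Minimize(problem, []float64{-1.2, 1}, nil, &optimize.BFGS{})
//
// Minimize drives the Method interface implemented below; on success,
// result.X holds the located minimizer.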

func (b *BFGS) Status() (Status, error) {
	return b.status, b.err
}

func (*BFGS) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

func (b *BFGS) Init(dim, tasks int) int {
	b.status = NotTerminated
	b.err = nil
	return 1 // BFGS is inherently sequential and always runs as a single task.
}

func (b *BFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	b.status, b.err = localOptimizer{}.run(b, b.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (b *BFGS) initLocal(loc *Location) (Operation, error) {
	if b.Linesearcher == nil {
		b.Linesearcher = &Bisection{}
	}
	if b.ls == nil {
		b.ls = &LinesearchMethod{}
	}
	b.ls.Linesearcher = b.Linesearcher
	b.ls.NextDirectioner = b

	return b.ls.Init(loc)
}
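
// The Bisection default above can be swapped for any Linesearcher whose
// accepted steps satisfy the Wolfe conditions. A configuration sketch, with
// MoreThuente chosen purely as an illustration and problem and x0 as in the
// earlier sketch:
//
//	method := &optimize.BFGS{Linesearcher: &optimize.MoreThuente{}}
//	result, err := optimize.Minimize(problem, x0, nil, method)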

func (b *BFGS) iterateLocal(loc *Location) (Operation, error) {
	return b.ls.Iterate(loc)
}

func (b *BFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	b.dim = dim
	b.first = true

	x := mat.NewVecDense(dim, loc.X)
	grad := mat.NewVecDense(dim, loc.Gradient)
	b.x.CloneFromVec(x)
	b.grad.CloneFromVec(grad)

	b.y.Reset()
	b.s.Reset()
	b.tmp.Reset()

	if b.invHess == nil || cap(b.invHess.RawSymmetric().Data) < dim*dim {
		b.invHess = mat.NewSymDense(dim, nil)
	} else {
		b.invHess = mat.NewSymDense(dim, b.invHess.RawSymmetric().Data[:dim*dim])
	}
	// The values of the inverse Hessian are initialized in the first call to
	// NextDirection.

	// The initial direction is the negative gradient because the initial
	// inverse Hessian estimate is the identity matrix.
	d := mat.NewVecDense(dim, dir)
	d.ScaleVec(-1, grad)
	return 1 / mat.Norm(d, 2)
}
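
// As a worked instance of the step size returned above (an illustrative
// calculation, not package code): for a gradient g = (3, 4), the direction
// is d = -g = (-3, -4) with ‖d‖ = 5, so InitDirection returns 1/5 and the
// first trial point is x + d/5 = x - g/‖g‖, a unit step along the
// normalized steepest-descent direction.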

func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := b.dim
	if len(loc.X) != dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(dir) != dim {
		panic("bfgs: unexpected size mismatch")
	}

	x := mat.NewVecDense(dim, loc.X)
	grad := mat.NewVecDense(dim, loc.Gradient)

	// s = x_{k+1} - x_{k}
	b.s.SubVec(x, &b.x)
	// y = g_{k+1} - g_{k}
	b.y.SubVec(grad, &b.grad)

	sDotY := mat.Dot(&b.s, &b.y)

	if b.first {
		// Rescale the initial Hessian.
		// From: Nocedal, J., Wright, S.: Numerical Optimization (2nd ed).
		// Springer (2006), page 143, eq. 6.20.
		yDotY := mat.Dot(&b.y, &b.y)
		scale := sDotY / yDotY
		for i := 0; i < dim; i++ {
			for j := i; j < dim; j++ {
				if i == j {
					b.invHess.SetSym(i, i, scale)
				} else {
					b.invHess.SetSym(i, j, 0)
				}
			}
		}
		b.first = false
	}
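	// As a concrete instance of the rescaling above (an illustrative
	// calculation only): with s = (0.1, 0.2) and y = (1, 2), sᵀy = 0.5 and
	// yᵀy = 5, so the inverse Hessian is initialized to 0.1*I, matching the
	// curvature observed along the first step.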

	if math.Abs(sDotY) != 0 {
		// Update the inverse Hessian according to the formula
		//
		//  B_{k+1}^-1 = B_k^-1
		//             + (s_kᵀ y_k + y_kᵀ B_k^-1 y_k) / (s_kᵀ y_k)^2 * (s_k s_kᵀ)
		//             - (B_k^-1 y_k s_kᵀ + s_k y_kᵀ B_k^-1) / (s_kᵀ y_k).
		//
		// Note that y_kᵀ B_k^-1 y_k is a scalar, and that the third term is a
		// rank-two update where B_k^-1 y_k is one vector and s_k is the other.
		yBy := mat.Inner(&b.y, b.invHess, &b.y)
		b.tmp.MulVec(b.invHess, &b.y)
		scale := (1 + yBy/sDotY) / sDotY
		b.invHess.SymRankOne(b.invHess, scale, &b.s)
		b.invHess.RankTwo(b.invHess, -1/sDotY, &b.tmp, &b.s)
	}
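	// A useful invariant of the update above is the secant equation,
	// B_{k+1}^-1 y_k = s_k. A verification sketch using gonum/mat (not code
	// exercised by this method):
	//
	//	var hy mat.VecDense
	//	hy.MulVec(b.invHess, &b.y)
	//	// hy now equals b.s up to floating-point rounding.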

	// Update the stored BFGS data.
	b.x.CopyVec(x)
	b.grad.CopyVec(grad)

	// The new direction is stored in dir.
	d := mat.NewVecDense(dim, dir)
	d.MulVec(b.invHess, grad)
	d.ScaleVec(-1, d)

	// The unit step is always suggested first; accepting it unchanged near
	// the minimum is what gives quasi-Newton methods their super-linear
	// convergence rate.
	return 1
}

// needs reports that BFGS requires gradient but not Hessian information.
func (*BFGS) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}