// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"gonum.org/v1/gonum/floats"
)

var (
	_ Method          = (*LBFGS)(nil)
	_ localMethod     = (*LBFGS)(nil)
	_ NextDirectioner = (*LBFGS)(nil)
)
// LBFGS implements the limited-memory BFGS method for gradient-based
// unconstrained minimization.
//
// Instead of storing and manipulating the inverse Hessian approximation H
// directly as a dense matrix, as the plain BFGS method does, LBFGS represents
// H implicitly using the curvature information from the last Store
// iterations. LBFGS is therefore more appropriate than BFGS for large
// problems, as its memory and per-iteration costs scale as O(Store * dim)
// while those of BFGS scale as O(dim^2). The "forgetful" nature of LBFGS may
// also make it perform better than BFGS for functions whose Hessians vary
// rapidly in space.
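//
// The sketch below shows a typical call through Minimize; the quadratic
// objective and its gradient are illustrative assumptions, not part of this
// package:
//
//	p := optimize.Problem{
//		Func: func(x []float64) float64 {
//			return x[0]*x[0] + 10*x[1]*x[1]
//		},
//		Grad: func(grad, x []float64) {
//			grad[0] = 2 * x[0]
//			grad[1] = 20 * x[1]
//		},
//	}
//	result, err := optimize.Minimize(p, []float64{1, 1}, nil, &optimize.LBFGS{})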
type LBFGS struct {
	// Linesearcher selects suitable steps along the descent direction.
	// Accepted steps should satisfy the strong Wolfe conditions.
	// If Linesearcher is nil, a reasonable default will be chosen.
	Linesearcher Linesearcher
	// Store is the size of the limited-memory storage.
	// If Store is 0, it will be defaulted to 15.
	Store int
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	dim  int       // Dimension of the problem
	x    []float64 // Location at the last major iteration
	grad []float64 // Gradient at the last major iteration

	// History
	oldest int         // Index of the oldest element of the history
	y      [][]float64 // Last Store values of y
	s      [][]float64 // Last Store values of s
	rho    []float64   // Last Store values of rho
	a      []float64   // Cache of Hessian updates
}

func (l *LBFGS) Status() (Status, error) {
	return l.status, l.err
}

func (*LBFGS) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

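// Init initializes the method for a problem with the given dimension and
// returns 1, the number of concurrent tasks the method can run: LBFGS is a
// serial local method.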
func (l *LBFGS) Init(dim, tasks int) int {
	l.status = NotTerminated
	l.err = nil
	return 1
}

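// Run runs the optimization loop, delegating the bookkeeping to the shared
// localOptimizer driver and closing the operation channel when the run
// terminates.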
func (l *LBFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	l.status, l.err = localOptimizer{}.run(l, l.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (l *LBFGS) initLocal(loc *Location) (Operation, error) {
	if l.Linesearcher == nil {
		l.Linesearcher = &Bisection{}
	}
	if l.Store == 0 {
		l.Store = 15
	}

	if l.ls == nil {
		l.ls = &LinesearchMethod{}
	}
	l.ls.Linesearcher = l.Linesearcher
	l.ls.NextDirectioner = l

	return l.ls.Init(loc)
}

func (l *LBFGS) iterateLocal(loc *Location) (Operation, error) {
	return l.ls.Iterate(loc)
}

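// InitDirection sets dir to the steepest-descent direction, the negative of
// the gradient at loc, and resets the stored history. The returned step size
// 1/||dir|| makes the first trial step have unit length in x.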
func (l *LBFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	l.dim = dim
	l.oldest = 0

	l.a = resize(l.a, l.Store)
	l.rho = resize(l.rho, l.Store)
	l.y = l.initHistory(l.y)
	l.s = l.initHistory(l.s)

	l.x = resize(l.x, dim)
	copy(l.x, loc.X)

	l.grad = resize(l.grad, dim)
	copy(l.grad, loc.Gradient)

	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)
	return 1 / floats.Norm(dir, 2)
}

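// initHistory returns a history slice of length l.Store whose elements each
// have length l.dim and are zeroed, reusing existing backing storage where
// possible.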
func (l *LBFGS) initHistory(hist [][]float64) [][]float64 {
	c := cap(hist)
	if c < l.Store {
		n := make([][]float64, l.Store-c)
		hist = append(hist[:c], n...)
	}
	hist = hist[:l.Store]
	for i := range hist {
		hist[i] = resize(hist[i], l.dim)
		for j := range hist[i] {
			hist[i][j] = 0
		}
	}
	return hist
}

func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// Uses the two-loop recursion described in
	// Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). Springer (2006), chapter 7, page 178.

	if len(loc.X) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(dir) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}

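	// Save the newest curvature pair over the oldest history slot:
	// y = grad_{k+1} - grad_k and s = x_{k+1} - x_k.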
	y := l.y[l.oldest]
	floats.SubTo(y, loc.Gradient, l.grad)
	s := l.s[l.oldest]
	floats.SubTo(s, loc.X, l.x)
	sDotY := floats.Dot(s, y)
	l.rho[l.oldest] = 1 / sDotY
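	// A line search satisfying the strong Wolfe conditions guarantees
	// sDotY > 0, so rho is positive and the implicit inverse Hessian
	// approximation stays positive definite.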

	l.oldest = (l.oldest + 1) % l.Store

	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)
	copy(dir, loc.Gradient)

	// First loop: start with the most recent element and go backward,
	// computing a_i = rho_i * s_i^T q and updating q -= a_i * y_i,
	// where q is accumulated in dir.
	for i := 0; i < l.Store; i++ {
		idx := l.oldest - i - 1
		if idx < 0 {
			idx += l.Store
		}
		l.a[idx] = l.rho[idx] * floats.Dot(l.s[idx], dir)
		floats.AddScaled(dir, -l.a[idx], l.y[idx])
	}

	// Scale by gamma = s^T y / y^T y, which corresponds to taking the
	// initial inverse Hessian approximation to be gamma*I, the standard
	// choice in the reference above.
	gamma := sDotY / floats.Dot(y, y)
	floats.Scale(gamma, dir)

	// Second loop: start with the oldest element and go forward, computing
	// beta_i = rho_i * y_i^T r and updating r += (a_i - beta_i) * s_i,
	// where r is accumulated in dir.
	for i := 0; i < l.Store; i++ {
		idx := i + l.oldest
		if idx >= l.Store {
			idx -= l.Store
		}
		beta := l.rho[idx] * floats.Dot(l.y[idx], dir)
		floats.AddScaled(dir, l.a[idx]-beta, l.s[idx])
	}

	// dir now contains H^{-1} * g, so flip the direction for minimization.
	floats.Scale(-1, dir)

	return 1
}

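// needs reports the problem information required by the method: LBFGS uses
// gradient information but not Hessians.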
func (*LBFGS) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}