// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

const maxNewtonModifications = 20

var (
	_ Method          = (*Newton)(nil)
	_ localMethod     = (*Newton)(nil)
	_ NextDirectioner = (*Newton)(nil)
)

// Newton implements a modified Newton's method for Hessian-based unconstrained
// minimization. It applies regularization when the Hessian is not positive
// definite, and it can converge to a local minimum from any starting point.
//
// Newton iteratively forms a quadratic model of the objective function f and
// tries to minimize this approximate model. It generates a sequence of
// locations x_k by means of
//
//	solve H_k d_k = -∇f_k for d_k,
//	x_{k+1} = x_k + α_k d_k,
//
// where H_k is the Hessian matrix of f at x_k and α_k is a step size found by
// a line search.
//
// Away from a minimizer, H_k may not be positive definite and d_k may not be
// a descent direction. Newton implements a Hessian modification strategy that
// adds successively larger multiples of the identity to H_k until it becomes
// positive definite. Note that the repeated trial factorization of the
// modified Hessian involved in this process can be computationally expensive.
//
// If the Hessian matrix cannot be formed explicitly, or if the computational
// cost of its factorization is prohibitive, the BFGS or L-BFGS quasi-Newton
// methods can be used instead.
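//
// A minimal usage sketch; the quadratic objective below is illustrative only
// and is not part of this package:
//
//	p := optimize.Problem{
//		Func: func(x []float64) float64 { return x[0]*x[0] + x[1]*x[1] },
//		Grad: func(grad, x []float64) {
//			grad[0] = 2 * x[0]
//			grad[1] = 2 * x[1]
//		},
//		Hess: func(hess *mat.SymDense, x []float64) {
//			hess.SetSym(0, 0, 2)
//			hess.SetSym(0, 1, 0)
//			hess.SetSym(1, 1, 2)
//		},
//	}
//	result, err := optimize.Minimize(p, []float64{1, 1}, nil, &optimize.Newton{})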
type Newton struct {
	// Linesearcher is used for selecting suitable steps along the descent
	// direction d. Accepted steps should satisfy at least one of the Wolfe,
	// Goldstein or Armijo conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// Increase is the factor by which a scalar tau is successively increased
	// so that (H + tau*I) is positive definite. Larger values reduce the
	// number of trial Hessian factorizations, but also reduce the second-order
	// information in H.
	// Increase must be greater than 1. If Increase is 0, it defaults to 5.
	Increase float64
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// becomes too small. If GradStopThreshold is 0, it defaults to 1e-12; if
	// it is NaN, this stopping criterion is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	hess *mat.SymDense // Storage for a copy of the Hessian matrix.
	chol mat.Cholesky  // Storage for the Cholesky factorization.
	tau  float64
}

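// Status returns the status of the optimization run and any error that
// caused it to terminate.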
func (n *Newton) Status() (Status, error) {
	return n.status, n.err
}

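// Uses reports that Newton requires both the gradient and the Hessian of
// the objective function. Uses is part of the Method interface.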
func (*Newton) Uses(has Available) (uses Available, err error) {
	return has.hessian()
}

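// Init initializes the method for a problem with dim variables and returns
// the number of concurrent tasks the method will use, which is always 1
// because Newton is a sequential local method. Init is part of the Method
// interface.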
func (n *Newton) Init(dim, tasks int) int {
	n.status = NotTerminated
	n.err = nil
	return 1
}

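// Run runs the optimization loop, delegating the iteration logic to the
// shared localOptimizer driver. Run is part of the Method interface.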
func (n *Newton) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	n.status, n.err = localOptimizer{}.run(n, n.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (n *Newton) initLocal(loc *Location) (Operation, error) {
	if n.Increase == 0 {
		n.Increase = 5
	}
	if n.Increase <= 1 {
		panic("optimize: Newton.Increase must be greater than 1")
	}
	if n.Linesearcher == nil {
		n.Linesearcher = &Bisection{}
	}
	if n.ls == nil {
		n.ls = &LinesearchMethod{}
	}
	n.ls.Linesearcher = n.Linesearcher
	n.ls.NextDirectioner = n
	return n.ls.Init(loc)
}

func (n *Newton) iterateLocal(loc *Location) (Operation, error) {
	return n.ls.Iterate(loc)
}

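// InitDirection resizes the internal Hessian storage for the problem
// dimension, resets the regularization parameter tau, and computes the first
// descent direction. InitDirection is part of the NextDirectioner interface.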
func (n *Newton) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	n.hess = resizeSymDense(n.hess, dim)
	n.tau = 0
	return n.NextDirection(loc, dir)
}

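// NextDirection stores in dir the solution of the regularized Newton system
//
//	(H_k + tau*I) d_k = -∇f_k,
//
// adding a multiple of the identity to the Hessian when it is not positive
// definite, and returns an initial step size of 1 for the line search. If no
// positive definite modification is found within maxNewtonModifications
// attempts, dir is set to the negative gradient. NextDirection is part of
// the NextDirectioner interface.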
func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// This method implements Algorithm 3.3 (Cholesky with Added Multiple of
	// the Identity) from Nocedal, Wright (2006), 2nd edition.

	dim := len(loc.X)
	d := mat.NewVecDense(dim, dir)
	grad := mat.NewVecDense(dim, loc.Gradient)
	n.hess.CopySym(loc.Hessian)

	// Find the smallest diagonal entry of the Hessian.
	minA := n.hess.At(0, 0)
	for i := 1; i < dim; i++ {
		a := n.hess.At(i, i)
		if a < minA {
			minA = a
		}
	}
	// If the smallest diagonal entry is positive, the Hessian may be positive
	// definite, and so first attempt to apply the Cholesky factorization to
	// the un-modified Hessian. If the smallest entry is negative, use the
	// final tau from the last iteration if regularization was needed,
	// otherwise guess an appropriate value for tau.
	if minA > 0 {
		n.tau = 0
	} else if n.tau == 0 {
		n.tau = -minA + 0.001
	}

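	// For example, with the default Increase of 5 and minA = -2, the trial
	// values are tau = 2.001, 10.005, 50.025, ... until the factorization
	// succeeds or maxNewtonModifications attempts have been made.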
	for k := 0; k < maxNewtonModifications; k++ {
		if n.tau != 0 {
			// Add a multiple of the identity to the Hessian.
			for i := 0; i < dim; i++ {
				n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau)
			}
		}
		// Try to apply the Cholesky factorization.
		pd := n.chol.Factorize(n.hess)
		if pd {
			// Store the solution in d's backing array, dir.
			err := n.chol.SolveVecTo(d, grad)
			if err == nil {
				d.ScaleVec(-1, d)
				return 1
			}
		}
		// The modified Hessian is not positive definite, so increase tau.
		n.tau = math.Max(n.Increase*n.tau, 0.001)
	}

	// Hessian modification failed to produce a positive definite matrix.
	// Fall back to the negative gradient as the descent direction.
	d.ScaleVec(-1, grad)
	return 1
}

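// needs reports that Newton requires both the gradient and the Hessian of
// the objective function. needs is part of the localMethod interface.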
func (n *Newton) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, true}
}