123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375 |
- #| -*-Scheme-*-
- Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
- 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
- 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Massachusetts
- Institute of Technology
- This file is part of MIT/GNU Scheme.
- MIT/GNU Scheme is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or (at
- your option) any later version.
- MIT/GNU Scheme is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with MIT/GNU Scheme; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
- USA.
- |#
- ;;;; MULTIMIN.SCM -- n-dimensional minimization routines
- ;;; 9/22/89 (gjs) reduce->a-reduce
- (declare (usual-integrations = + - * /
- zero? 1+ -1+
- ;; truncate round floor ceiling
- sqrt exp log sin cos))
- ;;; Nelder-Mead downhill simplex algorithm.
- ;;; We have a function, f, defined on points in n-space.
- ;;; We are looking for a local minimum of f.
- ;;; The central idea -- We have a simplex of n+1 vertices where f is
- ;;; known. We want to deform the simplex until it sits on the minimum.
- ;;; A simplex is represented as a list of entries, each of which is a
- ;;; pair consisting of a vertex and the value of f at that vertex.
- (define simplex-size length)
- (define simplex-vertex car)
- (define simplex-value cdr)
- (define simplex-entry cons)
- ;;; Simplices are stored in sorted order
- (define simplex-highest car)
- (define simplex-but-highest cdr)
- (define simplex-next-highest cadr)
- (define (simplex-lowest s) (car (last-pair s)))
- (define (simplex-add-entry entry s)
- (let ((fv (simplex-value entry)))
- (let loop ((s s))
- (cond ((null? s) (list entry))
- ((> fv (simplex-value (car s))) (cons entry s))
- (else (cons (car s) (loop (cdr s))))))))
- (define (simplex-adjoin v fv s)
- (simplex-add-entry (simplex-entry v fv) s))
- (define (simplex-sort s)
- (let lp ((s s) (ans '()))
- (if (null? s)
- ans
- (lp (cdr s) (simplex-add-entry (car s) ans)))))
- (define simplex-centroid
- (lambda (simplex)
- (scalar*vector (/ 1 (simplex-size simplex))
- (a-reduce vector+vector
- (map simplex-vertex simplex)))))
- (define extender
- (lambda (p1 p2)
- (let ((dp (vector-vector p2 p1)))
- (lambda (k)
- (vector+vector p1 (scalar*vector k dp))))))
- (define (make-simplex point step f)
- (simplex-sort
- (map (lambda (vertex) (simplex-entry vertex (f vertex)))
- (cons point
- (let ((n (vector-length point)))
- (generate-list n
- (lambda (i)
- (vector+vector point
- (scalar*vector step
- (v:make-basis-unit n i))))))))))
- (define (stationary? simplex epsilon)
- (close-enuf? (simplex-value (simplex-highest simplex))
- (simplex-value (simplex-lowest simplex))
- epsilon))
- (define nelder-wallp? false)
- (define (nelder-mead f start-pt start-step epsilon maxiter)
- (define shrink-coef 0.5)
- (define reflection-coef 2.0)
- (define expansion-coef 3.0)
- (define contraction-coef-1 1.5)
- (define contraction-coef-2 (- 2 contraction-coef-1))
- (define (simplex-shrink point simplex)
- (let ((pv (simplex-vertex point)))
- (simplex-sort
- (map (lambda (sp)
- (if (eq? point sp)
- sp
- (let ((vertex ((extender pv (simplex-vertex sp))
- shrink-coef)))
- (simplex-entry vertex (f vertex)))))
- simplex))))
- (define (nm-step simplex)
- (let ((g (simplex-highest simplex))
- (h (simplex-next-highest simplex))
- (s (simplex-lowest simplex))
- (s-h (simplex-but-highest simplex)))
- (let* ((vg (simplex-vertex g)) (fg (simplex-value g))
- (fh (simplex-value h)) (fs (simplex-value s))
- (extend (extender vg (simplex-centroid s-h))))
- (let* ((vr (extend reflection-coef))
- (fr (f vr))) ;try reflection
- (if (< fr fh) ;reflection successful
- (if (< fr fs) ;new minimum
- (let* ((ve (extend expansion-coef))
- (fe (f ve))) ;try expansion
- (if (< fe fs) ;expansion successful
- (simplex-adjoin ve fe s-h)
- (simplex-adjoin vr fr s-h)))
- (simplex-adjoin vr fr s-h))
- (let* ((vc (extend (if (< fr fg)
- contraction-coef-1
- contraction-coef-2)))
- (fc (f vc))) ;try contraction
- (if (< fc fg) ;contraction successful
- (simplex-adjoin vc fc s-h)
- (simplex-shrink s simplex))))))))
- (define (limit simplex count)
- (if nelder-wallp? (write-line (simplex-lowest simplex)))
- (if (stationary? simplex epsilon)
- (list 'ok (simplex-lowest simplex) count)
- (if (fix:= count maxiter)
- (list 'maxcount (simplex-lowest simplex) count)
- (limit (nm-step simplex) (fix:+ count 1)))))
- (limit (make-simplex start-pt start-step f) 0))
- ;;;(define (stationary? simplex epsilon)
- ;;; (let ((np1 (length simplex)))
- ;;; (let* ((mean (/ (a-reduce + (map simplex-value simplex)) np1))
- ;;; (variance (/ (a-reduce +
- ;;; (map (lambda (e)
- ;;; (square (- (simplex-value e) mean)))
- ;;; simplex))
- ;;; np1)))
- ;;; (< variance epsilon))))
- ;;; Variable Metric Methods:
- ;;; Fletcher-Powell (choice of line search)
- ;;; Broyden-Fletcher-Goldfarb-Shanno (Davidon's line search)
- ;;; The following utility procedure returns a gradient function given a
- ;;; differentiable function f of a vector of length n. In general, a gradient
- ;;; function accepts an n-vector and returns a vector of derivatives. In this
- ;;; case, the derivatives are estimated by richardson-extrapolation of a
- ;;; central difference quotient, with the convergence tolerance being a
- ;;; specified parameter.
- (define (generate-gradient-procedure f n tol)
- (lambda (x)
- (generate-vector
- n
- (lambda (i)
- (richardson-limit
- (let ((fi (lambda (t)
- (f (vector+vector x
- (scalar*vector t
- (v:make-basis-unit n i)))))))
- (lambda (h) (/ (- (fi h) (fi (- h))) 2 h)))
- (max 0.1 (* 0.1 (abs (vector-ref x i)))) ;starting h
- 2 ;ord -- see doc for RE.SCM
- 2 ;inc
- tol)))))
- ;;; The following line-minimization procedure is Davidon's original
- ;;; recommendation. It does a bracketing search using gradients, and
- ;;; then interpolates the straddled minimum with a cubic.
- (define (line-min-davidon f g x v est)
- (define (t->x t) (vector+vector x (scalar*vector t v)))
- (define (linef t) (f (t->x t)))
- (define (lineg t) (g (t->x t)))
- (define f0 (linef 0))
- (define g0 (lineg 0))
- (define s0 (/ (- f0 est) -.5 (v:dot-product g0 v)))
- (let loop ((t (if (and (positive? s0) (< s0 1)) s0 1))
- (iter 0))
- (if (> iter 100)
- (list 'no-min)
- (let ((ft (linef t))
- (gt (lineg t)))
- (if (or (>= ft f0)
- (>= (v:dot-product v gt) 0))
- (let* ((vg0 (v:dot-product v g0))
- (vgt (v:dot-product v gt))
- (z (+ (* 3 (- f0 ft) (/ 1 t)) vg0 vgt))
- (w (sqrt (- (* z z) (* vg0 vgt))))
- (tstar (* t (- 1 (/ (+ vgt w (- z))
- (+ vgt (- vg0) (* 2 w))))))
- (fstar (linef tstar)))
- (if (< fstar f0)
- (list 'ok (t->x tstar) fstar)
- (loop tstar (+ iter 1))))
- (loop (* t 2) (+ iter 1)))))))
-
-
-
- ;;; The following line-minimization procedure is based on Brent's
- ;;; algorithm.
- (define (line-min-brent f g x v est)
- (define (t->x t) (vector+vector x (scalar*vector t v)))
- (define (linef t) (f (t->x t)))
- (define (lineg t) (g (t->x t)))
- (define f0 (linef 0))
- (define g0 (lineg 0))
- (define s0 (/ (- f0 est) -.5 (v:dot-product g0 v)))
- (let loop ((t (if (and (positive? s0) (< s0 1)) s0 1))
- (iter 0))
- (if (> iter 100)
- (list 'no-min)
- (let ((ft (linef t))
- (gt (lineg t)))
- (if (or (>= ft f0)
- (>= (v:dot-product v gt) 0))
- (let* ((result (brent-min linef 0 t *sqrt-machine-epsilon*))
- (tstar (car result))
- (fstar (cadr result)))
- (list 'ok (t->x tstar) fstar))
- (loop (* t 2) (+ iter 1)))))))
- ;;; In the following implementation of the Davidon-Fletcher-Powell
- ;;; algorithm, f is a function of a single vector argument that returns
- ;;; a real value to be minimized, g is the vector-valued gradient and
- ;;; x is a (vector) starting point, and est is an estimate of the minimum
- ;;; function value. If g is '(), then a numerical approximation is
- ;;; substituted using GENERATE-GRADIENT-PROCEDURE. ftol is the convergence
- ;;; criterion: the search is stopped when the relative change in f falls
- ;;; below ftol.
- (define fletcher-powell-wallp? false)
- (define (fletcher-powell line-search f g x est ftol maxiter)
- (let ((n (vector-length x)))
- (if (null? g) (set! g (generate-gradient-procedure
- f n (* 1000 *machine-epsilon*))))
- (let loop ((H (m:make-identity n))
- (x x)
- (fx (f x))
- (gx (g x))
- (count 0))
- (if fletcher-powell-wallp? (print (list x fx gx)))
- (let ((v (matrix*vector H (scalar*vector -1 gx))))
- (if (positive? (v:dot-product v gx))
- (begin
- (if fletcher-powell-wallp?
- (display (list "H reset to Identity at iteration" count)))
- (loop (m:make-identity n) x fx gx count))
- (let ((r (line-search f g x v est)))
- (if (eq? (car r) 'no-min)
- (list 'no-min (cons x fx) count)
- (let ((newx (cadr r))
- (newfx (caddr r)))
- (if (close-enuf? newfx fx ftol) ;convergence criterion
- (list 'ok (cons newx newfx) count)
- (if (fix:= count maxiter)
- (list 'maxcount (cons newx newfx) count)
- (let* ((newgx (g newx))
- (dx (vector-vector newx x))
- (dg (vector-vector newgx gx))
- (Hdg (matrix*vector H dg))
- (A (matrix*scalar
- (m:outer-product (vector->column-matrix dx)
- (vector->row-matrix dx))
- (/ 1 (v:dot-product dx dg))))
- (B (matrix*scalar
- (m:outer-product (vector->column-matrix Hdg)
- (vector->row-matrix Hdg))
- (/ -1 (v:dot-product dg Hdg))))
- (newH (matrix+matrix H (matrix+matrix A B))))
- (loop newH newx newfx newgx (fix:+ count 1)))))))))))))
- ;;; The following procedures, DFP and DFP-BRENT, call directly upon
- ;;; FLETCHER-POWELL. The first uses Davidon's line search which is
- ;;; efficient, and would be the normal choice. The second uses Brent's
- ;;; line search, which is less efficient but more reliable.
- (define (dfp f g x est ftol maxiter)
- (fletcher-powell line-min-davidon f g x est ftol maxiter))
- (define (dfp-brent f g x est ftol maxiter)
- (fletcher-powell line-min-brent f g x est ftol maxiter))
- ;;; The following is a variation on DFP, due (independently, we are told)
- ;;; to Broyden, Fletcher, Goldfarb, and Shanno. It differs in the formula
- ;;; used to update H, and is said to be more immune than DFP to imprecise
- ;;; line-search. Consequently, it is offered with Davidon's line search
- ;;; wired in.
- (define bfgs-wallp? false)
- (define (bfgs f g x est ftol maxiter)
- (let ((n (vector-length x)))
- (if (null? g) (set! g (generate-gradient-procedure
- f n (* 1000 *machine-epsilon*))))
- (let loop ((H (m:make-identity n))
- (x x)
- (fx (f x))
- (gx (g x))
- (count 0))
- (if bfgs-wallp? (print (list x fx gx)))
- (let ((v (matrix*vector H (scalar*vector -1 gx))))
- (if (positive? (v:dot-product v gx))
- (begin
- (if bfgs-wallp?
- (display (list "H reset to Identity at iteration" count)))
- (loop (m:make-identity n) x fx gx count))
- (let ((r (line-min-davidon f g x v est)))
- (if (eq? (car r) 'no-min)
- (list 'no-min (cons x fx) count)
- (let ((newx (cadr r))
- (newfx (caddr r)))
- (if (close-enuf? newfx fx ftol) ;convergence criterion
- (list 'ok (cons newx newfx) count)
- (if (fix:= count maxiter)
- (list 'maxcount (cons newx newfx) count)
- (let* ((newgx (g newx))
- (dx (vector-vector newx x))
- (dg (vector-vector newgx gx))
- (Hdg (matrix*vector H dg))
- (dxdg (v:dot-product dx dg))
- (dgHdg (v:dot-product dg Hdg))
- (u (vector-vector (scalar*vector (/ 1 dxdg) dx)
- (scalar*vector (/ 1 dgHdg) Hdg)))
- (A (matrix*scalar
- (m:outer-product (vector->column-matrix dx)
- (vector->row-matrix dx))
- (/ 1 dxdg)))
- (B (matrix*scalar
- (m:outer-product (vector->column-matrix Hdg)
- (vector->row-matrix Hdg))
- (/ -1 dgHdg)))
- (C (matrix*scalar
- (m:outer-product (vector->column-matrix u)
- (vector->row-matrix u))
- dgHdg))
- (newH
- (matrix+matrix (matrix+matrix H A)
- (matrix+matrix B C))))
- (loop newH newx newfx newgx (fix:+ count 1)))))))))))))
|