multimin.scm 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. #| -*-Scheme-*-
  2. Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
  3. 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
  4. 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Massachusetts
  5. Institute of Technology
  6. This file is part of MIT/GNU Scheme.
  7. MIT/GNU Scheme is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 2 of the License, or (at
  10. your option) any later version.
  11. MIT/GNU Scheme is distributed in the hope that it will be useful, but
  12. WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with MIT/GNU Scheme; if not, write to the Free Software
  17. Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
  18. USA.
  19. |#
  20. ;;;; MULTIMIN.SCM -- n-dimensional minimization routines
  21. ;;; 9/22/89 (gjs) reduce->a-reduce
  22. (declare (usual-integrations = + - * /
  23. zero? 1+ -1+
  24. ;; truncate round floor ceiling
  25. sqrt exp log sin cos))
  26. ;;; Nelder-Mead downhill simplex algorithm.
  27. ;;; We have a function, f, defined on points in n-space.
  28. ;;; We are looking for a local minimum of f.
  29. ;;; The central idea -- We have a simplex of n+1 vertices where f is
  30. ;;; known. We want to deform the simplex until it sits on the minimum.
  31. ;;; A simplex is represented as a list of entries, each of which is a
  32. ;;; pair consisting of a vertex and the value of f at that vertex.
  33. (define simplex-size length)
  34. (define simplex-vertex car)
  35. (define simplex-value cdr)
  36. (define simplex-entry cons)
  37. ;;; Simplices are stored in sorted order
  38. (define simplex-highest car)
  39. (define simplex-but-highest cdr)
  40. (define simplex-next-highest cadr)
  41. (define (simplex-lowest s) (car (last-pair s)))
  42. (define (simplex-add-entry entry s)
  43. (let ((fv (simplex-value entry)))
  44. (let loop ((s s))
  45. (cond ((null? s) (list entry))
  46. ((> fv (simplex-value (car s))) (cons entry s))
  47. (else (cons (car s) (loop (cdr s))))))))
  48. (define (simplex-adjoin v fv s)
  49. (simplex-add-entry (simplex-entry v fv) s))
  50. (define (simplex-sort s)
  51. (let lp ((s s) (ans '()))
  52. (if (null? s)
  53. ans
  54. (lp (cdr s) (simplex-add-entry (car s) ans)))))
  55. (define simplex-centroid
  56. (lambda (simplex)
  57. (scalar*vector (/ 1 (simplex-size simplex))
  58. (a-reduce vector+vector
  59. (map simplex-vertex simplex)))))
  60. (define extender
  61. (lambda (p1 p2)
  62. (let ((dp (vector-vector p2 p1)))
  63. (lambda (k)
  64. (vector+vector p1 (scalar*vector k dp))))))
  65. (define (make-simplex point step f)
  66. (simplex-sort
  67. (map (lambda (vertex) (simplex-entry vertex (f vertex)))
  68. (cons point
  69. (let ((n (vector-length point)))
  70. (generate-list n
  71. (lambda (i)
  72. (vector+vector point
  73. (scalar*vector step
  74. (v:make-basis-unit n i))))))))))
  75. (define (stationary? simplex epsilon)
  76. (close-enuf? (simplex-value (simplex-highest simplex))
  77. (simplex-value (simplex-lowest simplex))
  78. epsilon))
  79. (define nelder-wallp? false)
  80. (define (nelder-mead f start-pt start-step epsilon maxiter)
  81. (define shrink-coef 0.5)
  82. (define reflection-coef 2.0)
  83. (define expansion-coef 3.0)
  84. (define contraction-coef-1 1.5)
  85. (define contraction-coef-2 (- 2 contraction-coef-1))
  86. (define (simplex-shrink point simplex)
  87. (let ((pv (simplex-vertex point)))
  88. (simplex-sort
  89. (map (lambda (sp)
  90. (if (eq? point sp)
  91. sp
  92. (let ((vertex ((extender pv (simplex-vertex sp))
  93. shrink-coef)))
  94. (simplex-entry vertex (f vertex)))))
  95. simplex))))
  96. (define (nm-step simplex)
  97. (let ((g (simplex-highest simplex))
  98. (h (simplex-next-highest simplex))
  99. (s (simplex-lowest simplex))
  100. (s-h (simplex-but-highest simplex)))
  101. (let* ((vg (simplex-vertex g)) (fg (simplex-value g))
  102. (fh (simplex-value h)) (fs (simplex-value s))
  103. (extend (extender vg (simplex-centroid s-h))))
  104. (let* ((vr (extend reflection-coef))
  105. (fr (f vr))) ;try reflection
  106. (if (< fr fh) ;reflection successful
  107. (if (< fr fs) ;new minimum
  108. (let* ((ve (extend expansion-coef))
  109. (fe (f ve))) ;try expansion
  110. (if (< fe fs) ;expansion successful
  111. (simplex-adjoin ve fe s-h)
  112. (simplex-adjoin vr fr s-h)))
  113. (simplex-adjoin vr fr s-h))
  114. (let* ((vc (extend (if (< fr fg)
  115. contraction-coef-1
  116. contraction-coef-2)))
  117. (fc (f vc))) ;try contraction
  118. (if (< fc fg) ;contraction successful
  119. (simplex-adjoin vc fc s-h)
  120. (simplex-shrink s simplex))))))))
  121. (define (limit simplex count)
  122. (if nelder-wallp? (write-line (simplex-lowest simplex)))
  123. (if (stationary? simplex epsilon)
  124. (list 'ok (simplex-lowest simplex) count)
  125. (if (fix:= count maxiter)
  126. (list 'maxcount (simplex-lowest simplex) count)
  127. (limit (nm-step simplex) (fix:+ count 1)))))
  128. (limit (make-simplex start-pt start-step f) 0))
  129. ;;;(define (stationary? simplex epsilon)
  130. ;;; (let ((np1 (length simplex)))
  131. ;;; (let* ((mean (/ (a-reduce + (map simplex-value simplex)) np1))
  132. ;;; (variance (/ (a-reduce +
  133. ;;; (map (lambda (e)
  134. ;;; (square (- (simplex-value e) mean)))
  135. ;;; simplex))
  136. ;;; np1)))
  137. ;;; (< variance epsilon))))
  138. ;;; Variable Metric Methods:
  139. ;;; Fletcher-Powell (choice of line search)
  140. ;;; Broyden-Fletcher-Goldfarb-Shanno (Davidon's line search)
  141. ;;; The following utility procedure returns a gradient function given a
  142. ;;; differentiable function f of a vector of length n. In general, a gradient
  143. ;;; function accepts an n-vector and returns a vector of derivatives. In this
  144. ;;; case, the derivatives are estimated by richardson-extrapolation of a
  145. ;;; central difference quotient, with the convergence tolerance being a
  146. ;;; specified parameter.
  147. (define (generate-gradient-procedure f n tol)
  148. (lambda (x)
  149. (generate-vector
  150. n
  151. (lambda (i)
  152. (richardson-limit
  153. (let ((fi (lambda (t)
  154. (f (vector+vector x
  155. (scalar*vector t
  156. (v:make-basis-unit n i)))))))
  157. (lambda (h) (/ (- (fi h) (fi (- h))) 2 h)))
  158. (max 0.1 (* 0.1 (abs (vector-ref x i)))) ;starting h
  159. 2 ;ord -- see doc for RE.SCM
  160. 2 ;inc
  161. tol)))))
  162. ;;; The following line-minimization procedure is Davidon's original
  163. ;;; recommendation. It does a bracketing search using gradients, and
  164. ;;; then interpolates the straddled minimum with a cubic.
  165. (define (line-min-davidon f g x v est)
  166. (define (t->x t) (vector+vector x (scalar*vector t v)))
  167. (define (linef t) (f (t->x t)))
  168. (define (lineg t) (g (t->x t)))
  169. (define f0 (linef 0))
  170. (define g0 (lineg 0))
  171. (define s0 (/ (- f0 est) -.5 (v:dot-product g0 v)))
  172. (let loop ((t (if (and (positive? s0) (< s0 1)) s0 1))
  173. (iter 0))
  174. (if (> iter 100)
  175. (list 'no-min)
  176. (let ((ft (linef t))
  177. (gt (lineg t)))
  178. (if (or (>= ft f0)
  179. (>= (v:dot-product v gt) 0))
  180. (let* ((vg0 (v:dot-product v g0))
  181. (vgt (v:dot-product v gt))
  182. (z (+ (* 3 (- f0 ft) (/ 1 t)) vg0 vgt))
  183. (w (sqrt (- (* z z) (* vg0 vgt))))
  184. (tstar (* t (- 1 (/ (+ vgt w (- z))
  185. (+ vgt (- vg0) (* 2 w))))))
  186. (fstar (linef tstar)))
  187. (if (< fstar f0)
  188. (list 'ok (t->x tstar) fstar)
  189. (loop tstar (+ iter 1))))
  190. (loop (* t 2) (+ iter 1)))))))
  191. ;;; The following line-minimization procedure is based on Brent's
  192. ;;; algorithm.
  193. (define (line-min-brent f g x v est)
  194. (define (t->x t) (vector+vector x (scalar*vector t v)))
  195. (define (linef t) (f (t->x t)))
  196. (define (lineg t) (g (t->x t)))
  197. (define f0 (linef 0))
  198. (define g0 (lineg 0))
  199. (define s0 (/ (- f0 est) -.5 (v:dot-product g0 v)))
  200. (let loop ((t (if (and (positive? s0) (< s0 1)) s0 1))
  201. (iter 0))
  202. (if (> iter 100)
  203. (list 'no-min)
  204. (let ((ft (linef t))
  205. (gt (lineg t)))
  206. (if (or (>= ft f0)
  207. (>= (v:dot-product v gt) 0))
  208. (let* ((result (brent-min linef 0 t *sqrt-machine-epsilon*))
  209. (tstar (car result))
  210. (fstar (cadr result)))
  211. (list 'ok (t->x tstar) fstar))
  212. (loop (* t 2) (+ iter 1)))))))
  213. ;;; In the following implementation of the Davidon-Fletcher-Powell
  214. ;;; algorithm, f is a function of a single vector argument that returns
  215. ;;; a real value to be minimized, g is the vector-valued gradient and
  216. ;;; x is a (vector) starting point, and est is an estimate of the minimum
  217. ;;; function value. If g is '(), then a numerical approximation is
  218. ;;; substituted using GENERATE-GRADIENT-PROCEDURE. ftol is the convergence
  219. ;;; criterion: the search is stopped when the relative change in f falls
  220. ;;; below ftol.
  221. (define fletcher-powell-wallp? false)
  222. (define (fletcher-powell line-search f g x est ftol maxiter)
  223. (let ((n (vector-length x)))
  224. (if (null? g) (set! g (generate-gradient-procedure
  225. f n (* 1000 *machine-epsilon*))))
  226. (let loop ((H (m:make-identity n))
  227. (x x)
  228. (fx (f x))
  229. (gx (g x))
  230. (count 0))
  231. (if fletcher-powell-wallp? (print (list x fx gx)))
  232. (let ((v (matrix*vector H (scalar*vector -1 gx))))
  233. (if (positive? (v:dot-product v gx))
  234. (begin
  235. (if fletcher-powell-wallp?
  236. (display (list "H reset to Identity at iteration" count)))
  237. (loop (m:make-identity n) x fx gx count))
  238. (let ((r (line-search f g x v est)))
  239. (if (eq? (car r) 'no-min)
  240. (list 'no-min (cons x fx) count)
  241. (let ((newx (cadr r))
  242. (newfx (caddr r)))
  243. (if (close-enuf? newfx fx ftol) ;convergence criterion
  244. (list 'ok (cons newx newfx) count)
  245. (if (fix:= count maxiter)
  246. (list 'maxcount (cons newx newfx) count)
  247. (let* ((newgx (g newx))
  248. (dx (vector-vector newx x))
  249. (dg (vector-vector newgx gx))
  250. (Hdg (matrix*vector H dg))
  251. (A (matrix*scalar
  252. (m:outer-product (vector->column-matrix dx)
  253. (vector->row-matrix dx))
  254. (/ 1 (v:dot-product dx dg))))
  255. (B (matrix*scalar
  256. (m:outer-product (vector->column-matrix Hdg)
  257. (vector->row-matrix Hdg))
  258. (/ -1 (v:dot-product dg Hdg))))
  259. (newH (matrix+matrix H (matrix+matrix A B))))
  260. (loop newH newx newfx newgx (fix:+ count 1)))))))))))))
  261. ;;; The following procedures, DFP and DFP-BRENT, call directly upon
  262. ;;; FLETCHER-POWELL. The first uses Davidon's line search which is
  263. ;;; efficient, and would be the normal choice. The second uses Brent's
  264. ;;; line search, which is less efficient but more reliable.
  265. (define (dfp f g x est ftol maxiter)
  266. (fletcher-powell line-min-davidon f g x est ftol maxiter))
  267. (define (dfp-brent f g x est ftol maxiter)
  268. (fletcher-powell line-min-brent f g x est ftol maxiter))
  269. ;;; The following is a variation on DFP, due (independently, we are told)
  270. ;;; to Broyden, Fletcher, Goldfarb, and Shanno. It differs in the formula
  271. ;;; used to update H, and is said to be more immune than DFP to imprecise
  272. ;;; line-search. Consequently, it is offered with Davidon's line search
  273. ;;; wired in.
  274. (define bfgs-wallp? false)
  275. (define (bfgs f g x est ftol maxiter)
  276. (let ((n (vector-length x)))
  277. (if (null? g) (set! g (generate-gradient-procedure
  278. f n (* 1000 *machine-epsilon*))))
  279. (let loop ((H (m:make-identity n))
  280. (x x)
  281. (fx (f x))
  282. (gx (g x))
  283. (count 0))
  284. (if bfgs-wallp? (print (list x fx gx)))
  285. (let ((v (matrix*vector H (scalar*vector -1 gx))))
  286. (if (positive? (v:dot-product v gx))
  287. (begin
  288. (if bfgs-wallp?
  289. (display (list "H reset to Identity at iteration" count)))
  290. (loop (m:make-identity n) x fx gx count))
  291. (let ((r (line-min-davidon f g x v est)))
  292. (if (eq? (car r) 'no-min)
  293. (list 'no-min (cons x fx) count)
  294. (let ((newx (cadr r))
  295. (newfx (caddr r)))
  296. (if (close-enuf? newfx fx ftol) ;convergence criterion
  297. (list 'ok (cons newx newfx) count)
  298. (if (fix:= count maxiter)
  299. (list 'maxcount (cons newx newfx) count)
  300. (let* ((newgx (g newx))
  301. (dx (vector-vector newx x))
  302. (dg (vector-vector newgx gx))
  303. (Hdg (matrix*vector H dg))
  304. (dxdg (v:dot-product dx dg))
  305. (dgHdg (v:dot-product dg Hdg))
  306. (u (vector-vector (scalar*vector (/ 1 dxdg) dx)
  307. (scalar*vector (/ 1 dgHdg) Hdg)))
  308. (A (matrix*scalar
  309. (m:outer-product (vector->column-matrix dx)
  310. (vector->row-matrix dx))
  311. (/ 1 dxdg)))
  312. (B (matrix*scalar
  313. (m:outer-product (vector->column-matrix Hdg)
  314. (vector->row-matrix Hdg))
  315. (/ -1 dgHdg)))
  316. (C (matrix*scalar
  317. (m:outer-product (vector->column-matrix u)
  318. (vector->row-matrix u))
  319. dgHdg))
  320. (newH
  321. (matrix+matrix (matrix+matrix H A)
  322. (matrix+matrix B C))))
  323. (loop newH newx newfx newgx (fix:+ count 1)))))))))))))