L-BFGS-B  3.0
Large-scale Bound-constrained Optimization
cauchy.f
Go to the documentation of this file.
1 c> \file cauchy.f
2 
3 c> \brief Compute the Generalized Cauchy Point along the projected gradient direction.
4 c>
5 c> For given x, l, u, g (with sbgnrm > 0), and a limited memory
6 c> BFGS matrix B defined in terms of matrices WY, WS, WT, and
7 c> scalars head, col, and theta, this subroutine computes the
8 c> generalized Cauchy point (GCP), defined as the first local
9 c> minimizer of the quadratic
10 c>
11 c> Q(x + s) = g's + 1/2 s'Bs
12 c>
13 c> along the projected gradient direction P(x-tg,l,u).
14 c> The routine returns the GCP in xcp.
15 c>
16 c> @param n On entry n is the dimension of the problem.<br/>
17 c> On exit n is unchanged.
18 c>
19 c> @param x On entry x is the starting point for the GCP computation.<br/>
20 c> On exit x is unchanged.
21 c>
22 c> @param l On entry l is the lower bound of x.<br/>
23 c> On exit l is unchanged.
24 c>
25 c> @param u On entry u is the upper bound of x.<br/>
26 c> On exit u is unchanged.
27 c>
28 c> @param nbd On entry nbd represents the type of bounds imposed on the
29 c> variables, and must be specified as follows:
30 c> nbd(i)=<ul><li>0 if x(i) is unbounded,</li>
31 c> <li>1 if x(i) has only a lower bound,</li>
32 c> <li>2 if x(i) has both lower and upper bounds, and</li>
33 c> <li>3 if x(i) has only an upper bound.</li></ul>
34 c> On exit nbd is unchanged.
35 c>
36 c> @param g On entry g is the gradient of f(x). g must be a nonzero vector.<br/>
37 c> On exit g is unchanged.
38 c>
39 c> @param iorder iorder will be used to store the breakpoints in the piecewise
40 c> linear path and free variables encountered.<br/>
41 c> On exit,<ul><li>iorder(1),...,iorder(nleft)
42 c> are indices of breakpoints
43 c> which have not been encountered;</li>
44 c> <li>iorder(nleft+1),...,iorder(nbreak)
45 c> are indices of
46 c> encountered breakpoints; and</li>
47 c> <li>iorder(nfree),...,iorder(n)
48 c> are indices of variables which
49 c> have no bound constraits along the search direction.</li></ul>
50 c>
51 c> @param iwhere On entry iwhere indicates only the permanently fixed (iwhere=3)
52 c> or free (iwhere= -1) components of x.<br/>
53 c> On exit iwhere records the status of the current x variables.
54 c> iwhere(i)=<ul><li>-3 if x(i) is free and has bounds, but is not moved</li>
55 c> <li> 0 if x(i) is free and has bounds, and is moved</li>
56 c> <li> 1 if x(i) is fixed at l(i), and l(i) .ne. u(i)</li>
57 c> <li> 2 if x(i) is fixed at u(i), and u(i) .ne. l(i)</li>
58 c> <li> 3 if x(i) is always fixed, i.e., u(i)=x(i)=l(i)</li>
59 c> <li>-1 if x(i) is always free, i.e., it has no bounds.</li></ul>
60 c>
61 c> @param t working array; will be used to store the break points.
62 c>
63 c> @param d the Cauchy direction P(x-tg)-x
64 c>
65 c> @param xcp returns the GCP on exit
66 c>
67 c> @param m On entry m is the maximum number of variable metric corrections
68 c> used to define the limited memory matrix.<br/>
69 c> On exit m is unchanged.
70 c>
71 c> @param ws On entry this stores S, a set of s-vectors, that defines the
72 c> limited memory BFGS matrix.<br/>
73 c> On exit this array is unchanged.
74 c>
75 c> @param wy On entry this stores Y, a set of y-vectors, that defines the
76 c> limited memory BFGS matrix.<br/>
77 c> On exit this array is unchanged.
78 c>
79 c> @param sy On entry this stores S'Y, that defines the
80 c> limited memory BFGS matrix.<br/>
81 c> On exit this array is unchanged.
82 c>
83 c> @param wt On entry this stores the
84 c> Cholesky factorization of (theta*S'S+LD^(-1)L'), that defines the
85 c> limited memory BFGS matrix.<br/>
86 c> On exit this array is unchanged.
87 c>
88 c> @param theta On entry theta is the scaling factor specifying B_0 = theta I.<br/>
89 c> On exit theta is unchanged.
90 c>
91 c> @param col On entry col is the actual number of variable metric
92 c> corrections stored so far.<br/>
93 c> On exit col is unchanged.
94 c>
95 c> @param head On entry head is the location of the first s-vector (or y-vector)
96 c> in S (or Y).<br/>
97 c> On exit col is unchanged.
98 c>
99 c> @param p will be used to store the vector p = W^(T)d.
100 c>
101 c> @param c will be used to store the vector c = W^(T)(xcp-x).
102 c>
103 c> @param wbp will be used to store the row of W corresponding
104 c> to a breakpoint.
105 c>
106 c> @param v working array
107 c>
108 c> @param nseg On exit nseg records the number of quadratic segments explored
109 c> in searching for the GCP.
110 c>
111 c> @param iprint variable that must be set by the user.<br/>
112 c> It controls the frequency and type of output generated:
113 c> <ul><li>iprint<0 no output is generated;</li>
114 c> <li>iprint=0 print only one line at the last iteration;</li>
115 c> <li>0<iprint<99 print also f and |proj g| every iprint iterations;</li>
116 c> <li>iprint=99 print details of every iteration except n-vectors;</li>
117 c> <li>iprint=100 print also the changes of active set and final x;</li>
118 c> <li>iprint>100 print details of every iteration including x and g;</li></ul>
119 c> When iprint > 0, the file iterate.dat will be created to
120 c> summarize the iteration.
121 c>
122 c> @param sbgnrm On entry sbgnrm is the norm of the projected gradient at x.<br/>
123 c> On exit sbgnrm is unchanged.
124 c>
125 c> @param info On entry info is 0.
126 c> On exit info =<ul><li>0 for normal return,</li>
127 c> <li>= nonzero for abnormal return when the the system
128 c> used in routine bmv is singular.</li></ul>
129 c> @param epsmch machine precision epsilon
130  subroutine cauchy(n, x, l, u, nbd, g, iorder, iwhere, t, d, xcp,
131  + m, wy, ws, sy, wt, theta, col, head, p, c, wbp,
132  + v, nseg, iprint, sbgnrm, info, epsmch)
133  implicit none
134  integer n, m, head, col, nseg, iprint, info,
135  + nbd(n), iorder(n), iwhere(n)
136  double precision theta, epsmch,
137  + x(n), l(n), u(n), g(n), t(n), d(n), xcp(n),
138  + wy(n, col), ws(n, col), sy(m, m),
139  + wt(m, m), p(2*m), c(2*m), wbp(2*m), v(2*m)
140 c
141 c References:
142 c
143 c [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
144 c memory algorithm for bound constrained optimization'',
145 c SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
146 c
147 c [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: FORTRAN
148 c Subroutines for Large Scale Bound Constrained Optimization''
149 c Tech. Report, NAM-11, EECS Department, Northwestern University,
150 c 1994.
151 c
152 c (Postscript files of these papers are available via anonymous
153 c ftp to eecs.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
154 c
155 c * * *
156 c
157 c NEOS, November 1994. (Latest revision June 1996.)
158 c Optimization Technology Center.
159 c Argonne National Laboratory and Northwestern University.
160 c Written by
161 c Ciyou Zhu
162 c in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
163 c
164 c
165 c ************
166 
167  logical xlower,xupper,bnded
168  integer i,j,col2,nfree,nbreak,pointr,
169  + ibp,nleft,ibkmin,iter
170  double precision f1,f2,dt,dtm,tsum,dibp,zibp,dibp2,bkmin,
171  + tu,tl,wmc,wmp,wmw,ddot,tj,tj0,neggi,sbgnrm,
172  + f2_org
173  double precision one,zero
174  parameter (one=1.0d0,zero=0.0d0)
175 
176 c Check the status of the variables, reset iwhere(i) if necessary;
177 c compute the Cauchy direction d and the breakpoints t; initialize
178 c the derivative f1 and the vector p = W'd (for theta = 1).
179 
180  if (sbgnrm .le. zero) then
181  if (iprint .ge. 0) write (6,*) 'Subgnorm = 0. GCP = X.'
182  call dcopy(n,x,1,xcp,1)
183  return
184  endif
185  bnded = .true.
186  nfree = n + 1
187  nbreak = 0
188  ibkmin = 0
189  bkmin = zero
190  col2 = 2*col
191  f1 = zero
192  if (iprint .ge. 99) write (6,3010)
193 
194 c We set p to zero and build it up as we determine d.
195 
196  do 20 i = 1, col2
197  p(i) = zero
198  20 continue
199 
200 c In the following loop we determine for each variable its bound
201 c status and its breakpoint, and update p accordingly.
202 c Smallest breakpoint is identified.
203 
204  do 50 i = 1, n
205  neggi = -g(i)
206  if (iwhere(i) .ne. 3 .and. iwhere(i) .ne. -1) then
207 c if x(i) is not a constant and has bounds,
208 c compute the difference between x(i) and its bounds.
209  if (nbd(i) .le. 2) tl = x(i) - l(i)
210  if (nbd(i) .ge. 2) tu = u(i) - x(i)
211 
212 c If a variable is close enough to a bound
213 c we treat it as at bound.
214  xlower = nbd(i) .le. 2 .and. tl .le. zero
215  xupper = nbd(i) .ge. 2 .and. tu .le. zero
216 
217 c reset iwhere(i).
218  iwhere(i) = 0
219  if (xlower) then
220  if (neggi .le. zero) iwhere(i) = 1
221  else if (xupper) then
222  if (neggi .ge. zero) iwhere(i) = 2
223  else
224  if (abs(neggi) .le. zero) iwhere(i) = -3
225  endif
226  endif
227  pointr = head
228  if (iwhere(i) .ne. 0 .and. iwhere(i) .ne. -1) then
229  d(i) = zero
230  else
231  d(i) = neggi
232  f1 = f1 - neggi*neggi
233 c calculate p := p - W'e_i* (g_i).
234  do 40 j = 1, col
235  p(j) = p(j) + wy(i,pointr)* neggi
236  p(col + j) = p(col + j) + ws(i,pointr)*neggi
237  pointr = mod(pointr,m) + 1
238  40 continue
239  if (nbd(i) .le. 2 .and. nbd(i) .ne. 0
240  + .and. neggi .lt. zero) then
241 c x(i) + d(i) is bounded; compute t(i).
242  nbreak = nbreak + 1
243  iorder(nbreak) = i
244  t(nbreak) = tl/(-neggi)
245  if (nbreak .eq. 1 .or. t(nbreak) .lt. bkmin) then
246  bkmin = t(nbreak)
247  ibkmin = nbreak
248  endif
249  else if (nbd(i) .ge. 2 .and. neggi .gt. zero) then
250 c x(i) + d(i) is bounded; compute t(i).
251  nbreak = nbreak + 1
252  iorder(nbreak) = i
253  t(nbreak) = tu/neggi
254  if (nbreak .eq. 1 .or. t(nbreak) .lt. bkmin) then
255  bkmin = t(nbreak)
256  ibkmin = nbreak
257  endif
258  else
259 c x(i) + d(i) is not bounded.
260  nfree = nfree - 1
261  iorder(nfree) = i
262  if (abs(neggi) .gt. zero) bnded = .false.
263  endif
264  endif
265  50 continue
266 
267 c The indices of the nonzero components of d are now stored
268 c in iorder(1),...,iorder(nbreak) and iorder(nfree),...,iorder(n).
269 c The smallest of the nbreak breakpoints is in t(ibkmin)=bkmin.
270 
271  if (theta .ne. one) then
272 c complete the initialization of p for theta not= one.
273  call dscal(col,theta,p(col+1),1)
274  endif
275 
276 c Initialize GCP xcp = x.
277 
278  call dcopy(n,x,1,xcp,1)
279 
280  if (nbreak .eq. 0 .and. nfree .eq. n + 1) then
281 c is a zero vector, return with the initial xcp as GCP.
282  if (iprint .gt. 100) write (6,1010) (xcp(i), i = 1, n)
283  return
284  endif
285 
286 c Initialize c = W'(xcp - x) = 0.
287 
288  do 60 j = 1, col2
289  c(j) = zero
290  60 continue
291 
292 c Initialize derivative f2.
293 
294  f2 = -theta*f1
295  f2_org = f2
296  if (col .gt. 0) then
297  call bmv(m,sy,wt,col,p,v,info)
298  if (info .ne. 0) return
299  f2 = f2 - ddot(col2,v,1,p,1)
300  endif
301  dtm = -f1/f2
302  tsum = zero
303  nseg = 1
304  if (iprint .ge. 99)
305  + write (6,*) 'There are ',nbreak,' breakpoints '
306 
307 c If there are no breakpoints, locate the GCP and return.
308 
309  if (nbreak .eq. 0) goto 888
310 
311  nleft = nbreak
312  iter = 1
313 
314 
315  tj = zero
316 
317 c------------------- the beginning of the loop -------------------------
318 
319  777 continue
320 
321 c Find the next smallest breakpoint;
322 c compute dt = t(nleft) - t(nleft + 1).
323 
324  tj0 = tj
325  if (iter .eq. 1) then
326 c Since we already have the smallest breakpoint we need not do
327 c heapsort yet. Often only one breakpoint is used and the
328 c cost of heapsort is avoided.
329  tj = bkmin
330  ibp = iorder(ibkmin)
331  else
332  if (iter .eq. 2) then
333 c Replace the already used smallest breakpoint with the
334 c breakpoint numbered nbreak > nlast, before heapsort call.
335  if (ibkmin .ne. nbreak) then
336  t(ibkmin) = t(nbreak)
337  iorder(ibkmin) = iorder(nbreak)
338  endif
339 c Update heap structure of breakpoints
340 c (if iter=2, initialize heap).
341  endif
342  call hpsolb(nleft,t,iorder,iter-2)
343  tj = t(nleft)
344  ibp = iorder(nleft)
345  endif
346 
347  dt = tj - tj0
348 
349  if (dt .ne. zero .and. iprint .ge. 100) then
350  write (6,4011) nseg,f1,f2
351  write (6,5010) dt
352  write (6,6010) dtm
353  endif
354 
355 c If a minimizer is within this interval, locate the GCP and return.
356 
357  if (dtm .lt. dt) goto 888
358 
359 c Otherwise fix one variable and
360 c reset the corresponding component of d to zero.
361 
362  tsum = tsum + dt
363  nleft = nleft - 1
364  iter = iter + 1
365  dibp = d(ibp)
366  d(ibp) = zero
367  if (dibp .gt. zero) then
368  zibp = u(ibp) - x(ibp)
369  xcp(ibp) = u(ibp)
370  iwhere(ibp) = 2
371  else
372  zibp = l(ibp) - x(ibp)
373  xcp(ibp) = l(ibp)
374  iwhere(ibp) = 1
375  endif
376  if (iprint .ge. 100) write (6,*) 'Variable ',ibp,' is fixed.'
377  if (nleft .eq. 0 .and. nbreak .eq. n) then
378 c all n variables are fixed,
379 c return with xcp as GCP.
380  dtm = dt
381  goto 999
382  endif
383 
384 c Update the derivative information.
385 
386  nseg = nseg + 1
387  dibp2 = dibp**2
388 
389 c Update f1 and f2.
390 
391 c temporarily set f1 and f2 for col=0.
392  f1 = f1 + dt*f2 + dibp2 - theta*dibp*zibp
393  f2 = f2 - theta*dibp2
394 
395  if (col .gt. 0) then
396 c update c = c + dt*p.
397  call daxpy(col2,dt,p,1,c,1)
398 
399 c choose wbp,
400 c the row of W corresponding to the breakpoint encountered.
401  pointr = head
402  do 70 j = 1,col
403  wbp(j) = wy(ibp,pointr)
404  wbp(col + j) = theta*ws(ibp,pointr)
405  pointr = mod(pointr,m) + 1
406  70 continue
407 
408 c compute (wbp)Mc, (wbp)Mp, and (wbp)M(wbp)'.
409  call bmv(m,sy,wt,col,wbp,v,info)
410  if (info .ne. 0) return
411  wmc = ddot(col2,c,1,v,1)
412  wmp = ddot(col2,p,1,v,1)
413  wmw = ddot(col2,wbp,1,v,1)
414 
415 c update p = p - dibp*wbp.
416  call daxpy(col2,-dibp,wbp,1,p,1)
417 
418 c complete updating f1 and f2 while col > 0.
419  f1 = f1 + dibp*wmc
420  f2 = f2 + 2.0d0*dibp*wmp - dibp2*wmw
421  endif
422 
423  f2 = max(epsmch*f2_org,f2)
424  if (nleft .gt. 0) then
425  dtm = -f1/f2
426  goto 777
427 c to repeat the loop for unsearched intervals.
428  else if(bnded) then
429  f1 = zero
430  f2 = zero
431  dtm = zero
432  else
433  dtm = -f1/f2
434  endif
435 
436 c------------------- the end of the loop -------------------------------
437 
438  888 continue
439  if (iprint .ge. 99) then
440  write (6,*)
441  write (6,*) 'GCP found in this segment'
442  write (6,4010) nseg,f1,f2
443  write (6,6010) dtm
444  endif
445  if (dtm .le. zero) dtm = zero
446  tsum = tsum + dtm
447 
448 c Move free variables (i.e., the ones w/o breakpoints) and
449 c the variables whose breakpoints haven't been reached.
450 
451  call daxpy(n,tsum,d,1,xcp,1)
452 
453  999 continue
454 
455 c Update c = c + dtm*p = W'(x^c - x)
456 c which will be used in computing r = Z'(B(x^c - x) + g).
457 
458  if (col .gt. 0) call daxpy(col2,dtm,p,1,c,1)
459  if (iprint .gt. 100) write (6,1010) (xcp(i),i = 1,n)
460  if (iprint .ge. 99) write (6,2010)
461 
462  1010 format ('Cauchy X = ',/,(4x,1p,6(1x,d11.4)))
463  2010 format (/,'---------------- exit CAUCHY----------------------',/)
464  3010 format (/,'---------------- CAUCHY entered-------------------')
465  4010 format ('Piece ',i3,' --f1, f2 at start point ',1p,2(1x,d11.4))
466  4011 format (/,'Piece ',i3,' --f1, f2 at start point ',
467  + 1p,2(1x,d11.4))
468  5010 format ('Distance to the next break point = ',1p,d11.4)
469  6010 format ('Distance to the stationary point = ',1p,d11.4)
470 
471  return
472 
473  end
subroutine bmv(m, sy, wt, col, v, p, info)
This subroutine computes the product of the 2m x 2m middle matrix in the compact L-BFGS formula of B ...
Definition: bmv.f:36
subroutine cauchy(n, x, l, u, nbd, g, iorder, iwhere, t, d, xcp, m, wy, ws, sy, wt, theta, col, head, p, c, wbp, v, nseg, iprint, sbgnrm, info, epsmch)
Compute the Generalized Cauchy Point along the projected gradient direction.
Definition: cauchy.f:133
subroutine hpsolb(n, t, iorder, iheap)
This subroutine sorts out the least element of t, and puts the remaining elements of t in a heap.
Definition: hpsolb.f:22