/[ascend]/trunk/minos54/mi25bfac.f
ViewVC logotype

Contents of /trunk/minos54/mi25bfac.f

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations) (download)
Fri Oct 29 20:54:12 2004 UTC (19 years, 7 months ago) by aw0a
File size: 145557 byte(s)
Setting up web subdirectory in repository
1 ************************************************************************
2 *
3 * File mi25bfac fortran.
4 *
5 * m2bfac m2bmap m2belm m2bsol m2sing
6 * lu1fac lu1fad lu1gau lu1mar lu1pen
7 * lu1max lu1or1 lu1or2 lu1or3 lu1or4
8 * lu1pq1 lu1pq2 lu1pq3 lu1rec
9 * lu6chk lu6sol lu7add lu7elm lu7for lu7zap lu8rpc
10 *
11 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
12
13 subroutine m2bfac( gotfac, nfac, m, mbs, n, nb, nr, nn, ns,
14 $ lcrash, fsub, objadd,
15 $ ne, nka, a, ha, ka,
16 $ kb, hs, bl, bu, bbl, bbu,
17 $ r, w, x, xn, y, y2, z, nwcore )
18
19 implicit double precision (a-h,o-z)
20 logical gotfac
21 integer*4 ha(ne), hs(nb)
22 integer ka(nka),kb(mbs)
23 double precision a(ne), bl(nb), bu(nb), bbl(mbs), bbu(mbs),
24 $ r(nr), w(m), x(mbs), xn(nb),
25 $ y(mbs), y2(m), z(nwcore)
26
27 * ------------------------------------------------------------------
28 * m2bfac computes a factorization of the current basis.
29 * gotfac must be false the first time m2bfac is called.
30 * It might be true after the first cycle.
31 *
32 * If lcrash = 3, linear inequality rows (LG rows) are to be
33 * treated as free rows.
34 *
35 * 04 May 1992: invmod and invitn not reset if gotfac is true.
36 * This will help m5solv keep track of LU properly
37 * during later cycles.
38 * 04 Jun 1992: lcrash added.
39 * ------------------------------------------------------------------
40
41 common /m1eps / eps,eps0,eps1,eps2,eps3,eps4,eps5,plinfy
42 common /m1file/ iread,iprint,isumm
43 common /m2lu1 / minlu,maxlu,lena,nbelem,ip,iq,lenc,lenr,
44 $ locc,locr,iploc,iqloc,lua,indc,indr
45 common /m2lu4 / parmlu(30),luparm(30)
46 common /m2mapz/ maxw,maxz
47 common /m5lobj/ sinf,wtobj,minimz,ninf,iobj,jobj,kobj
48 common /m5log1/ idebug,ierr,lprint
49 common /m5lp1 / itn,itnlim,nphs,kmodlu,kmodpi
50 common /m5lp2 / invrq,invitn,invmod
51 common /m5prc / nparpr,nmulpr,kprc,newsb
52
53 logical modtol, prnt
54
55 nfac = nfac + 1
56 obj = sinf
57 if (ninf .eq. 0) obj = minimz * fsub + objadd
58 prnt = iprint .gt. 0 .and. mod(lprint,10) .gt. 0
59 if (prnt) write(iprint, 1000) nfac, invrq, itn, ninf, obj
60
61 m1 = m + 1
62 ms = m + ns
63 l1 = n + 1
64 ntry = 0
65 maxtry = 10
66 if (gotfac) then
67 if (invrq .eq. 0) go to 500
68 end if
69
70 * Load the basic variables into kb, slacks first.
71 * Set kobj to tell us where the linear objective is.
72
73 20 ntry = ntry + 1
74 invrq = 0
75 invitn = 0
76 invmod = 0
77 ierr = 0
78 k = 0
79
80 do 50 j = l1, nb
81 if (hs(j) .eq. 3) then
82 k = k + 1
83 kb(k) = j
84 if (j .eq. jobj) kobj = k
85 end if
86 50 continue
87
88 nbelem = k
89 nslack = k
90 nonlin = 0
91 do 100 j = 1, n
92 if (hs(j) .eq. 3) then
93 k = k + 1
94 if (k .le. m) then
95 kb(k) = j
96 if (j .le. nn) nonlin = nonlin + 1
97 nbelem = nbelem + ka(j+1) - ka(j)
98 end if
99 end if
100 100 continue
101
102 nbasic = k
103 if (nbasic .gt. m) go to 920
104
105 if (nbasic .lt. m) then
106
107 * Not enough basics.
108 * Set the remaining kb(k) = 0 for m2belm and m2sing.
109
110 j = nbasic + 1
111 do 150 k = j, m
112 kb(k) = 0
113 150 continue
114 end if
115
116 * Make the objective the iobj-th basic, so we know where it is.
117
118 if (iobj .gt. 0) then
119 j = kb(iobj)
120 kb(iobj) = jobj
121 kb(kobj) = j
122 end if
123
124 lin = nbasic - nslack - nonlin
125 if (lin .lt. 0) lin = 0
126 bdnsty = 100.0 * nbelem / (m*m)
127 if (prnt) write(iprint, 1010) nonlin, lin, nslack, nbelem, bdnsty
128
129 * -----------------------------------------------------------------
130 * Load the basis matrix into the LU arrays.
131 * Then factorize it.
132 *
133 * modtol says if this is the first iteration after Crash.
134 * If so, we use big singularity tols to prevent getting an
135 * unnecessarily ill-conditioned starting basis.
136 * -----------------------------------------------------------------
137 minlen = nbelem*5/4
138 if (minlen .gt. lena) go to 940
139
140 call m2belm( m, n, ne, nka, a, ha, ka, kb,
141 $ z(lua), z(indc), z(indr), lena )
142
143 modtol = itn .eq. 0 .and. lcrash .gt. 0
144
145 if (modtol) then
146 utol1 = parmlu(4)
147 utol2 = parmlu(5)
148 parmlu(4) = max( utol1, eps3 )
149 parmlu(5) = max( utol2, eps3 )
150 end if
151
152 call m2bsol( 0, m, w, y, z, nwcore )
153
154 if (modtol) then
155 parmlu(4) = utol1
156 parmlu(5) = utol2
157 end if
158
159 nsing = luparm(11)
160 minlen = max( minlen, luparm(13) )
161 if (ierr .ge. 7) go to 940
162 if (ierr .ge. 2) go to 950
163 ierr = 0
164 if (nsing .eq. 0) go to 500
165
166 * -----------------------------------------------------------------
167 * The basis appears to be singular.
168 * Suspect columns are indicated by non-positive components of w(*).
169 * Replace them by the relevant slacks and try again.
170 * -----------------------------------------------------------------
171 if (ntry .gt. maxtry) go to 960
172 call m2sing( m, n, nb, w, z(ip), z(iq), bl, bu, hs, kb, xn )
173
174 * See if any superbasics slacks were made basic.
175
176 if (ns .gt. 0) then
177 ns0 = ns
178 do 410 k = 1, ns0
179 jq = 1 + ns0 - k
180 j = kb(m+jq)
181 if (hs(j) .eq. 3) then
182 call m6rdel( m, 1, nr, ns, ms,
183 $ kb, bbl, bbu, x, r, x, x, jq, .false. )
184 ns = ns - 1
185 ms = m + ns
186 end if
187 410 continue
188 if (ns .lt. ns0) r(1) = 0.0
189 end if
190
191 go to 20
192
193 * ------------------------------------------------------------------
194 * Compute the basic variables and check that A * xn = 0.
195 * If gotfac was true, ntry = 0.
196 * ------------------------------------------------------------------
197 500 gotfac = .false.
198 call m5setx( 1, m, n, nb, ms, kb,
199 $ ne, nka, a, ha, ka,
200 $ bl, bu, x, xn, y, y2, z, nwcore )
201 if (ierr .gt. 0 .and. ntry .eq. 0) go to 20
202 if (ierr .gt. 0) go to 980
203
204 * Load basic and superbasic bounds into bbl, bbu.
205
206 do 550 k = 1, ms
207 j = kb(k)
208 bbl(k) = bl(j)
209 bbu(k) = bu(j)
210 550 continue
211
212 * For Crash option 3, linear LG rows should appear to be free.
213
214 if (lcrash .eq. 3) then
215 do 600 k = 1, ms
216 j = kb(k)
217 if (j .gt. n) then
218 if (bl(j) .lt. bu(j)) then
219 bbl(k) = - plinfy
220 bbu(k) = + plinfy
221 end if
222 end if
223 600 continue
224 end if
225
226 * Normal exit.
227
228 if (idebug .eq. 100) then
229 if (iprint .gt. 0) write(iprint, 2000) (kb(k), x(k), k = 1, ms)
230 end if
231 900 return
232
233 * -------------------------------------------------
234 * Error exits.
235 * m1page( ) decides whether to write on a new page.
236 * m1page(2) also writes '=1' if GAMS.
237 * -------------------------------------------------
238
239 * Wrong number of basics.
240
241 920 ierr = 32
242 call m1page( 2 )
243 if (iprint .gt. 0) write(iprint, 1300) nbasic
244 if (isumm .gt. 0) write(isumm , 1300) nbasic
245 return
246
247 * Not enough memory.
248
249 940 ierr = 20
250 more = maxz + 3*(minlen - lena)
251 call m1page( 2 )
252 if (iprint .gt. 0) write(iprint, 1400) maxz, more
253 if (isumm .gt. 0) write(isumm , 1400) maxz, more
254 return
255
256 * Error in the LU package.
257
258 950 ierr = 21
259 call m1page( 2 )
260 if (iprint .gt. 0) write(iprint, 1500)
261 if (isumm .gt. 0) write(isumm , 1500)
262 return
263
264 * The basis is structurally singular even after the third try.
265 * Time to give up.
266
267 960 ierr = 22
268 call m1page( 2 )
269 if (iprint .gt. 0) write(iprint, 1600) ntry
270 if (isumm .gt. 0) write(isumm , 1600) ntry
271 return
272
273 * Fatal row error.
274
275 980 ierr = 10
276 return
277
278
279 1000 format(/ ' Factorize', i6,
280 $ ' Demand', i9, ' Iteration', i6,
281 $ ' Infeas', i9, ' Objective', 1p, e17.9)
282 1010 format(' Nonlinear', i6, ' Linear', i9, ' Slacks', i9,
283 $ ' Elems', i10, ' Density', f8.2)
284 1300 format(' EXIT -- system error. Wrong no. of basic variables:',
285 $ i8)
286 1400 format(' EXIT -- not enough storage for the basis factors'
287 $ // ' Words available =', i8
288 $ // ' Increase Workspace (total) to at least', i8)
289 1500 format(' EXIT -- error in basis package')
290 1600 format(' EXIT -- the basis is singular after', i4,
291 $ ' factorization attempts')
292 2000 format(/ ' BS and SB values:' // (5(i7, g17.8)))
293
294 * end of m2bfac
295 end
296
297 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
298
299 subroutine m2bmap( mode, m, n, ne, minz, maxz, nguess )
300
301 * ------------------------------------------------------------------
302 * m2bmap sets up the core allocation for the basis factors.
303 * ------------------------------------------------------------------
304
305 common /m1file/ iread,iprint,isumm
306 common /m1word/ nwordr,nwordi,nwordh
307 common /m2lu1 / minlu,maxlu,lena,nbelem,ip,iq,lenc,lenr,
308 $ locc,locr,iploc,iqloc,lua,indc,indr
309
310 intrinsic max, min
311
312 minlu = minz
313 maxlu = maxz
314 mh = (m - 1)/nwordh + 1
315 mi = (m - 1)/nwordi + 1
316 ip = minlu
317 iq = ip + mh
318 lenc = iq + mh
319 lenr = lenc + mh
320 locc = lenr + mh
321 locr = locc + mi
322 iploc = locr + mi
323 iqloc = iploc + mh
324 lua = iqloc + mh
325 lena = (maxlu - lua - 1)*nwordh/(nwordh + 2)
326 indc = lua + lena
327 indr = indc + (lena - 1)/nwordh + 1
328
329 * Estimate the number of nonzeros in the basis factorization.
330 * necola = estimate of nonzeros per column of a.
331 * We guess that the density of the basis factorization is
332 * 2 times as great, and then allow 1 more such lot for elbow room.
333 * 18 sep 1989: Tony and Alex change m to min( m, n ) below.
334
335 necola = ne / n
336 necola = max( necola, 5 )
337 mina = 3 * min( m, n ) * necola
338 nguess = lua + mina + 2*mina/nwordh
339 if (mode .ge. 3) then
340 if (iprint .gt. 0) write(iprint, 1000) lena
341 end if
342 return
343
344 1000 format(/ ' Nonzeros allowed for in LU factors', i9)
345
346 * end of m2bmap
347 end
348
349 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
350
351 subroutine m2belm( m, n, ne, nka, a, ha, ka, kb,
352 $ alu, indc, indr, lena )
353
354 implicit double precision (a-h,o-z)
355 double precision a(ne), alu(lena)
356 integer*4 ha(ne), indc(lena), indr(lena)
357 integer ka(nka), kb(m)
358
359 * ------------------------------------------------------------------
360 * m2belm extracts the basis elements from the constraint matrix,
361 * ready for use by the LU factorization routines.
362 * ------------------------------------------------------------------
363
364 parameter ( one = 1.0d+0 )
365
366 nz = 0
367 do 200 k = 1, m
368 j = kb(k)
369 if (j .eq. 0) go to 200
370 if (j .le. n) then
371 i1 = ka(j)
372 i2 = ka(j+1) - 1
373 do 100 i = i1, i2
374 ir = ha(i)
375 nz = nz + 1
376 alu(nz) = a(i)
377 indc(nz) = ir
378 indr(nz) = k
379 100 continue
380 else
381
382 * Treat slacks specially.
383
384 nz = nz + 1
385 alu(nz) = one
386 indc(nz) = j - n
387 indr(nz) = k
388 end if
389 200 continue
390
391 * end of m2belm
392 end
393
394 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
395
396 subroutine m2bsol( mode, m, w, y, z, nwcore )
397
398 implicit double precision (a-h,o-z)
399 double precision w(m), y(m), z(nwcore)
400
401 * ------------------------------------------------------------------
402 * m2bsol calls up the relevant basis-factorization routines.
403 *
404 * mode
405 * ----
406 * 0 Factorize current basis from scratch, so that B = L*U.
407 * 1 Solve L*w = w(input). y is not touched.
408 * 2 Solve L*w = w(input) and solve B*y = w(input).
409 * 3 Solve B(transpose)*y = w. Note that w is destroyed.
410 * 4 Update the LU factors when the jp-th column is replaced
411 * by a vector v. jp is in common block /m5log3/.
412 * On input, w must satisfy L*w = v. w will be destroyed.
413 * 5 Solve L(transpose)*w = w(input). y is not touched.
414 *
415 * The following tolerances are used...
416 *
417 * luparm(3) = maxcol lu1fac: maximum number of columns
418 * searched allowed in a Markowitz-type
419 * search for the next pivot element.
420 * parmlu(1) = lmax1 = maximum multiplier allowed in L during
421 * refactorization.
422 * parmlu(2) = lmax2 = maximum multiplier allowed during updates.
423 * parmlu(3) = small = minimum element kept in B or in
424 * transformed matrix during elimination.
425 * parmlu(4) = utol1 = abs tol for flagging small diagonals of U.
426 * parmlu(5) = utol2 = rel tol for flagging small diagonals of U.
427 * parmlu(6) = uspace = factor allowing waste space in row/col lists.
428 * parmlu(7) = dens1 = the density at which the Markowitz strategy
429 * should search maxcol columns and no rows.
430 * parmlu(8) = dens2 = the density at which the Markowitz strategy
431 * should search only 1 column.
432 * (In one version of lu1fac, the remaining
433 * matrix is treated as dense if there is
434 * sufficient storage.)
435 *
436 * 25 Nov 1991: parmlu(1,2,4,5,8) are now defined by the SPECS file.
437 * 12 Jun 1992: Decided lu1fac's switch to dense LU was giving
438 * trouble. Went back to treating all of LU as sparse.
439 * ------------------------------------------------------------------
440
441 common /m1eps / eps,eps0,eps1,eps2,eps3,eps4,eps5,plinfy
442 common /m1file/ iread,iprint,isumm
443 common /m2lu1 / minlu,maxlu,lena,nbelem,ip,iq,lenc,lenr,
444 $ locc,locr,iploc,iqloc,lua,indc,indr
445 common /m2lu2 / factol(5),lamin,nsing1,nsing2
446 common /m2lu3 / lenl,lenu,ncp,lrow,lcol
447 common /m2lu4 / parmlu(30),luparm(30)
448 common /m5freq/ kchk,kinv,ksav,klog,ksumm,i1freq,i2freq,msoln
449 common /m5loc / lpi ,lpi2 ,lw ,lw2 ,
450 $ lx ,lx2 ,ly ,ly2 ,
451 $ lgsub ,lgsub2,lgrd ,lgrd2 ,
452 $ lr ,lrg ,lrg2 ,lxn
453 common /m5log1/ idebug,ierr,lprint
454 common /m5log3/ djq,theta,pivot,cond,nonopt,jp,jq,modr1,modr2
455 common /m5lp2 / invrq,invitn,invmod
456
457
458 if (mode .eq. 0) then
459 * ---------------------------------------------------------------
460 * mode = 0. Factorize the basis.
461 * Note that parmlu(1,2,4,5,8) are defined by the SPECS file
462 * in m3dflt and m3key.
463 * ---------------------------------------------------------------
464 luparm(1) = iprint
465 luparm(2) = 0
466 if (idebug .eq. 51) luparm(2) = 1
467 if (idebug .eq. 52) luparm(2) = 2
468 if (iprint .le. 0) luparm(2) = -1
469 luparm(3) = 5
470 if (i1freq .gt. 0) luparm(3) = i1freq
471 * (For test purposes, i1freq in the Specs file can alter maxcol.)
472
473 parmlu(3) = eps0
474 parmlu(6) = 3.0
475 parmlu(7) = 0.3
476
477 call lu1fac( m, m , nbelem , lena , luparm , parmlu,
478 $ z(lua ), z(indc ), z(indr), z(ip) , z(iq) ,
479 $ z(lenc ), z(lenr ), z(locc), z(locr),
480 $ z(iploc), z(iqloc), z(ly ), z(ly2 ), w, inform )
481
482 lamin = luparm(13)
483 ndens1 = luparm(17)
484 ndens2 = luparm(18)
485 lenl = luparm(23)
486 lenu = luparm(24)
487 lrow = luparm(25)
488 ncp = luparm(26)
489 mersum = luparm(27)
490 nutri = luparm(28)
491 nltri = luparm(29)
492 amax = parmlu(10)
493 elmax = parmlu(11)
494 umax = parmlu(12)
495 dumin = parmlu(14)
496
497 ierr = inform
498 bdincr = (lenl + lenu - nbelem) * 100.0 / max( nbelem, 1 )
499 avgmer = mersum
500 floatm = m
501 avgmer = avgmer / floatm
502 growth = umax / (amax + eps)
503 nbump = m - nutri - nltri
504 if (iprint .gt. 0 .and. mod(lprint,10) .gt. 0) then
505 write(iprint, 1000) ncp , avgmer, lenl , lenu ,
506 $ bdincr, m , nutri, ndens1,
507 $ elmax , amax , umax , dumin ,
508 $ growth, nltri , nbump, ndens2
509 end if
510
511 else if (mode .le. 2) then
512 * ---------------------------------------------------------------
513 * mode = 1 or 2. Solve L*w = w(input).
514 * When LU*y = w is being solved in MINOS, norm(w) will sometimes
515 * be small (e.g. after periodic refactorization). Hence for
516 * mode 2 we scale parmlu(3) to alter what lu6sol thinks is small.
517 * ---------------------------------------------------------------
518 small = parmlu(3)
519 if (mode .eq. 2) parmlu(3) = small * dnorm1( m, w, 1 )
520
521 call lu6sol( 1, m, m, w, y, lena, luparm, parmlu,
522 $ z(lua ), z(indc), z(indr), z(ip), z(iq),
523 $ z(lenc), z(lenr), z(locc), z(locr), inform )
524
525 parmlu(3) = small
526
527 if (mode .eq. 2) then
528 * ------------------------------------------------------------
529 * mode = 2. Solve U*y = w.
530 * ------------------------------------------------------------
531 call lu6sol( 3, m, m, w, y, lena, luparm, parmlu,
532 $ z(lua ), z(indc), z(indr), z(ip), z(iq),
533 $ z(lenc), z(lenr), z(locc), z(locr), inform )
534 end if
535
536 else if (mode .eq. 3) then
537 * ---------------------------------------------------------------
538 * mode = 3. Solve B(transpose)*y = w.
539 * ---------------------------------------------------------------
540 call lu6sol( 6, m, m, y, w, lena, luparm, parmlu,
541 $ z(lua ), z(indc), z(indr), z(ip), z(iq),
542 $ z(lenc), z(lenr), z(locc), z(locr), inform )
543
544 else if (mode .eq. 4) then
545 * ---------------------------------------------------------------
546 * mode = 4. Update the LU factors of B after basis change.
547 * ---------------------------------------------------------------
548 invmod = invmod + 1
549 call lu8rpc( 1, 2, m, m, jp, w, w,
550 $ lena, luparm, parmlu,
551 $ z(lua ), z(indc), z(indr), z(ip), z(iq),
552 $ z(lenc), z(lenr), z(locc), z(locr),
553 $ inform, diag, wnorm )
554 if (inform .ne. 0) invrq = 7
555 lenl = luparm(23)
556 lenu = luparm(24)
557 lrow = luparm(25)
558 ncp = luparm(26)
559 end if
560
561 return
562
563 1000 format(' Compressns', i5, ' Merit', f10.2,
564 $ ' lenL', i11, ' lenU', i11, ' Increase', f7.2,
565 $ ' m ', i6, ' Ut', i6, ' d1', i6, 1p
566 $ / ' Lmax', e11.1, ' Bmax', e11.1,
567 $ ' Umax', e11.1, ' Umin', e11.1,
568 $ ' Growth',e9.1, ' Lt', i6, ' bp', i6, ' d2', i6)
569
570 * end of m2bsol
571 end
572
573 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
574
575 subroutine m2sing( m, n, nb, w, ip, iq, bl, bu, hs, kb, xn )
576
577 implicit double precision (a-h,o-z)
578 integer*4 ip(m), iq(m), hs(nb)
579 integer kb(m)
580 double precision bl(nb), bu(nb), w(m), xn(nb)
581
582 * -----------------------------------------------------------------
583 * m2sing is called if the LU factorization of the basis appears
584 * to be singular. If w(j) is not positive, the j-th basic
585 * variable kb(j) is replaced by the appropriate slack.
586 * If any kb(j) = 0, only a partial basis was supplied.
587 *
588 * 08 Apr 1992: Now generate internal values for hs(j) if necessary
589 * (-1 or 4) to be compatible with m5hs.
590 * -----------------------------------------------------------------
591
592 common /m1file/ iread,iprint,isumm
593
594 intrinsic max, min
595 parameter ( zero = 0.0d+0, nprint = 5 )
596
597 nsing = 0
598 do 100 k = 1, m
599 j = iq(k)
600 if (w(j) .gt. zero) go to 100
601 j = kb(j)
602 if (j .gt. 0 ) then
603
604 * Make variable j nonbasic (and feasible).
605 * hs(j) = -1 means xn(j) is strictly between its bounds.
606
607 if (xn(j) .le. bl(j)) then
608 xn(j) = bl(j)
609 hs(j) = 0
610 else if (xn(j) .ge. bu(j)) then
611 xn(j) = bu(j)
612 hs(j) = 1
613 else
614 hs(j) = -1
615 end if
616
617 if (bl(j) .eq. bu(j)) hs(j) = 4
618 end if
619
620 * Make the appropriate slack basic.
621
622 i = ip(k)
623 hs(n+i) = 3
624 nsing = nsing + 1
625 if (nsing .le. nprint) then
626 if (iprint .gt. 0) write(iprint, 1000) j, i
627 if (isumm .gt. 0) write(isumm , 1000) j, i
628 end if
629 100 continue
630
631 if (nsing .gt. nprint) then
632 if (iprint .gt. 0) write(iprint, 1100) nsing
633 if (isumm .gt. 0) write(isumm , 1100) nsing
634 end if
635 return
636
637 1000 format(' Column', i7, ' replaced by slack', i7)
638 1100 format(' and so on. Total slacks inserted =', i6)
639
640 * end of m2sing
641 end
642
643 ************************************************************************
644 *
645 * File LU1.sparse FORTRAN
646 *
647 * lu1fac lu1fad lu1gau lu1mar lu1pen
648 * lu1max lu1or1 lu1or2 lu1or3 lu1or4 < File LU1UTIL
649 * lu1pq1 lu1pq2 lu1pq3 lu1rec
650 *
651 *
652 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
653
654 subroutine lu1fac( m , n , nelem, lena , luparm, parmlu,
655 $ a , indc , indr , ip , iq ,
656 $ lenc , lenr , locc , locr ,
657 $ iploc, iqloc, ipinv, iqinv, w , inform )
658
659 implicit double precision (a-h,o-z)
660 integer luparm(30)
661 double precision parmlu(30), a(lena) , w(n)
662 integer*4 indc(lena), indr(lena), ip(m) , iq(n),
663 $ lenc(n) , lenr(m) ,
664 $ iploc(n) , iqloc(m) , ipinv(m), iqinv(n)
665 integer locc(n) , locr(m)
666
667 * ------------------------------------------------------------------
668 * lu1fac computes a factorization A = L*U, where A is a sparse
669 * matrix with m rows and n columns, P*L*P' is lower triangular
670 * and P*U*Q is upper triangular for certain permutations P, Q
671 * (which are returned in the arrays ip, iq).
672 * Stability is ensured by limiting the size of the elements of L.
673 *
674 * The nonzeros of A are input via the parallel arrays a, indc, indr
675 * which should contain nelem entries of the form aij, i, j
676 * in any order.
677 *
678 * ******************************************************************
679 * * Beware !!! The row indices i must be in indc, *
680 * * and the column indices j must be in indr. *
681 * ******************************************************************
682 *
683 * It does not matter if some of the entries in a(*) are zero.
684 * Entries satisfying abs( a(i) ) .le. parmlu(3) are ignored.
685 * Other parameters in luparm and parmlu are described below.
686 *
687 * The matrix A may be singular. On exit, nsing = luparm(11) gives
688 * the number of apparent singularities. This is the number of
689 * "small" diagonals of the permuted factor U, as judged by
690 * the input tolerances utol1 = parmlu(4) and utol2 = parmlu(5).
691 * The diagonal element diagj associated with column j of A is
692 * "small" if
693 * abs( diagj ) .le. utol1
694 * or
695 * abs( diagj ) .le. utol2 * max( uj ),
696 *
697 * where max( uj ) is the maximum element in the j-th column of U.
698 * The position of such elements is returned in w(*). In general,
699 * w(j) = + max( uj ), but if column j is a singularity,
700 * w(j) = - max( uj ). Thus, w(j) .le. 0 if column j appears to be
701 * dependent on the other columns of A.
702 * ==================================================================
703 *
704 *
705 * Notes on the array names
706 * ------------------------
707 *
708 * During the LU factorization, the sparsity pattern of the matrix
709 * being factored is stored twice: in a column list and a row list.
710 *
711 * The column list is ( a, indc, locc, lenc )
712 * where
713 * a(*) holds the nonzeros,
714 * indc(*) holds the indices for the column list,
715 * locc(j) points to the start of column j in a(*) and indc(*),
716 * lenc(j) is the number of nonzeros in column j.
717 *
718 * The row list is ( indr, locr, lenr )
719 * where
720 * indr(*) holds the indices for the row list,
721 * locr(i) points to the start of row i in indr(*),
722 * lenr(i) is the number of nonzeros in row i.
723 *
724 *
725 * At all stages of the LU factorization, ip contains a complete
726 * row permutation. At the start of stage k, ip(1), ..., ip(k-1)
727 * are the first k-1 rows of the final row permutation P.
728 * The remaining rows are stored in an ordered list
729 * ( ip, iploc, ipinv )
730 * where
731 * iploc(nz) points to the start in ip(*) of the set of rows
732 * that currently contain nz nonzeros,
733 * ipinv(i) points to the position of row i in ip(*).
734 *
735 * For example,
736 * iploc(1) = k (and this is where rows of length 1 begin),
737 * iploc(2) = k+p if there are p rows of length 1
738 * (and this is where rows of length 2 begin).
739 *
740 * Similarly for iq, iqloc, iqinv.
741 * ==================================================================
742 *
743 *
744 * 00 Jun 1983 Original version.
745 * 00 Jul 1987 nrank saved in luparm(16).
746 * 12 Apr 1989 ipinv, iqinv added as workspace.
747 * 26 Apr 1989 maxtie replaced by maxcol in Markowitz search.
748 * 16 Mar 1992 jumin saved in luparm(19).
749 * 10 Jun 1992 lu1fad has to move empty rows and cols to the bottom
750 * (via lu1pq3) before doing the dense LU.
751 * 12 Jun 1992 Deleted dense LU (lu1ful and lu1vlu).
752 *
753 * Systems Optimization Laboratory, Stanford University.
754 * ---------------------------------------------------------------------
755 *
756 * Input parameters Typical value
757 *
758 * luparm( 1) = nout File number for printed messages. 6
759 * luparm( 2) = lprint Print level. 0
760 * < 0 suppresses output.
761 * = 0 gives error messages.
762 * = 1 gives debug output from some of the
763 * other routines in LUSOL.
764 * >= 2 gives the pivot row and column and the
765 * no. of rows and columns involved at
766 * each elimination step in lu1fac.
767 * luparm( 3) = maxcol lu1fac: maximum number of columns 5
768 * searched allowed in a Markowitz-type
769 * search for the next pivot element.
770 * For some of the factorization, the
771 * number of rows searched is
772 * maxrow = maxcol - 1.
773 *
774 *
775 * Output parameters
776 *
777 * luparm(10) = inform Return code from last call to any LU routine.
778 * luparm(11) = nsing No. of singularities marked in the
779 * output array w(*).
780 * luparm(12) = jsing Column index of last singularity.
781 * luparm(13) = minlen Minimum recommended value for lena.
782 * luparm(14) = maxlen ?
783 * luparm(15) = nupdat No. of updates performed by the lu8 routines.
784 * luparm(16) = nrank No. of nonempty rows of U.
785 * luparm(17) = ndens1 No. of columns remaining when the density of
786 * the matrix being factorized reached dens1.
787 * luparm(18) = ndens2 No. of columns remaining when the density of
788 * the matrix being factorized reached dens2.
789 * luparm(19) = jumin The column index associated with dumin.
790 * luparm(20) = numl0 No. of columns in initial L.
791 * luparm(21) = lenl0 Size of initial L (no. of nonzeros).
792 * luparm(22) = lenu0 Size of initial U.
793 * luparm(23) = lenl Size of current L.
794 * luparm(24) = lenu Size of current U.
795 * luparm(25) = lrow Length of row file.
796 * luparm(26) = ncp No. of compressions of LU data structures.
797 * luparm(27) = mersum lu1fac: sum of Markowitz merit counts.
798 * luparm(28) = nutri lu1fac: triangular rows in U.
799 * luparm(29) = nltri lu1fac: triangular rows in L.
800 * luparm(30) =
801 *
802 *
803 * Input parameters Typical value
804 *
805 * parmlu( 1) = elmax1 Max multiplier allowed in L 10.0
806 * during factor.
807 * parmlu( 2) = elmax2 Max multiplier allowed in L 10.0
808 * during updates.
809 * parmlu( 3) = small Absolute tolerance for eps**0.8
810 * treating reals as zero. IBM double: 3.0d-13
811 * parmlu( 4) = utol1 Absolute tol for flagging eps**0.66667
812 * small diagonals of U. IBM double: 3.7d-11
813 * parmlu( 5) = utol2 Relative tol for flagging eps**0.66667
814 * small diagonals of U. IBM double: 3.7d-11
815 * parmlu( 6) = uspace Factor limiting waste space in U. 3.0
816 * In lu1fac, the row or column lists
817 * are compressed if their length
818 * exceeds uspace times the length of
819 * either file after the last compression.
820 * parmlu( 7) = dens1 The density at which the Markowitz 0.3
821 * strategy should search maxcol columns
822 * and no rows.
823 * parmlu( 8) = dens2 the density at which the Markowitz 0.6
824 * strategy should search only 1 column
825 * or (preferably) use a dense LU for
826 * all the remaining rows and columns.
827 *
828 *
829 * Output parameters
830 *
831 * parmlu(10) = amax Maximum element in A.
832 * parmlu(11) = elmax Maximum multiplier in current L.
833 * parmlu(12) = umax Maximum element in current U.
834 * parmlu(13) = dumax Maximum diagonal in U.
835 * parmlu(14) = dumin Minimum diagonal in U.
836 * parmlu(15) =
837 * parmlu(16) =
838 * parmlu(17) =
839 * parmlu(18) =
840 * parmlu(19) =
841 * parmlu(20) = resid lu6sol: residual after solve with U or U'.
842 * ...
843 * parmlu(30) =
844 * ---------------------------------------------------------------------
845
846 parameter ( zero = 0.0d+0 )
847
848 * Grab relevant input parameters.
849
850 nout = luparm(1)
851 lprint = luparm(2)
852 small = parmlu(3)
853
854 * Initialize output parameters.
855
856 inform = 0
857 minlen = nelem + 2*(m + n)
858 nrank = 0
859 numl0 = 0
860 lenl = 0
861 lenu = 0
862 lrow = 0
863 lcol = nelem
864 mersum = 0
865 nutri = m
866 nltri = 0
867 amax = 1.0
868
869 * Initialize workspace parameters.
870
871 luparm(26) = 0
872 if (lena .lt. minlen) go to 970
873
874
875 * ------------------------------------------------------------------
876 * Organize the aij's in a, indc, indr.
877 * lu1or1 deletes small entries, tests for illegal i,j's,
878 * and counts the nonzeros in each row and column.
879 * lu1or2 reorders the elements of A by columns.
880 * lu1or3 uses the column list to test for duplicate entries
881 * (same indices i,j).
882 * lu1or4 constructs a row list from the column list.
883 * ------------------------------------------------------------------
884 call lu1or1( m , n , nelem, small,
885 $ a , indc , indr , lenc , lenr,
886 $ amax, numnz, lerr , inform )
887 if (inform .ne. 0) go to 930
888
889 nelem = numnz
890 if (nelem .gt. 0) then
891 call lu1or2( n, nelem, a, indc, indr, lenc, locc )
892 call lu1or3( m, n, nelem, indc, lenc, locc, w,
893 $ lerr, inform )
894 if (inform .ne. 0) go to 940
895
896 call lu1or4( m, n, nelem,
897 $ indc, indr, lenc, lenr, locc, locr )
898 end if
899
900 * ------------------------------------------------------------------
901 * Set up lists of rows and columns with equal numbers of nonzeros,
902 * using indc(*) as workspace.
903 * Then compute the factorization A = L*U.
904 * ------------------------------------------------------------------
905 call lu1pq1( m, n, lenr, ip, iploc, ipinv, indc(nelem + 1) )
906 call lu1pq1( n, m, lenc, iq, iqloc, iqinv, indc(nelem + 1) )
907
908 call lu1fad( m , n , nelem, lena , luparm, parmlu,
909 $ a , indc , indr , ip , iq ,
910 $ lenc , lenr , locc , locr ,
911 $ iploc , iqloc, ipinv, iqinv ,
912 $ inform, lenl , lenu , minlen, mersum,
913 $ nutri , nltri )
914
915 if (inform .eq. 7) go to 970
916 if (inform .gt. 0) go to 980
917
918 * Move empty rows and cols to the end of the permutations ip, iq.
919
920 call lu1pq3( m, lenr, ip, mrank )
921 call lu1pq3( n, lenc, iq, nrank )
922 nrank = min( mrank, nrank )
923
924 * ------------------------------------------------------------------
925 * The LU factors are at the top of a, indc, indr,
926 * with the columns of L and the rows of U in the order
927 *
928 * ( free ) ... ( u3 ) ( l3 ) ( u2 ) ( l2 ) ( u1 ) ( l1 ).
929 *
930 * Starting with ( l1 ) and ( u1 ), move the rows of U to the
931 * left and the columns of L to the right, giving
932 *
933 * ( u1 ) ( u2 ) ( u3 ) ... ( free ) ... ( l3 ) ( l2 ) ( l1 ).
934 *
935 * Also, set numl0 = the number of nonempty columns of U.
936 * ------------------------------------------------------------------
937 lu = 0
938 ll = lena + 1
939 lm = ll
940 ltopl = ll - lenl - lenu
941 lrow = lenu
942
943 do 360 k = 1, nrank
944 i = ip(k)
945 lenuk = - lenr(i)
946 lenr(i) = lenuk
947 j = iq(k)
948 lenlk = - lenc(j) - 1
949 if (lenlk .gt. 0) then
950 numl0 = numl0 + 1
951 iqloc(numl0) = lenlk
952 end if
953
954 if (lu + lenuk .lt. ltopl) then
955 * ============================================================
956 * There is room to move ( uk ). Just right-shift ( lk ).
957 * ============================================================
958 do 310 idummy = 1, lenlk
959 ll = ll - 1
960 lm = lm - 1
961 a(ll) = a(lm)
962 indc(ll) = indc(lm)
963 indr(ll) = indr(lm)
964 310 continue
965 else
966 * ============================================================
967 * There is no room for ( uk ) yet. We have to
968 * right-shift the whole of the remaining LU file.
969 * Note that ( lk ) ends up in the correct place.
970 * ============================================================
971 llsave = ll - lenlk
972 nmove = lm - ltopl
973
974 do 330 idummy = 1, nmove
975 ll = ll - 1
976 lm = lm - 1
977 a(ll) = a(lm)
978 indc(ll) = indc(lm)
979 indr(ll) = indr(lm)
980 330 continue
981
982 ltopl = ll
983 ll = llsave
984 lm = ll
985 end if
986
987 * =========================================================
988 * Left-shift ( uk ).
989 * =========================================================
990 locr(i) = lu + 1
991 l2 = lm - 1
992 lm = lm - lenuk
993
994 do 350 l = lm, l2
995 lu = lu + 1
996 a(lu) = a(l)
997 indr(lu) = indr(l)
998 350 continue
999 360 continue
1000
1001 * ------------------------------------------------------------------
1002 * Save the lengths of the nonempty columns of L,
1003 * and initialize locc(j) for the LU update routines.
1004 * ------------------------------------------------------------------
1005 do 370 k = 1, numl0
1006 lenc(k) = iqloc(k)
1007 370 continue
1008
1009 do 390 j = 1, n
1010 locc(j) = 0
1011 390 continue
1012
1013 * ------------------------------------------------------------------
1014 * Test for singularity.
1015 * lu6chk sets nsing, jsing, jumin, elmax, umax, dumax, dumin.
1016 * inform = 1 if there are singularities (nsing gt 0).
1017 * ------------------------------------------------------------------
1018 luparm(16) = nrank
1019 luparm(23) = lenl
1020 call lu6chk( 1, m, n, w, lena, luparm, parmlu,
1021 $ a, indc, indr, ip, iq,
1022 $ lenc, lenr, locc, locr, inform )
1023 go to 990
1024
1025 * ------------
1026 * Error exits.
1027 * ------------
1028 930 inform = 3
1029 if (lprint .ge. 0) write(nout, 1300) lerr, indc(lerr), indr(lerr)
1030 go to 990
1031
1032 940 inform = 4
1033 if (lprint .ge. 0) write(nout, 1400) lerr, indc(lerr), indr(lerr)
1034 go to 990
1035
1036 970 inform = 7
1037 if (lprint .ge. 0) write(nout, 1700) lena, minlen
1038 go to 990
1039
1040 980 inform = 8
1041 if (lprint .ge. 0) write(nout, 1800)
1042
1043 * Store output parameters.
1044
1045 990 luparm(10) = inform
1046 luparm(13) = minlen
1047 luparm(15) = 0
1048 luparm(16) = nrank
1049 luparm(20) = numl0
1050 luparm(21) = lenl
1051 luparm(22) = lenu
1052 luparm(23) = lenl
1053 luparm(24) = lenu
1054 luparm(25) = lrow
1055 luparm(27) = mersum
1056 luparm(28) = nutri
1057 luparm(29) = nltri
1058 parmlu(10) = amax
1059 return
1060
1061 1300 format(/ ' lu1fac error... entry a(', i8, ') has an illegal',
1062 $ ' row or column index'
1063 $ //' indc, indr =', 2i8)
1064 1400 format(/ ' lu1fac error... entry a(', i8, ') has the same',
1065 $ ' indices as an earlier entry'
1066 $ //' indc, indr =', 2i8)
1067 1700 format(/ ' lu1fac error... insufficient storage'
1068 $ //' Increase lena from', i8, ' to at least', i8)
1069 1800 format(/ ' lu1fac error... fatal bug',
1070 $ ' (sorry --- this should never happen)')
1071 * end of lu1fac
1072 end
1073
1074 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1075
1076 subroutine lu1fad( m , n , nelem, lena , luparm, parmlu,
1077 $ a , indc , indr , ip , iq ,
1078 $ lenc , lenr , locc , locr ,
1079 $ iploc , iqloc, ipinv, iqinv ,
1080 $ inform, lenl , lenu , minlen, mersum,
1081 $ nutri , nltri )
1082
1083 implicit double precision (a-h,o-z)
1084 integer luparm(30)
1085 double precision parmlu(30), a(lena)
1086 integer*4 indc(lena), indr(lena), ip(m), iq(n)
1087 integer*4 lenc(n) , lenr(m)
1088 integer locc(n) , locr(m)
1089 integer*4 iploc(n) , iqloc(m), ipinv(m), iqinv(n)
1090
1091 * ------------------------------------------------------------------
1092 * lu1fad is a driver for the numerical phase of lu1fac.
1093 * At each stage it computes a column of L and a row of U,
1094 * using a Markowitz criterion to select the pivot element,
1095 * subject to a stability criterion that bounds the elements of L.
1096 *
1097 * 00 Jan 1986 Version documented in LUSOL paper:
1098 * Gill, Murray, Saunders and Wright (1987),
1099 * Maintaining LU factors of a general sparse matrix,
1100 * Linear algebra and its applications 88/89, 239-270.
1101 *
1102 * 02 Feb 1989 Following Suhl and Aittoniemi (1987), the largest
1103 * element in each column is now kept at the start of
1104 * the column, i.e. in position locc(j) of a and indc.
1105 * This should speed up the Markowitz searches.
1106 * To save time on highly triangular matrices, we wait
1107 * until there are no further columns of length 1
1108 * before setting and maintaining that property.
1109 *
1110 * 12 Apr 1989 ipinv and iqinv added (inverses of ip and iq)
1111 * to save searching ip and iq for rows and columns
1112 * altered in each elimination step. (Used in lu1pq2)
1113 *
1114 * 19 Apr 1989 Code segmented to reduce its size.
1115 * lu1gau does most of the Gaussian elimination work.
1116 * lu1mar does just the Markowitz search.
1117 * lu1max moves biggest elements to top of columns.
1118 * lu1pen deals with pending fill-in in the row list.
1119 * lu1pq2 updates the row and column permutations.
1120 *
1121 * 26 Apr 1989 maxtie replaced by maxcol, maxrow in the Markowitz
1122 * search. maxcol, maxrow change as density increases.
1123 *
1124 * Systems Optimization Laboratory, Stanford University.
1125 * ------------------------------------------------------------------
1126
1127 intrinsic abs , min , max
1128 logical utri , ltri, spars1, spars2, dense
1129 double precision lmax, small
1130
1131 double precision zero, one,
1132 $ lmin
1133 parameter ( zero = 0.0d+0, one = 1.0d+0,
1134 $ lmin = 1.0001d+0 )
1135
1136 * ------------------------------------------------------------------
1137 * Local variables
1138 * ---------------
1139 *
1140 * lcol is the length of the column file. It points to the last
1141 * nonzero in the column list.
1142 * lrow is the analogous quantity for the row file.
1143 * lfile is the file length (lcol or lrow) after the most recent
1144 * compression of the column list or row list.
1145 * nrowd and ncold are the number of rows and columns in the
1146 * matrix defined by the pivot column and row. They are the
1147 * dimensions of the submatrix D being altered at this stage.
1148 * melim and nelim are the number of rows and columns in the
1149 * same matrix D, excluding the pivot column and row.
1150 * mleft and nleft are the number of rows and columns
1151 * still left to be factored.
1152 * nzchng is the increase in nonzeros in the matrix that remains
1153 * to be factored after the current elimination
1154 * (usually negative).
1155 * nzleft is the number of nonzeros still left to be factored.
1156 * nspare is the space we leave at the end of the last row or
1157 * column whenever a row or column is being moved to the end
1158 * of its file. nspare = 1 or 2 might help reduce the
1159 * number of file compressions when storage is tight.
1160 *
1161 * The row and column ordering permutes A into the form
1162 *
1163 * ------------------------
1164 * \ |
1165 * \ U1 |
1166 * \ |
1167 * --------------------
1168 * |\
1169 * | \
1170 * | \
1171 * P A Q = | \
1172 * | \
1173 * | --------------
1174 * | | |
1175 * | | |
1176 * | L1 | A2 |
1177 * | | |
1178 * | | |
1179 * --------------------
1180 *
1181 * where the block A2 is factored as A2 = L2 U2.
1182 * The phases of the factorization are as follows.
1183 *
1184 * utri is true when U1 is being determined.
1185 * Any column of length 1 is accepted immediately.
1186 *
1187 * ltri is true when L1 is being determined.
1188 * lu1mar exits as soon as an acceptable pivot is found
1189 * in a row of length 1.
1190 *
1191 * spars1 is true while the density of the (modified) A2 is less
1192 * than the parameter dens1 = parmlu(7) = 0.3 say.
1193 * lu1mar searches maxcol columns and maxrow rows,
1194 * where maxcol = luparm(3), maxrow = maxcol - 1.
1195 * lu1max is used to keep the biggest element at the top
1196 * of all remaining columns.
1197 *
1198 * spars2 is true while the density of the modified A2 is less
1199 * than the parameter dens2 = parmlu(8) = 0.6 say.
1200 * lu1mar searches maxcol columns and no rows.
1201 * lu1max fixes up only the first maxcol columns.
1202 *
1203 * dense is true once the density of A2 reaches dens2.
1204 * lu1mar searches only 1 column (the shortest).
1205 * lu1max fixes up only the first column.
1206 *
1207 * ------------------------------------------------------------------
1208 * To eliminate timings, comment out all lines containing "time".
1209 * ------------------------------------------------------------------
1210
1211 * integer eltime, mktime
1212
1213 * call timer ( 'start ', 3 )
1214 * ntime = (n / 4.0) + lmin
1215
1216 nout = luparm(1)
1217 lprint = luparm(2)
1218 maxcol = luparm(3)
1219 maxrow = maxcol - 1
1220 lenl = 0
1221 lenu = 0
1222 lfile = nelem
1223 lrow = nelem
1224 lcol = nelem
1225 lu1 = lena + 1
1226 minmn = min( m, n )
1227 maxmn = max( m, n )
1228 nzleft = nelem
1229 nspare = 1
1230
1231 lmax = parmlu(1)
1232 small = parmlu(3)
1233 uspace = parmlu(6)
1234 dens1 = parmlu(7)
1235 dens2 = parmlu(8)
1236 utri = .true.
1237 ltri = .false.
1238 spars1 = .false.
1239 spars2 = .false.
1240 dense = .false.
1241
1242 * Check parameters.
1243
1244 lmax = max( lmax, lmin )
1245
1246 * Initialize output parameters.
1247
1248 ndens1 = 0
1249 ndens2 = 0
1250
1251 * ------------------------------------------------------------------
1252 * Start of main loop.
1253 * ------------------------------------------------------------------
1254
1255 do 800 nrowu = 1, minmn
1256
1257 * mktime = (nrowu / ntime) + 4
1258 * eltime = (nrowu / ntime) + 9
1259 mleft = m + 1 - nrowu
1260 nleft = n + 1 - nrowu
1261
1262 * Bail out if there are no nonzero rows left.
1263
1264 if (iploc(1) .gt. m) go to 900
1265
1266 * ===============================================================
1267 * Find a suitable pivot element.
1268 * ===============================================================
1269
1270 if ( utri ) then
1271 * ------------------------------------------------------------
1272 * So far all columns have had length 1.
1273 * We are still looking for the (backward) triangular part of A
1274 * that forms the first rows and columns of U.
1275 * ------------------------------------------------------------
1276 lq1 = iqloc(1)
1277 lq2 = n
1278 if (m .gt. 1) lq2 = iqloc(2) - 1
1279
1280 if (lq1 .le. lq2) then
1281
1282 * We still have a column of length 1. Grab it!
1283
1284 jbest = iq(lq1)
1285 lc = locc(jbest)
1286 ibest = indc(lc)
1287 mbest = 0
1288 else
1289
1290 * This is the end of the U triangle.
1291 * Move the largest elements to the top of each column
1292 * to make the remaining Markowitz searches more efficient.
1293 * We will not return to this part of the code.
1294
1295 utri = .false.
1296 ltri = .true.
1297 spars1 = .true.
1298 nutri = nrowu - 1
1299 call lu1max( lq1, n, iq, a, indc, lenc, locc )
1300 end if
1301 end if
1302
1303 if ( spars1 ) then
1304 * ------------------------------------------------------------
1305 * Perform a normal Markowitz search.
1306 * Search cols of length 1, then rows of length 1,
1307 * then cols of length 2, then rows of length 2, etc.
1308 * ------------------------------------------------------------
1309 * call timer ( 'start ', mktime )
1310 call lu1mar( m , n , lena , maxmn,
1311 $ lmax , maxcol, maxrow,
1312 $ ibest, jbest , mbest ,
1313 $ a , indc , indr , ip , iq,
1314 $ lenc , lenr , locc , locr ,
1315 $ iploc, iqloc )
1316 * call timer ( 'finish', mktime )
1317
1318 if ( ltri ) then
1319
1320 * So far all rows have had length 1.
1321 * We are still looking for the (forward) triangle of A
1322 * that forms the first rows and columns of L.
1323
1324 if (mbest .gt. 0) then
1325 ltri = .false.
1326 nltri = nrowu - 1 - nutri
1327 end if
1328 else
1329
1330 * See if what's left is as dense as dens1.
1331
1332 if (nzleft .ge. mleft * nleft * dens1) then
1333 spars1 = .false.
1334 spars2 = .true.
1335 ndens1 = nleft
1336 maxrow = 0
1337 end if
1338 end if
1339
1340 else if ( spars2 .or. dense ) then
1341 * ------------------------------------------------------------
1342 * Perform a restricted Markowitz search,
1343 * looking at only the first maxcol columns. (maxrow = 0.)
1344 * ------------------------------------------------------------
1345 * call timer ( 'start ', mktime )
1346 call lu1mar( m , n , lena , maxmn,
1347 $ lmax , maxcol, maxrow,
1348 $ ibest, jbest , mbest ,
1349 $ a , indc , indr , ip , iq,
1350 $ lenc , lenr , locc , locr ,
1351 $ iploc, iqloc )
1352 * call timer ( 'finish', mktime )
1353
1354 * See if what's left is as dense as dens2.
1355
1356 if ( spars2 ) then
1357 if (nzleft .ge. mleft * nleft * dens2) then
1358 spars2 = .false.
1359 dense = .true.
1360 ndens2 = nleft
1361 maxcol = 1
1362 end if
1363 end if
1364 end if
1365
1366 * ===============================================================
1367 * The best aij has been found.
1368 * The pivot row ibest and the pivot column jbest
1369 * Define a dense matrix D of size nrowd by ncold.
1370 * ===============================================================
1371 ncold = lenr(ibest)
1372 nrowd = lenc(jbest)
1373 melim = nrowd - 1
1374 nelim = ncold - 1
1375 mersum = mersum + mbest
1376 lenl = lenl + melim
1377 lenu = lenu + ncold
1378 if (lprint .ge. 2)
1379 $ write(nout, 1100) nrowu, ibest, jbest, nrowd, ncold
1380
1381 * ===============================================================
1382 * Allocate storage for the next column of L and next row of U.
1383 * Initially the top of a, indc, indr are used as follows:
1384 *
1385 * ncold melim ncold melim
1386 *
1387 * a |...........|...........|ujbest..ujn|li1......lim|
1388 *
1389 * indc |...........| lenr(i) | lenc(j) | markl(i) |
1390 *
1391 * indr |...........| iqloc(i) | jfill(j) | ifill(i) |
1392 *
1393 * ^ ^ ^ ^ ^
1394 * lfree lsave lu1 ll1 oldlu1
1395 *
1396 * Later the correct indices are inserted:
1397 *
1398 * indc | | | |i1........im|
1399 *
1400 * indr | | |jbest....jn|ibest..ibest|
1401 *
1402 * ===============================================================
1403 ll1 = lu1 - melim
1404 lu1 = ll1 - ncold
1405 lsave = lu1 - nrowd
1406 lfree = lsave - ncold
1407
1408 * Make sure the column file has room.
1409 * Also force a compression if its length exceeds a certain limit.
1410
1411 limit = uspace * lfile + m + n + 1000
1412 minfre = ncold + melim
1413 nfree = lfree - lcol
1414 if (nfree .lt. minfre .or. lcol .gt. limit) then
1415 call lu1rec( n, .true., luparm,
1416 $ lcol, lena, a, indc, lenc, locc )
1417 lfile = lcol
1418 nfree = lfree - lcol
1419 if (nfree .lt. minfre) go to 970
1420 end if
1421
1422 * Make sure the row file has room.
1423
1424 minfre = melim + ncold
1425 nfree = lfree - lrow
1426 if (nfree .lt. minfre .or. lrow .gt. limit) then
1427 call lu1rec( m, .false., luparm,
1428 $ lrow, lena, a, indr, lenr, locr )
1429 lfile = lrow
1430 nfree = lfree - lrow
1431 if (nfree .lt. minfre) go to 970
1432 end if
1433
1434 * ===============================================================
1435 * Move the pivot element to the front of its row
1436 * and to the top of its column.
1437 * ===============================================================
1438 lpivr = locr(ibest)
1439 lpivr1 = lpivr + 1
1440 lpivr2 = lpivr + nelim
1441
1442 do 330 l = lpivr, lpivr2
1443 if (indr(l) .eq. jbest) go to 335
1444 330 continue
1445
1446 335 indr(l) = indr(lpivr)
1447 indr(lpivr) = jbest
1448
1449 lpivc = locc(jbest)
1450 lpivc1 = lpivc + 1
1451 lpivc2 = lpivc + melim
1452
1453 do 340 l = lpivc, lpivc2
1454 if (indc(l) .eq. ibest) go to 345
1455 340 continue
1456
1457 345 indc(l) = indc(lpivc)
1458 indc(lpivc) = ibest
1459 abest = a(l)
1460 a(l) = a(lpivc)
1461 a(lpivc) = abest
1462
1463 * ===============================================================
1464 * Delete the pivot row from the column file
1465 * and store it as the next row of U.
1466 * set indr(lu) = 0 to initialize jfill ptrs on columns of D,
1467 * indc(lu) = lenj to save the original column lengths.
1468 * ===============================================================
1469 a(lu1) = abest
1470 indr(lu1) = jbest
1471 indc(lu1) = nrowd
1472 lu = lu1
1473
1474 do 360 lr = lpivr1, lpivr2
1475 lu = lu + 1
1476 j = indr(lr)
1477 lenj = lenc(j)
1478 lenc(j) = lenj - 1
1479 lc1 = locc(j)
1480 last = lc1 + lenc(j)
1481
1482 do 350 l = lc1, last
1483 if (indc(l) .eq. ibest) go to 355
1484 350 continue
1485
1486 355 a(lu) = a(l)
1487 indr(lu) = 0
1488 indc(lu) = lenj
1489 a(l) = a(last)
1490 indc(l) = indc(last)
1491 indc(last) = 0
1492 360 continue
1493
1494 if (indc(lcol) .eq. 0) lcol = lcol - 1
1495
1496 * ===============================================================
1497 * Delete the pivot column from the row file
1498 * and store the nonzeros of the next column of L.
1499 * Set indc(ll) = 0 to initialize markl(*) markers,
1500 * indr(ll) = 0 to initialize ifill(*) row fill-in cntrs,
1501 * indc(ls) = leni to save the original row lengths,
1502 * indr(ls) = iqloc(i) to save parts of iqloc(*),
1503 * iqloc(i) = lsave - ls to point to the nonzeros of L
1504 * = -1, -2, -3, ... in mark(*).
1505 * ===============================================================
1506 indc(lsave) = ncold
1507 if (melim .eq. 0) go to 700
1508
1509 ll = ll1 - 1
1510 ls = lsave
1511 abest = one / abest
1512
1513 do 390 lc = lpivc1, lpivc2
1514 ll = ll + 1
1515 ls = ls + 1
1516 i = indc(lc)
1517 leni = lenr(i)
1518 lenr(i) = leni - 1
1519 lr1 = locr(i)
1520 last = lr1 + lenr(i)
1521
1522 do 380 l = lr1, last
1523 if (indr(l) .eq. jbest) go to 385
1524 380 continue
1525
1526 385 indr(l) = indr(last)
1527 indr(last) = 0
1528
1529 a(ll) = - a(lc) * abest
1530 indc(ll) = 0
1531 indr(ll) = 0
1532 indc(ls) = leni
1533 indr(ls) = iqloc(i)
1534 iqloc(i) = lsave - ls
1535 390 continue
1536
1537 if (indr(lrow) .eq. 0) lrow = lrow - 1
1538
1539 * ===============================================================
1540 * Do the Gaussian elimination.
1541 * This involves adding a multiple of the pivot column
1542 * to all other columns in the pivot row.
1543 *
1544 * Sometimes more than one call to lu1gau is needed to allow
1545 * compression of the column file.
1546 * lfirst says which column the elimination should start with.
1547 * minfre is a bound on the storage needed for any one column.
1548 * lu points to off-diagonals of u.
1549 * nfill keeps track of pending fill-in in the row file.
1550 * ===============================================================
1551 if (nelim .eq. 0) go to 700
1552 lfirst = lpivr1
1553 minfre = mleft + nspare
1554 lu = 1
1555 nfill = 0
1556
1557 * 400 call timer ( 'start ', eltime )
1558 400 call lu1gau( m , melim , ncold , nspare, small,
1559 $ lpivc1, lpivc2, lfirst, lpivr2, lfree, minfre,
1560 $ lrow , lcol , lu , nfill ,
1561 $ a , indc , indr ,
1562 $ lenc , lenr , locc , locr ,
1563 $ iqloc , a(ll1), indc(ll1),
1564 $ a(lu1), indr(ll1), indr(lu1) )
1565 * call timer ( 'finish', eltime )
1566
1567 if (lfirst .gt. 0) then
1568
1569 * The elimination was interrupted.
1570 * Compress the column file and try again.
1571 * lfirst, lu and nfill have appropriate new values.
1572
1573 call lu1rec( n, .true., luparm,
1574 $ lcol, lena, a, indc, lenc, locc )
1575 lfile = lcol
1576 lpivc = locc(jbest)
1577 lpivc1 = lpivc + 1
1578 lpivc2 = lpivc + melim
1579 nfree = lfree - lcol
1580 if (nfree .lt. minfre) go to 970
1581 go to 400
1582 end if
1583
1584 * ===============================================================
1585 * The column file has been fully updated.
1586 * Deal with any pending fill-in in the row file.
1587 * ===============================================================
1588 if (nfill .gt. 0) then
1589
1590 * Compress the row file if necessary.
1591 * lu1gau has set nfill to be the number of pending fill-ins
1592 * plus the current length of any rows that need to be moved.
1593
1594 minfre = nfill
1595 nfree = lfree - lrow
1596 if (nfree .lt. minfre) then
1597 call lu1rec( m, .false., luparm,
1598 $ lrow, lena, a, indr, lenr, locr )
1599 lfile = lrow
1600 lpivr = locr(ibest)
1601 lpivr1 = lpivr + 1
1602 lpivr2 = lpivr + nelim
1603 nfree = lfree - lrow
1604 if (nfree .lt. minfre) go to 970
1605 end if
1606
1607 * Move rows that have pending fill-in to end of the row file.
1608 * Then insert the fill-in.
1609
1610 call lu1pen( m , melim , ncold , nspare,
1611 $ lpivc1, lpivc2, lpivr1, lpivr2, lrow,
1612 $ lenc , lenr , locc , locr ,
1613 $ indc , indr , indr(ll1), indr(lu1) )
1614 end if
1615
1616 * ===============================================================
1617 * Restore the saved values of iqloc.
1618 * Insert the correct indices for the col of L and the row of U.
1619 * ===============================================================
1620 700 lenr(ibest) = 0
1621 lenc(jbest) = 0
1622
1623 ll = ll1 - 1
1624 ls = lsave
1625
1626 do 710 lc = lpivc1, lpivc2
1627 ll = ll + 1
1628 ls = ls + 1
1629 i = indc(lc)
1630 iqloc(i) = indr(ls)
1631 indc(ll) = i
1632 indr(ll) = ibest
1633 710 continue
1634
1635 lu = lu1 - 1
1636
1637 do 720 lr = lpivr, lpivr2
1638 lu = lu + 1
1639 indr(lu) = indr(lr)
1640 720 continue
1641
1642 * ===============================================================
1643 * Free the space occupied by the pivot row
1644 * and update the column permutation.
1645 * Then free the space occupied by the pivot column
1646 * and update the row permutation.
1647 *
1648 * nzchng is found in both calls to lu1pq2, but we use it only
1649 * after the second.
1650 * ===============================================================
1651 call lu1pq2( ncold, nzchng,
1652 $ indr(lpivr), indc( lu1 ), lenc, iqloc, iq, iqinv )
1653
1654 call lu1pq2( nrowd, nzchng,
1655 $ indc(lpivc), indc(lsave), lenr, iploc, ip, ipinv )
1656
1657 nzleft = nzleft + nzchng
1658
1659 * ===============================================================
1660 * Move the largest element to the top of each relevant column.
1661 * ===============================================================
1662 if ( spars1 ) then
1663
1664 * Do all modified columns.
1665
1666 if ( nelim .gt. 0 ) then
1667 * call timer ( 'start ', 16 )
1668 call lu1max( lu1+1, lu, indr, a, indc, lenc, locc )
1669 * call timer ( 'finish', 16 )
1670 end if
1671
1672 else if ( spars2 .or. dense ) then
1673
1674 * Just do the maxcol shortest columns.
1675
1676 * call timer ( 'start ', 16 )
1677 lq1 = iqloc(1)
1678 lq2 = min( lq1 + maxcol - 1, n )
1679 call lu1max( lq1, lq2, iq, a, indc, lenc, locc )
1680 * call timer ( 'finish', 16 )
1681 end if
1682
1683 * ===============================================================
1684 * Negate lengths of pivot row and column so they will be
1685 * eliminated during compressions.
1686 * ===============================================================
1687 lenr(ibest) = - ncold
1688 lenc(jbest) = - nrowd
1689
1690 * Test for fatal bug: row or column lists overwriting L and U.
1691
1692 if (lrow .gt. lsave) go to 980
1693 if (lcol .gt. lsave) go to 980
1694
1695 * Reset the length of the row file if pivot row was at the end.
1696 * Similarly for the column file.
1697
1698 if (lrow .eq. lpivr2) then
1699 lrow = lpivr
1700 do 770 l = 1, lpivr
1701 if (indr(lrow) .ne. 0) go to 780
1702 lrow = lrow - 1
1703 770 continue
1704 end if
1705
1706 780 if (lcol .eq. lpivc2) then
1707 lcol = lpivc
1708 do 790 l = 1, lpivc
1709 if (indc(lcol) .ne. 0) go to 800
1710 lcol = lcol - 1
1711 790 continue
1712 end if
1713 800 continue
1714
1715 * ------------------------------------------------------------------
1716 * End of main loop.
1717 * ------------------------------------------------------------------
1718
1719 * Normal exit.
1720
1721 900 inform = 0
1722 minlen = lenl + lenu + 2*(m + n)
1723 go to 990
1724
1725 * Not enough space free after a compress.
1726 * Set minlen to an estimate of the necessary value of lena.
1727
1728 970 inform = 7
1729 minlen = lena + lfile + 2*(m + n)
1730 go to 990
1731
1732 * Fatal error. This will never happen!
1733 * (Famous last words.)
1734
1735 980 inform = 8
1736
1737 * exit.
1738
1739 990 luparm(17) = ndens1
1740 luparm(18) = ndens2
1741 * call timer ( 'finish', 3 )
1742 return
1743
1744 1100 format(' lu1fad. nrowu =', i7, ' ibest, jbest =', 2i7,
1745 $ ' nrowd, ncold =', 2i5)
1746 * end of lu1fad
1747 end
1748
1749 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1750
1751 subroutine lu1gau( m , melim , ncold , nspare, small,
1752 $ lpivc1, lpivc2, lfirst, lpivr2, lfree, minfre,
1753 $ lrow , lcol , lu , nfill ,
1754 $ a , indc , indr ,
1755 $ lenc , lenr , locc , locr ,
1756 $ mark , al , markl ,
1757 $ au , ifill , jfill )
1758
1759 implicit double precision (a-h,o-z)
1760 double precision a(*) , al(melim) , au(ncold)
1761 integer*4 indc(*) , indr(*) , lenc(*) , lenr(*),
1762 $ mark(*) , markl(melim),
1763 $ ifill(melim), jfill(ncold)
1764 integer locc(*) , locr(*)
1765
1766 * ------------------------------------------------------------------
1767 * lu1gau does most of the work for each step of Gaussian elimination.
1768 * A multiple of the pivot column is added to each other column j
1769 * in the pivot row. The column list is fully updated.
1770 * The row list is updated if there is room, but some fill-ins may
1771 * remain, as indicated by ifill and jfill.
1772 *
1773 *
1774 * Input:
1775 * lfirst is the first column to be processed.
1776 * lu + 1 is the corresponding element of U in au(*).
1777 * nfill keeps track of pending fill-in.
1778 * a(*) contains the nonzeros for each column j.
1779 * indc(*) contains the row indices for each column j.
1780 * al(*) contains the new column of L. A multiple of it is
1781 * used to modify each column.
1782 * mark(*) has been set to -1, -2, -3, ... in the rows
1783 * corresponding to nonzero 1, 2, 3, ... of the col of L.
1784 * au(*) contains the new row of U. Each nonzero gives the
1785 * required multiple of the column of L.
1786 *
1787 * Workspace:
1788 * markl(*) marks the nonzeros of L actually used.
1789 * (A different mark, namely j, is used for each column.)
1790 *
1791 * Output:
1792 * lfirst = 0 if all columns were completed,
1793 * > 0 otherwise.
1794 * lu returns the position of the last nonzero of U
1795 * actually used, in case we come back in again.
1796 * nfill keeps track of the total extra space needed in the
1797 * row file.
1798 * ifill(ll) counts pending fill-in for rows involved in the new
1799 * column of L.
1800 * jfill(lu) marks the first pending fill-in stored in columns
1801 * involved in the new row of U.
1802 *
1803 * 16 Apr 1989 First version.
1804 * 23 Apr 1989 lfirst, lu, nfill are now input and output
1805 * to allow re-entry if elimination is interrupted.
1806 * ------------------------------------------------------------------
1807
1808 intrinsic abs
1809 logical atend
1810
1811 do 600 lr = lfirst, lpivr2
1812 j = indr(lr)
1813 lenj = lenc(j)
1814 nfree = lfree - lcol
1815 if (nfree .lt. minfre) go to 900
1816
1817 * ---------------------------------------------------------------
1818 * Inner loop to modify existing nonzeros in column j.
1819 * Loop 440 performs most of the arithmetic involved in the
1820 * whole LU factorization.
1821 * ndone counts how many multipliers were used.
1822 * ndrop counts how many modified nonzeros are negligibly small.
1823 * ---------------------------------------------------------------
1824 lu = lu + 1
1825 uj = au(lu)
1826 lc1 = locc(j)
1827 lc2 = lc1 + lenj - 1
1828 atend = lcol .eq. lc2
1829 ndone = 0
1830 if (lenj .eq. 0) go to 500
1831
1832 ndrop = 0
1833
1834 do 440 l = lc1, lc2
1835 i = indc(l)
1836 ll = - mark(i)
1837 if (ll .gt. 0) then
1838 ndone = ndone + 1
1839 markl(ll) = j
1840 a(l) = a(l) + al(ll) * uj
1841 if (abs( a(l) ) .le. small) then
1842 ndrop = ndrop + 1
1843 end if
1844 end if
1845 440 continue
1846
1847 * ---------------------------------------------------------------
1848 * Remove any negligible modified nonzeros from both
1849 * the column file and the row file.
1850 * ---------------------------------------------------------------
1851 if (ndrop .eq. 0) go to 500
1852 k = lc1
1853
1854 do 480 l = lc1, lc2
1855 i = indc(l)
1856 if (abs( a(l) ) .le. small) go to 460
1857 a(k) = a(l)
1858 indc(k) = i
1859 k = k + 1
1860 go to 480
1861
1862 * Delete the nonzero from the row file.
1863
1864 460 lenj = lenj - 1
1865 lenr(i) = lenr(i) - 1
1866 lr1 = locr(i)
1867 last = lr1 + lenr(i)
1868
1869 do 470 lrep = lr1, last
1870 if (indr(lrep) .eq. j) go to 475
1871 470 continue
1872
1873 475 indr(lrep) = indr(last)
1874 indr(last) = 0
1875 if (lrow .eq. last) lrow = lrow - 1
1876 480 continue
1877
1878 * Free the deleted elements.
1879
1880 do 490 l = k, lc2
1881 indc(l) = 0
1882 490 continue
1883 if (atend) lcol = k - 1
1884
1885 * ---------------------------------------------------------------
1886 * Deal with the fill-in in column j.
1887 * ---------------------------------------------------------------
1888 500 if (ndone .eq. melim) go to 590
1889
1890 * See if column j already has room for the fill-in.
1891
1892 if (atend) go to 540
1893 last = lc1 + lenj - 1
1894 l1 = last + 1
1895 l2 = last + (melim - ndone)
1896
1897 do 510 l = l1, l2
1898 if (indc(l) .gt. 0) go to 520
1899 510 continue
1900 go to 540
1901
1902 * We must move column j to the end of the column file.
1903 * Leave some spare room at the end of the current last column.
1904
1905 520 atend = .true.
1906
1907 do 522 l = lcol + 1, lcol + nspare
1908 lcol = l
1909 indc(l) = 0
1910 522 continue
1911
1912 l1 = lc1
1913 lc1 = lcol + 1
1914 locc(j) = lc1
1915
1916 do 525 l = l1, last
1917 lcol = lcol + 1
1918 a(lcol) = a(l)
1919 indc(lcol) = indc(l)
1920 indc(l) = 0
1921 525 continue
1922
1923 * ---------------------------------------------------------------
1924 * Inner loop for the fill-in in column j.
1925 * This is usually not very expensive.
1926 * ---------------------------------------------------------------
1927 540 indr(lrow+1) = 0
1928 last = lc1 + lenj - 1
1929 ll = 0
1930
1931 do 560 lc = lpivc1, lpivc2
1932 ll = ll + 1
1933 if (markl(ll) .eq. j ) go to 560
1934 aij = al(ll) * uj
1935 if (abs( aij ) .le. small) go to 560
1936 lenj = lenj + 1
1937 last = last + 1
1938 a(last) = aij
1939 i = indc(lc)
1940 indc(last) = i
1941 leni = lenr(i)
1942
1943 * Add the fill-in to row i if there is room.
1944
1945 l = locr(i) + leni
1946 if (indr(l) .eq. 0) then
1947 indr(l) = j
1948 lenr(i) = leni + 1
1949 if (lrow .lt. l) lrow = l
1950 else
1951
1952 * Row i does not have room for the fill-in.
1953 * Increment ifill(ll) to count how often this has
1954 * happened to row i. Also, add m to the row index
1955 * indc(last) in column j to mark it as a fill-in that is
1956 * still pending.
1957 *
1958 * If this is the first pending fill-in for row i,
1959 * nfill includes the current length of row i
1960 * (since the whole row has to be moved later).
1961 *
1962 * If this is the first pending fill-in for column j,
1963 * jfill(lu) records the current length of column j
1964 * (to shorten the search for pending fill-ins later).
1965
1966 if (ifill(ll) .eq. 0) nfill = nfill + leni + nspare
1967 if (jfill(lu) .eq. 0) jfill(lu) = lenj
1968 nfill = nfill + 1
1969 ifill(ll) = ifill(ll) + 1
1970 indc(last) = m + i
1971 end if
1972 560 continue
1973
1974 if ( atend ) lcol = last
1975
1976 * End loop for column j. Store its final length.
1977
1978 590 lenc(j) = lenj
1979 600 continue
1980
1981 * Successful completion.
1982
1983 lfirst = 0
1984 return
1985
1986 * Interruption. We have to come back in after the
1987 * column file is compressed. Give lfirst a new value.
1988 * lu and nfill will retain their current values.
1989
1990 900 lfirst = lr
1991 return
1992
1993 * end of lu1gau
1994 end
1995
1996 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1997
1998 subroutine lu1mar( m , n , lena , maxmn,
1999 $ lmax , maxcol, maxrow,
2000 $ ibest, jbest , mbest ,
2001 $ a , indc , indr , ip , iq,
2002 $ lenc , lenr , locc , locr ,
2003 $ iploc, iqloc )
2004
2005 implicit double precision (a-h,o-z)
2006 double precision lmax , a(lena)
2007 integer*4 indc(lena), indr(lena), ip(m) , iq(n) ,
2008 $ lenc(n) , lenr(m) , iploc(n), iqloc(m)
2009 integer locc(n) , locr(m)
2010
2011 * ------------------------------------------------------------------
2012 * lu1mar uses a Markowitz criterion to select a pivot element
2013 * for the next stage of a sparse LU factorization,
2014 * subject to a stability criterion that bounds the elements of L.
2015 *
2016 * 00 Jan 1986 Version documented in LUSOL paper:
2017 * Gill, Murray, Saunders and Wright (1987),
2018 * Maintaining LU factors of a general sparse matrix,
2019 * Linear algebra and its applications 88/89, 239-270.
2020 *
2021 * 02 Feb 1989 Following Suhl and Aittoniemi (1987), the largest
2022 * element in each column is now kept at the start of
2023 * the column, i.e. in position locc(j) of a and indc.
2024 * This should speed up the Markowitz searches.
2025 *
2026 * 26 Apr 1989 Both columns and rows searched during spars1 phase.
2027 * Only columns searched during spars2 phase.
2028 * maxtie replaced by maxcol and maxrow.
2029 *
2030 * Systems Optimization Laboratory, Stanford University.
2031 * ------------------------------------------------------------------
2032
2033 intrinsic abs, max
2034 double precision lbest
2035 double precision zero, gamma
2036 parameter ( zero = 0.0d+0, gamma = 2.0d+0 )
2037
2038 * gamma is "gamma" in the tie-breaking rule TB4 in the LUSOL paper.
2039
2040 * ------------------------------------------------------------------
2041 * Search cols of length nz = 1, then rows of length nz = 1,
2042 * then cols of length nz = 2, then rows of length nz = 2, etc.
2043 * ------------------------------------------------------------------
2044 mbest = m * n
2045 ncol = 0
2046 nrow = 0
2047
2048 do 300 nz = 1, maxmn
2049 nz1 = nz - 1
2050 if (ncol .ge. maxcol) go to 200
2051 if (mbest .le. nz1**2) go to 900
2052 if (nz .gt. m ) go to 200
2053
2054 * ---------------------------------------------------------------
2055 * Search the set of columns of length nz.
2056 * ---------------------------------------------------------------
2057 lq1 = iqloc(nz)
2058 lq2 = n
2059 if (nz .lt. m) lq2 = iqloc(nz + 1) - 1
2060
2061 do 180 lq = lq1, lq2
2062 ncol = ncol + 1
2063 j = iq(lq)
2064 lc1 = locc(j)
2065 lc2 = lc1 + nz1
2066 amax = abs( a(lc1) )
2067
2068 * Test all aijs in this column.
2069 * amax is the largest element (the first in the column).
2070 * cmax is the largest multiplier if aij becomes pivot.
2071
2072 do 160 lc = lc1, lc2
2073 i = indc(lc)
2074 merit = nz1 * (lenr(i) - 1)
2075 if (merit .gt. mbest) go to 160
2076
2077 * aij has a promising merit.
2078 * Apply the stability test.
2079 * We require aij to be sufficiently large compared to
2080 * all other nonzeros in column j. This is equivalent
2081 * to requiring cmax to be bounded by lmax.
2082
2083 if (lc .eq. lc1) then
2084
2085 * This is the maximum element, amax.
2086 * Find the biggest element in the rest of the column
2087 * and hence get cmax. We know cmax .le. 1, but
2088 * we still want it exactly in order to break ties.
2089
2090 aij = amax
2091 cmax = zero
2092 do 140 l = lc1 + 1, lc2
2093 cmax = max( cmax, abs( a(l) ) )
2094 140 continue
2095 cmax = cmax / amax
2096 else
2097
2098 * aij is not the biggest element, so cmax .ge. 1.
2099 * Bail out if cmax will be too big.
2100
2101 aij = abs( a(lc) )
2102 if (amax .gt. aij * lmax) go to 160
2103 cmax = amax / aij
2104 end if
2105
2106 * aij is big enough. Its maximum multiplier is cmax.
2107 * Accept it immediately if its merit is less than mbest.
2108
2109 if (merit .eq. mbest) then
2110
2111 * Break ties (merit = mbest).
2112 * In this version we minimize cmax
2113 * but if it is already small we maximize the pivot.
2114
2115 if (lbest .le. gamma .and. cmax .le. gamma) then
2116 if (abest .ge. aij ) go to 160
2117 else
2118 if (lbest .le. cmax) go to 160
2119 end if
2120 end if
2121
2122 * aij is the best pivot so far.
2123
2124 ibest = i
2125 jbest = j
2126 mbest = merit
2127 abest = aij
2128 lbest = cmax
2129 160 continue
2130
2131 * Finished with that column.
2132
2133 if (ncol .ge. maxcol) go to 200
2134 180 continue
2135
2136 * ---------------------------------------------------------------
2137 * Search the set of rows of length nz.
2138 * ---------------------------------------------------------------
2139 200 if (nrow .ge. maxrow) go to 290
2140 if (mbest .le. nz*nz1) go to 900
2141 if (nz .gt. n ) go to 300
2142 lp1 = iploc(nz)
2143 lp2 = m
2144 if (nz .lt. n) lp2 = iploc(nz + 1) - 1
2145
2146 do 280 lp = lp1, lp2
2147 nrow = nrow + 1
2148 i = ip(lp)
2149 leni = lenr(i)
2150 lr1 = locr(i)
2151 lr2 = lr1 + leni - 1
2152
2153 do 260 lr = lr1, lr2
2154 j = indr(lr)
2155 merit = nz1 * (lenc(j) - 1)
2156 if (merit .gt. mbest) go to 260
2157
2158 * aij has a promising merit.
2159 * Find where aij is in column j.
2160
2161 lenj = lenc(j)
2162 lc1 = locc(j)
2163 lc2 = lc1 + lenj - 1
2164 amax = abs( a(lc1) )
2165 do 220 lc = lc1, lc2
2166 if (indc(lc) .eq. i) go to 230
2167 220 continue
2168
2169 * Apply the same stability test as above.
2170
2171 230 if (lc .eq. lc1) then
2172
2173 * This is the maximum element, amax.
2174 * Find the biggest element in the rest of the column
2175 * and hence get cmax. We know cmax .le. 1, but
2176 * we still want it exactly in order to break ties.
2177
2178 aij = amax
2179 cmax = zero
2180 do 240 l = lc1 + 1, lc2
2181 cmax = max( cmax, abs( a(l) ) )
2182 240 continue
2183 cmax = cmax / amax
2184 else
2185
2186 * aij is not the biggest element, so cmax .ge. 1.
2187 * Bail out if cmax will be too big.
2188
2189 aij = abs( a(lc) )
2190 if (amax .gt. aij * lmax) go to 260
2191 cmax = amax / aij
2192 end if
2193
2194 * aij is big enough. Its maximum multiplier is cmax.
2195 * Accept it immediately if its merit is less than mbest.
2196
2197 if (merit .eq. mbest) then
2198
2199 * Break ties as before (merit = mbest).
2200
2201 if (lbest .le. gamma .and. cmax .le. gamma) then
2202 if (abest .ge. aij ) go to 260
2203 else
2204 if (lbest .le. cmax) go to 260
2205 end if
2206 end if
2207
2208 * aij is the best pivot so far.
2209
2210 ibest = i
2211 jbest = j
2212 mbest = merit
2213 abest = aij
2214 lbest = cmax
2215 if (nz .eq. 1) go to 900
2216 260 continue
2217
2218 * Finished with that row.
2219
2220 if (nrow .ge. maxrow) go to 290
2221 280 continue
2222
2223 * See if it's time to quit.
2224
2225 290 if (nrow .ge. maxrow .and. ncol .ge. maxcol) go to 900
2226 300 continue
2227
2228 900 return
2229
2230 * end of lu1mar
2231 end
2232
2233 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2234
2235 subroutine lu1pen( m , melim , ncold , nspare,
2236 $ lpivc1, lpivc2, lpivr1, lpivr2, lrow,
2237 $ lenc , lenr , locc , locr ,
2238 $ indc , indr , ifill , jfill )
2239
2240 integer*4 indc(*) , indr(*) , lenc(*), lenr(*),
2241 $ ifill(melim), jfill(ncold)
2242 integer locc(*) , locr(*)
2243
2244 * ------------------------------------------------------------------
2245 * lu1pen deals with pending fill-in in the row file.
2246 * ifill(ll) says if a row involved in the new column of L
2247 * has to be updated. If positive, it is the total
2248 * length of the final updated row.
2249 * jfill(lu) says if a column involved in the new row of U
2250 * contains any pending fill-ins. If positive, it points
2251 * to the first fill-in in the column that has yet to be
2252 * added to the row file.
2253 * ------------------------------------------------------------------
2254
2255 * Move the rows that have pending fill-in
2256 * to the end of the row file, leaving space for the fill-in.
2257 * Leave some spare room at the end of the current last row.
2258
2259 ll = 0
2260
2261 do 650 lc = lpivc1, lpivc2
2262 ll = ll + 1
2263 if (ifill(ll) .eq. 0) go to 650
2264
2265 do 620 l = lrow + 1, lrow + nspare
2266 lrow = l
2267 indr(l) = 0
2268 620 continue
2269
2270 i = indc(lc)
2271 lr1 = locr(i)
2272 lr2 = lr1 + lenr(i) - 1
2273 locr(i) = lrow + 1
2274
2275 do 630 lr = lr1, lr2
2276 lrow = lrow + 1
2277 indr(lrow) = indr(lr)
2278 indr(lr) = 0
2279 630 continue
2280
2281 lrow = lrow + ifill(ll)
2282 650 continue
2283
2284 * Scan all columns of D and insert the pending fill-in
2285 * into the row file.
2286
2287 lu = 1
2288
2289 do 680 lr = lpivr1, lpivr2
2290 lu = lu + 1
2291 if (jfill(lu) .eq. 0) go to 680
2292 j = indr(lr)
2293 lc1 = locc(j) + jfill(lu) - 1
2294 lc2 = locc(j) + lenc(j) - 1
2295
2296 do 670 lc = lc1, lc2
2297 i = indc(lc) - m
2298 if (i .gt. 0) then
2299 indc(lc) = i
2300 last = locr(i) + lenr(i)
2301 indr(last) = j
2302 lenr(i) = lenr(i) + 1
2303 end if
2304 670 continue
2305 680 continue
2306
2307 * end of lu1pen
2308 end
2309
2310 ************************************************************************
2311 *
2312 * file lu1util fortran
2313 *
2314 * lu1max lu1or1 lu1or2 lu1or3 lu1or4
2315 * lu1pq1 lu1pq2 lu1pq3 lu1rec
2316 *
2317 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2318
2319 subroutine lu1max( kol1, kol2, kol,
2320 $ a , indc, lenc, locc )
2321
2322 implicit double precision (a-h,o-z)
2323 double precision a(*)
2324 integer*4 kol(kol2), indc(*), lenc(*)
2325 integer locc(*)
2326
2327 * ------------------------------------------------------------------
2328 * lu1max moves the largest element in each of a set of columns
2329 * to the top of its column.
2330 * ------------------------------------------------------------------
2331
2332 intrinsic abs
2333
2334 do 120 k = kol1, kol2
2335 j = kol(k)
2336 lc1 = locc(j)
2337
2338 * The next 10 lines are equivalent to
2339 * l = idamax( lenc(j), a(lc1), 1 ) + lc1 - 1
2340 *>>>>>>>>
2341 lc2 = lc1 + lenc(j) - 1
2342 amax = abs( a(lc1) )
2343 l = lc1
2344
2345 do 110 lc = lc1 + 1, lc2
2346 if (amax .lt. abs( a(lc) )) then
2347 amax = abs( a(lc) )
2348 l = lc
2349 end if
2350 110 continue
2351 *>>>>>>>>
2352 if (l .gt. lc1) then
2353 amax = a(l)
2354 a(l) = a(lc1)
2355 a(lc1) = amax
2356 i = indc(l)
2357 indc(l) = indc(lc1)
2358 indc(lc1) = i
2359 end if
2360 120 continue
2361
2362 * end of lu1max
2363 end
2364
2365 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2366
2367 subroutine lu1or1( m, n, nelem, small,
2368 $ a, indc, indr, lenc, lenr,
2369 $ amax, numnz, lerr, inform )
2370
2371 implicit double precision (a-h,o-z)
2372 double precision a(nelem)
2373 integer*4 indc(nelem), indr(nelem)
2374 integer*4 lenc(n), lenr(m)
2375
2376 * ------------------------------------------------------------------
2377 * lu1or1 organizes the elements of an m by n matrix A as
2378 * follows. On entry, the parallel arrays a, indc, indr,
2379 * contain nelem entries of the form aij, i, j,
2380 * in any order. nelem must be positive.
2381 *
2382 * Entries not larger than the input parameter small are treated as
2383 * zero and removed from a, indc, indr. The remaining entries are
2384 * defined to be nonzero. numnz returns the number of such nonzeros
2385 * and amax returns the magnitude of the largest nonzero.
2386 * The arrays lenc, lenr return the number of nonzeros in each
2387 * column and row of A.
2388 *
2389 * inform = 0 on exit, except inform = 1 if any of the indices in
2390 * indc, indr imply that the element aij lies outside the m by n
2391 * dimensions of A.
2392 *
2393 * Version of February 1985.
2394 * ------------------------------------------------------------------
2395 do 10 i = 1, m
2396 lenr(i) = 0
2397 10 continue
2398
2399 do 20 j = 1, n
2400 lenc(j) = 0
2401 20 continue
2402
2403 amax = 0.0d+0
2404 numnz = nelem
2405 l = nelem + 1
2406
2407 do 100 ldummy = 1, nelem
2408 l = l - 1
2409 if (abs( a(l) ) .gt. small) then
2410 i = indc(l)
2411 j = indr(l)
2412 amax = max( amax, abs( a(l) ) )
2413 if (i .lt. 1 .or. i .gt. m) go to 910
2414 if (j .lt. 1 .or. j .gt. n) go to 910
2415 lenr(i) = lenr(i) + 1
2416 lenc(j) = lenc(j) + 1
2417 else
2418
2419 * Replace a negligible element by last element. Since
2420 * we are going backwards, we know the last element is ok.
2421
2422 a(l) = a(numnz)
2423 indc(l) = indc(numnz)
2424 indr(l) = indr(numnz)
2425 numnz = numnz - 1
2426 end if
2427 100 continue
2428
2429 inform = 0
2430 return
2431
2432 910 lerr = l
2433 inform = 1
2434 return
2435
2436 * end of lu1or1
2437 end
2438
2439 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2440
2441 subroutine lu1or2( n, numa, a, inum, jnum, len, loc )
2442 integer*4 inum(numa), jnum(numa), len(n)
2443 integer loc(n)
2444 double precision a(numa), ace, acep
2445
2446 * ------------------------------------------------------------------
2447 * lu1or2 sorts a list of matrix elements a(i,j) into column
2448 * order, given numa entries a(i,j), i, j in the parallel
2449 * arrays a, inum, jnum respectively. The matrix is assumed
2450 * to have n columns and an arbitrary number of rows.
2451 *
2452 * On entry, len(*) must contain the length of each column.
2453 *
2454 * On exit, a(*) and inum(*) are sorted, jnum(*) = 0, and
2455 * loc(j) points to the start of column j.
2456 *
2457 * lu1or2 is derived from mc20ad, a routine in the Harwell
2458 * Subroutine Library, author J. K. Reid.
2459 * ------------------------------------------------------------------
2460
2461 * Set loc(j) to point to the beginning of column j.
2462
2463 l = 1
2464 do 150 j = 1, n
2465 loc(j) = l
2466 l = l + len(j)
2467 150 continue
2468
2469 * Sort the elements into column order.
2470 * The algorithm is an in-place sort and is of order numa.
2471
2472 do 230 i = 1, numa
2473 * Establish the current entry.
2474 jce = jnum(i)
2475 if (jce .eq. 0) go to 230
2476 ace = a(i)
2477 ice = inum(i)
2478 jnum(i) = 0
2479
2480 * Chain from current entry.
2481
2482 do 200 j = 1, numa
2483
2484 * The current entry is not in the correct position.
2485 * Determine where to store it.
2486
2487 l = loc(jce)
2488 loc(jce) = loc(jce) + 1
2489
2490 * Save the contents of that location.
2491
2492 acep = a(l)
2493 icep = inum(l)
2494 jcep = jnum(l)
2495
2496 * Store current entry.
2497
2498 a(l) = ace
2499 inum(l) = ice
2500 jnum(l) = 0
2501
2502 * If next current entry needs to be processed,
2503 * copy it into current entry.
2504
2505 if (jcep .eq. 0) go to 230
2506 ace = acep
2507 ice = icep
2508 jce = jcep
2509 200 continue
2510 230 continue
2511
2512 * Reset loc(j) to point to the start of column j.
2513
2514 ja = 1
2515 do 250 j = 1, n
2516 jb = loc(j)
2517 loc(j) = ja
2518 ja = jb
2519 250 continue
2520
2521 * end of lu1or2
2522 end
2523
2524 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2525
2526 subroutine lu1or3( m, n, nelem,
2527 $ indc, lenc, locc, iw,
2528 $ lerr, inform )
2529
2530 integer*4 indc(nelem), lenc(n), iw(m)
2531 integer locc(n)
2532
2533 * ------------------------------------------------------------------
2534 * lu1or3 looks for duplicate elements in an m by n matrix A
2535 * defined by the column list indc, lenc, locc.
2536 * iw is used as a work vector of length m.
2537 *
2538 * Version of February 1985.
2539 * ------------------------------------------------------------------
2540
2541 do 100 i = 1, m
2542 iw(i) = 0
2543 100 continue
2544
2545 do 200 j = 1, n
2546 if (lenc(j) .gt. 0) then
2547 l1 = locc(j)
2548 l2 = l1 + lenc(j) - 1
2549
2550 do 150 l = l1, l2
2551 i = indc(l)
2552 if (iw(i) .eq. j) go to 910
2553 iw(i) = j
2554 150 continue
2555 end if
2556 200 continue
2557
2558 inform = 0
2559 return
2560
2561 910 lerr = l
2562 inform = 1
2563 return
2564
2565 * end of lu1or3
2566 end
2567
2568 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2569
2570 subroutine lu1or4( m, n, nelem,
2571 $ indc, indr, lenc, lenr, locc, locr )
2572
2573 integer*4 indc(nelem), indr(nelem), lenc(n), lenr(m)
2574 integer locc(n), locr(m)
2575
2576 * ------------------------------------------------------------------
2577 * lu1or4 constructs a row list indr, locr
2578 * from a corresponding column list indc, locc,
2579 * given the lengths of both columns and rows in lenc, lenr.
2580 * ------------------------------------------------------------------
2581
2582 * Initialize locr(i) to point just beyond where the
2583 * last component of row i will be stored.
2584
2585 l = 1
2586 do 10 i = 1, m
2587 l = l + lenr(i)
2588 locr(i) = l
2589 10 continue
2590
2591 * By processing the columns backwards and decreasing locr(i)
2592 * each time it is accessed, it will end up pointing to the
2593 * beginning of row i as required.
2594
2595 l2 = nelem
2596 j = n + 1
2597
2598 do 40 jdummy = 1, n
2599 j = j - 1
2600 if (lenc(j) .gt. 0) then
2601 l1 = locc(j)
2602
2603 do 30 l = l1, l2
2604 i = indc(l)
2605 lr = locr(i) - 1
2606 locr(i) = lr
2607 indr(lr) = j
2608 30 continue
2609
2610 l2 = l1 - 1
2611 end if
2612 40 continue
2613
2614 * end of lu1or4
2615 end
2616
2617 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2618
2619 subroutine lu1pq1( m, n, len, iperm, loc, inv, num )
2620
2621 integer*4 len(m), iperm(m), loc(n), inv(m), num(n)
2622
2623 * ------------------------------------------------------------------
2624 * lu1pq1 constructs a permutation iperm from the array len.
2625 *
2626 * On entry:
2627 * len(i) holds the number of nonzeros in the i-th row (say)
2628 * of an m by n matrix.
2629 * num(*) can be anything (workspace).
2630 *
2631 * On exit:
2632 * iperm contains a list of row numbers in the order
2633 * rows of length 0, rows of length 1,..., rows of length n.
2634 * loc(nz) points to the first row containing nz nonzeros,
2635 * nz = 1, n.
2636 * inv(i) points to the position of row i within iperm(*).
2637 * ------------------------------------------------------------------
2638
2639 * Count the number of rows of each length.
2640
2641 nzero = 0
2642 do 10 nz = 1, n
2643 num(nz) = 0
2644 loc(nz) = 0
2645 10 continue
2646
2647 do 20 i = 1, m
2648 nz = len(i)
2649 if (nz .eq. 0) then
2650 nzero = nzero + 1
2651 else
2652 num(nz) = num(nz) + 1
2653 end if
2654 20 continue
2655
2656 * Set starting locations for each length.
2657
2658 l = nzero + 1
2659 do 60 nz = 1, n
2660 loc(nz) = l
2661 l = l + num(nz)
2662 num(nz) = 0
2663 60 continue
2664
2665 * Form the list.
2666
2667 nzero = 0
2668 do 100 i = 1, m
2669 nz = len(i)
2670 if (nz .eq. 0) then
2671 nzero = nzero + 1
2672 iperm(nzero) = i
2673 else
2674 l = loc(nz) + num(nz)
2675 iperm(l) = i
2676 num(nz) = num(nz) + 1
2677 end if
2678 100 continue
2679
2680 * Define the inverse of iperm.
2681
2682 do 120 l = 1, m
2683 i = iperm(l)
2684 inv(i) = l
2685 120 continue
2686
2687 * end of lu1pq1
2688 end
2689
2690 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2691
2692 subroutine lu1pq2( nzpiv, nzchng,
2693 $ indr , lenold, lennew, iqloc, iq, iqinv )
2694
2695 integer*4 indr(nzpiv), lenold(nzpiv), lennew(*),
2696 $ iqloc(*) , iq(*) , iqinv(*)
2697
2698 * ===============================================================
2699 * lu1pq2 frees the space occupied by the pivot row,
2700 * and updates the column permutation iq.
2701 *
2702 * Also used to free the pivot column and update the row perm ip.
2703 *
2704 * nzpiv (input) is the length of the pivot row (or column).
2705 * nzchng (output) is the net change in total nonzeros.
2706 *
2707 * 14 Apr 1989 First version.
2708 * ===============================================================
2709
2710 nzchng = 0
2711
2712 do 200 lr = 1, nzpiv
2713 j = indr(lr)
2714 indr(lr) = 0
2715 nz = lenold(lr)
2716 nznew = lennew(j)
2717
2718 if (nz .ne. nznew) then
2719 l = iqinv(j)
2720 nzchng = nzchng + (nznew - nz)
2721
2722 * l above is the position of column j in iq (so j = iq(l)).
2723
2724 if (nz .lt. nznew) then
2725
2726 * Column j has to move towards the end of iq.
2727
2728 110 next = nz + 1
2729 lnew = iqloc(next) - 1
2730 if (lnew .ne. l) then
2731 jnew = iq(lnew)
2732 iq(l) = jnew
2733 iqinv(jnew) = l
2734 end if
2735 l = lnew
2736 iqloc(next) = lnew
2737 nz = next
2738 if (nz .lt. nznew) go to 110
2739 else
2740
2741 * Column j has to move towards the front of iq.
2742
2743 120 lnew = iqloc(nz)
2744 if (lnew .ne. l) then
2745 jnew = iq(lnew)
2746 iq(l) = jnew
2747 iqinv(jnew) = l
2748 end if
2749 l = lnew
2750 iqloc(nz) = lnew + 1
2751 nz = nz - 1
2752 if (nz .gt. nznew) go to 120
2753 end if
2754
2755 iq(lnew) = j
2756 iqinv(j) = lnew
2757 end if
2758 200 continue
2759
2760 * end of lu1pq2
2761 end
2762
2763 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2764
2765 subroutine lu1pq3( n, len, iperm, nrank )
2766
2767 integer*4 len(n), iperm(n)
2768
2769 * ------------------------------------------------------------------
2770 * lu1pq3 looks at the permutation iperm(*) and moves any entries
2771 * to the end whose corresponding length len(*) is zero.
2772 * ------------------------------------------------------------------
2773
2774 nrank = n
2775
2776 do 20 k = n, 1, -1
2777 i = iperm(k)
2778
2779 if (len(i) .eq. 0) then
2780 nrank = nrank - 1
2781 if (k .le. nrank) then
2782 do 10 l = k, nrank
2783 iperm(l) = iperm(l+1)
2784 10 continue
2785 iperm(nrank + 1) = i
2786 end if
2787 end if
2788 20 continue
2789
2790 * end of lu1pq3
2791 end
2792
2793 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2794
2795 subroutine lu1rec( n, reals, luparm,
2796 $ ltop, lena, a, ind, len, loc )
2797
2798 logical reals
2799 integer luparm(30)
2800 double precision a(lena)
2801 integer*4 ind(lena), len(n)
2802 integer loc(n)
2803
2804 * ------------------------------------------------------------------
2805 * lu1rec recovers space in ind(*) and optionally a(*),
2806 * by eliminating entries for which ind(l) is zero.
2807 * The elements of ind(*) must not be negative.
2808 *
2809 * If len(i) is positive, entry i contains that many elements,
2810 * starting at loc(i). Otherwise, entry i is eliminated.
2811 * ------------------------------------------------------------------
2812
2813 do 10 i = 1, n
2814 leni = len(i)
2815 if (leni .gt. 0) then
2816 l = loc(i) + leni - 1
2817 len(i) = ind(l)
2818 ind(l) = - i
2819 end if
2820 10 continue
2821
2822 k = 0
2823 last = 0
2824
2825 do 20 l = 1, ltop
2826 if (ind(l) .ne. 0) then
2827 k = k + 1
2828 i = ind(l)
2829 ind(k) = i
2830 if (reals) a(k) = a(l)
2831 if (i .lt. 0) then
2832
2833 * This is the end of entry i.
2834
2835 i = - i
2836 ind(k) = len(i)
2837 loc(i) = last + 1
2838 len(i) = k - last
2839 last = k
2840 end if
2841 end if
2842 20 continue
2843
2844 nout = luparm(1)
2845 lprint = luparm(2)
2846 if (lprint .ge. 1) write(nout, 1000) ltop, k, reals
2847 luparm(26) = luparm(26) + 1
2848 ltop = k
2849 return
2850
2851 1000 format(' lu1rec. File compressed from', i8, ' to', i8, l4)
2852
2853 * end of lu1rec
2854 end
2855
2856 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2857 *
2858 * file lu6a fortran
2859 *
2860 * lu6chk lu6sol
2861 *
2862 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2863
2864 subroutine lu6chk( mode, m, n, w,
2865 $ lena, luparm, parmlu,
2866 $ a, indc, indr, ip, iq,
2867 $ lenc, lenr, locc, locr,
2868 $ inform )
2869
2870 implicit double precision (a-h,o-z)
2871 integer luparm(30)
2872 double precision parmlu(30), a(lena), w(n)
2873 integer*4 indc(lena), indr(lena), ip(m), iq(n)
2874 integer*4 lenc(n), lenr(m)
2875 integer locc(n), locr(m)
2876
2877 * ------------------------------------------------------------------
2878 * lu6chk looks at the LU factorization A = L*U.
2879 *
2880 * If mode = 1, the important input parameters are
2881 *
2882 * utol1 = parmlu(4),
2883 * utol2 = parmlu(5),
2884 *
2885 * and the significant output parameters are
2886 *
2887 * inform = luparm(10),
2888 * nsing = luparm(11),
2889 * jsing = luparm(12),
2890 * jumin = luparm(19),
2891 * elmax = parmlu(11),
2892 * umax = parmlu(12).
2893 * dumax = parmlu(13),
2894 * dumin = parmlu(14),
2895 * and w(*).
2896 *
2897 * elmax and umax return the largest elements in L and U.
2898 * dumax and dumin return the largest and smallest diagonals of U
2899 * (excluding diagonals that are exactly zero).
2900 *
2901 * In general, w(j) is set to the maximum absolute element in
2902 * the j-th column of U. However, if the corresponding diagonal
2903 * of U is small in absolute terms or relative to w(j)
2904 * (as judged by the parameters utol1, utol2 respectively),
2905 * then w(j) is changed to - w(j).
2906 *
2907 * Thus, if w(j) is not positive, the j-th column of A
2908 * appears to be dependent on the other columns of A.
2909 * The number of such columns, and the position of the last one,
2910 * are returned as nsing and jsing.
2911 *
2912 * Note that nrank is assumed to be set already, and is not altered.
2913 * typically, nsing will satisfy nrank + nsing = n, but if
2914 * utol1 and utol2 are rather large, nsing may exceed n - nrank.
2915 *
2916 * inform = 0 if A appears to have full column rank (nsing = 0).
2917 * inform = 1 otherwise (nsing .gt. 0).
2918 *
2919 * Version of July 1987.
2920 * 9 may 1988: f77 version.
2921 * ------------------------------------------------------------------
2922
2923 intrinsic abs, max, min
2924 parameter ( zero = 0.0d+0 )
2925
2926 nout = luparm(1)
2927 lprint = luparm(2)
2928 nrank = luparm(16)
2929 lenl = luparm(23)
2930 utol1 = parmlu(4)
2931 utol2 = parmlu(5)
2932 inform = 0
2933
2934 * ------------------------------------------------------------------
2935 * Find elmax.
2936 * ------------------------------------------------------------------
2937 elmax = zero
2938
2939 do 120 l = lena + 1 - lenl, lena
2940 elmax = max( elmax, abs(a(l)) )
2941 120 continue
2942
2943 * ------------------------------------------------------------------
2944 * Find umax, and set w(j) = maximum element in j-th column of U.
2945 * ------------------------------------------------------------------
2946 umax = zero
2947 do 220 j = 1, n
2948 w(j) = zero
2949 220 continue
2950
2951 do 260 k = 1, nrank
2952 i = ip(k)
2953 l1 = locr(i)
2954 l2 = l1 + lenr(i) - 1
2955
2956 do 250 l = l1, l2
2957 j = indr(l)
2958 aij = abs( a(l) )
2959 w(j) = max( w(j), aij )
2960 umax = max( umax, aij )
2961 250 continue
2962 260 continue
2963
2964 * ------------------------------------------------------------------
2965 * Negate w(j) if the corresponding diagonal of U is
2966 * too small in absolute terms or relative to the other elements
2967 * in the same column of U.
2968 * Also find dumax and dumin, the extreme diagonals of U.
2969 * ------------------------------------------------------------------
2970 nsing = 0
2971 jsing = 0
2972 jumin = 0
2973 dumax = zero
2974 dumin = 1.0d+30
2975
2976 do 320 k = 1, n
2977 j = iq(k)
2978 if (k .gt. nrank) go to 310
2979 i = ip(k)
2980 l1 = locr(i)
2981 diag = abs( a(l1) )
2982 dumax = max( dumax, diag )
2983 if (dumin .gt. diag) then
2984 dumin = diag
2985 jumin = j
2986 end if
2987 if (diag .gt. utol1 .and. diag .gt. utol2 * w(j)) go to 320
2988
2989 310 nsing = nsing + 1
2990 jsing = j
2991 w(j) = - w(j)
2992 320 continue
2993
2994 if (jumin .eq. 0) dumin = zero
2995 luparm(11) = nsing
2996 luparm(12) = jsing
2997 luparm(19) = jumin
2998 parmlu(11) = elmax
2999 parmlu(12) = umax
3000 parmlu(13) = dumax
3001 parmlu(14) = dumin
3002 if (nsing .gt. 0) then
3003
3004 * The matrix has been judged singular.
3005
3006 inform = 1
3007 ndefic = n - nrank
3008 if (lprint .ge. 0)
3009 $ write(nout, 1100) nrank, ndefic, nsing, jsing, dumax, dumin
3010 end if
3011
3012 * Exit.
3013
3014 luparm(10) = inform
3015 return
3016
3017 1100 format(/ ' lu6chk warning. The matrix appears to be singular.'
3018 $ / ' nrank =', i7, 8x, 'rank of U'
3019 $ / ' n - nrank =', i7, 8x, 'rank deficiency'
3020 $ / ' nsing =', i7, 8x, 'singularities'
3021 $ / ' jsing =', i7, 8x, 'last singular column'
3022 $ / ' dumax =', 1p,e11.2, 4x, 'largest triangular diag'
3023 $ / ' dumin =', 1p,e11.2, 4x, 'smallest triangular diag')
3024 * end of lu6chk
3025 end
3026
3027 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3028
3029 subroutine lu6sol( mode, m, n, v, w,
3030 $ lena, luparm, parmlu,
3031 $ a, indc, indr, ip, iq,
3032 $ lenc, lenr, locc, locr,
3033 $ inform )
3034
3035 implicit double precision (a-h,o-z)
3036 integer luparm(30)
3037 double precision parmlu(30), a(lena), v(m), w(n)
3038 integer*4 indc(lena), indr(lena), ip(m), iq(n)
3039 integer*4 lenc(n), lenr(m)
3040 integer locc(n), locr(m)
3041
3042 * ------------------------------------------------------------------
3043 * lu6sol uses the factorization A = L*U as follows...
3044 *
3045 * mode
3046 * ----
3047 * 1 v solves L*v = v(input). w is not touched.
3048 * 2 v solves L(t)*v = v(input). w is not touched.
3049 * 3 w solves U*w = v. v is not altered.
3050 * 4 v solves U(t)*v = w. w is destroyed.
3051 * 5 w solves A*w = v. v is altered as in 1.
3052 * 6 v solves A(t)*v = w. w is destroyed.
3053 *
3054 * if mode .ge. 3, v and w must not be the same arrays.
3055 *
3056 * ip(*), iq(*) hold row and column numbers in pivotal order.
3057 * lenc(k) is the length of the k-th column of initial L.
3058 * lenr(i) is the length of the i-th row of U.
3059 * locc(*) is not used.
3060 * locr(i) is the start of the i-th row of U.
3061 *
3062 * U is assumed to be in upper-trapezoidal form (nrank by n).
3063 * the first entry for each row is the diagonal element
3064 * (according to the permutations ip, iq). It is stored at
3065 * location locr(i) in a(*), indr(*).
3066 *
3067 * On exit, inform = 0 except as follows.
3068 * if mode .ge. 3 and if U (and hence A) is singular, then
3069 * inform = 1 if there is a nonzero residual in solving the system
3070 * involving U. parmlu(20) returns the norm of the residual.
3071 *
3072 * Version of July 1987.
3073 * 9 May 1988: f77 version.
3074 * ------------------------------------------------------------------
3075
3076 intrinsic abs
3077 parameter ( zero = 0.0d+0 )
3078
3079 nrank = luparm(16)
3080 numl0 = luparm(20)
3081 lenl0 = luparm(21)
3082 if (numl0 .eq. 0) lenl0 = 0
3083 lenl = luparm(23)
3084 small = parmlu(3)
3085 inform = 0
3086 nrank1 = nrank + 1
3087 resid = zero
3088 go to (100, 200, 300, 400, 100, 400), mode
3089
3090 * ==================================================================
3091 * mode = 1 or 5. Solve l * v(new) = v(old).
3092 * ==================================================================
3093 100 l1 = lena + 1
3094
3095 do 120 k = 1, numl0
3096 len = lenc(k)
3097 l = l1
3098 l1 = l1 - len
3099 ipiv = indr(l1)
3100 vpiv = v(ipiv)
3101 if (abs( vpiv ) .le. small) go to 120
3102
3103 * ***** The following loop could be coded specially.
3104
3105 do 110 ldummy = 1, len
3106 l = l - 1
3107 j = indc(l)
3108 v(j) = v(j) + a(l) * vpiv
3109 110 continue
3110 120 continue
3111
3112 150 l = lena - lenl0 + 1
3113 numl = lenl - lenl0
3114
3115 * ***** The following loop could be coded specially.
3116
3117 do 160 ldummy = 1, numl
3118 l = l - 1
3119 i = indr(l)
3120 if (abs( v(i) ) .le. small) go to 160
3121 j = indc(l)
3122 v(j) = v(j) + a(l) * v(i)
3123 160 continue
3124
3125 190 if (mode .eq. 5) go to 300
3126 go to 900
3127
3128 * ==================================================================
3129 * mode = 2. Solve L(transpose) * v(new) = v(old).
3130 * ==================================================================
3131 200 l1 = lena - lenl + 1
3132 l2 = lena - lenl0
3133
3134 * ***** The following loop could be coded specially.
3135
3136 do 220 l = l1, l2
3137 j = indc(l)
3138 if (abs( v(j) ) .le. small) go to 220
3139 i = indr(l)
3140 v(i) = v(i) + a(l) * v(j)
3141 220 continue
3142
3143 do 280 k = numl0, 1, -1
3144 len = lenc(k)
3145 sum = zero
3146 l1 = l2 + 1
3147 l2 = l2 + len
3148
3149 * ***** The following loop could be coded specially.
3150
3151 do 270 l = l1, l2
3152 j = indc(l)
3153 sum = sum + a(l) * v(j)
3154 270 continue
3155
3156 ipiv = indr(l1)
3157 v(ipiv) = v(ipiv) + sum
3158 280 continue
3159
3160 go to 900
3161
3162 * ==================================================================
3163 * mode = 3. Solve U * w = v.
3164 * ==================================================================
3165
3166 * Find the first nonzero in v(1) ... v(nrank), counting backwards.
3167
3168 300 do 310 klast = nrank, 1, -1
3169 i = ip(klast)
3170 if (abs( v(i) ) .gt. small) go to 320
3171 310 continue
3172
3173 320 do 330 k = klast + 1, n
3174 j = iq(k)
3175 w(j) = zero
3176 330 continue
3177
3178 * Do the back-substitution, using rows 1 to klast of U.
3179
3180 340 do 380 k = klast, 1, -1
3181 i = ip(k)
3182 t = v(i)
3183 l1 = locr(i)
3184 l2 = l1 + 1
3185 l3 = l1 + lenr(i) - 1
3186
3187 * ***** The following loop could be coded specially.
3188
3189 do 350 l = l2, l3
3190 j = indr(l)
3191 t = t - a(l) * w(j)
3192 350 continue
3193
3194 j = iq(k)
3195 if (abs( t ) .le. small) then
3196 w(j) = zero
3197 else
3198 w(j) = t / a(l1)
3199 end if
3200 380 continue
3201
3202 * Compute residual for overdetermined systems.
3203
3204 do 390 k = nrank1, m
3205 i = ip(k)
3206 resid = resid + abs( v(i) )
3207 390 continue
3208
3209 go to 900
3210
3211 * ==================================================================
3212 * mode = 4 or 6. Solve U(transpose) * v = w.
3213 * ==================================================================
3214 400 do 410 k = nrank1, m
3215 i = ip(k)
3216 v(i) = zero
3217 410 continue
3218
3219 * Do the forward-substitution, skipping columns of U(transpose)
3220 * when the associated element of w(*) is negligible.
3221
3222 do 480 k = 1, nrank
3223 i = ip(k)
3224 j = iq(k)
3225 t = w(j)
3226 if (abs( t ) .le. small) then
3227 v(i) = zero
3228 go to 480
3229 end if
3230
3231 l1 = locr(i)
3232 t = t / a(l1)
3233 v(i) = t
3234 l2 = l1 + lenr(i) - 1
3235 l1 = l1 + 1
3236
3237 * ***** The following loop could be coded specially.
3238
3239 do 450 l = l1, l2
3240 j = indr(l)
3241 w(j) = w(j) - t * a(l)
3242 450 continue
3243 480 continue
3244
3245 * Compute residual for overdetermined systems.
3246
3247 do 490 k = nrank1, n
3248 j = iq(k)
3249 resid = resid + abs( w(j) )
3250 490 continue
3251
3252 if (mode .eq. 6) go to 200
3253
3254 * Exit.
3255
3256 900 if (resid .gt. zero) inform = 1
3257 luparm(10) = inform
3258 parmlu(20) = resid
3259 return
3260
3261 * end of lu6sol
3262 end
3263
3264 ************************************************************************
3265 *
3266 * file lu7a fortran
3267 *
3268 * lu7add lu7cyc lu7elm lu7for lu7rnk lu7zap
3269 *
3270 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3271
3272 subroutine lu7add( m, n, jadd, v,
3273 $ lena, luparm, parmlu,
3274 $ lenl, lenu, lrow, nrank,
3275 $ a, indr, ip, lenr, locr,
3276 $ inform, klast, vnorm )
3277
3278 implicit double precision (a-h,o-z)
3279 integer luparm(30)
3280 double precision parmlu(30), a(lena), v(m)
3281 integer*4 indr(lena), ip(m), lenr(m)
3282 integer locr(m)
3283
3284 * ------------------------------------------------------------------
3285 * lu7add inserts the first nrank elements of the vector v(*)
3286 * as column jadd of U. We assume that U does not yet have any
3287 * entries in this column.
3288 * Elements no larger than parmlu(3) are treated as zero.
3289 * klast will be set so that the last row to be affected
3290 * (in pivotal order) is row ip(klast).
3291 *
3292 * 09 May 1988: First f77 version.
3293 * ------------------------------------------------------------------
3294
3295 intrinsic abs
3296 parameter ( zero = 0.0d+0 )
3297
3298 small = parmlu(3)
3299 vnorm = zero
3300 klast = 0
3301
3302 do 200 k = 1, nrank
3303 i = ip(k)
3304 if (abs( v(i) ) .le. small) go to 200
3305 klast = k
3306 vnorm = vnorm + abs( v(i) )
3307 leni = lenr(i)
3308
3309 * Compress row file if necessary.
3310
3311 minfre = leni + 1
3312 nfree = lena - lenl - lrow
3313 if (nfree .lt. minfre) then
3314 call lu1rec( m, .true., luparm, lrow, lena,
3315 $ a, indr, lenr, locr )
3316 nfree = lena - lenl - lrow
3317 if (nfree .lt. minfre) go to 970
3318 end if
3319
3320 * Move row i to the end of the row file,
3321 * unless it is already there.
3322 * No need to move if there is a gap already.
3323
3324 if (leni .eq. 0) locr(i) = lrow + 1
3325 lr1 = locr(i)
3326 lr2 = lr1 + leni - 1
3327 if (lr2 .eq. lrow) go to 150
3328 if (indr(lr2+1) .eq. 0) go to 180
3329 locr(i) = lrow + 1
3330
3331 do 140 l = lr1, lr2
3332 lrow = lrow + 1
3333 a(lrow) = a(l)
3334 j = indr(l)
3335 indr(l) = 0
3336 indr(lrow) = j
3337 140 continue
3338
3339 150 lr2 = lrow
3340 lrow = lrow + 1
3341
3342 * Add the element of v.
3343
3344 180 lr2 = lr2 + 1
3345 a(lr2) = v(i)
3346 indr(lr2) = jadd
3347 lenr(i) = leni + 1
3348 lenu = lenu + 1
3349 200 continue
3350
3351 * Normal exit.
3352
3353 inform = 0
3354 go to 990
3355
3356 * Not enough storage.
3357
3358 970 inform = 7
3359
3360 990 return
3361
3362 * end of lu7add
3363 end
3364
3365 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3366
3367 subroutine lu7cyc( kfirst, klast, ip )
3368
3369 integer*4 ip(klast)
3370
3371 * ------------------------------------------------------------------
3372 * lu7cyc performs a cyclic permutation on the row or column ordering
3373 * stored in ip, moving entry kfirst down to klast.
3374 * If kfirst .ge. klast, lu7cyc should not be called.
3375 * Sometimes klast = 0 and nothing should happen.
3376 *
3377 * 09 May 1988: First f77 version.
3378 * ------------------------------------------------------------------
3379
3380 if (kfirst .lt. klast) then
3381 ifirst = ip(kfirst)
3382
3383 do 100 k = kfirst, klast - 1
3384 ip(k) = ip(k + 1)
3385 100 continue
3386
3387 ip(klast) = ifirst
3388 end if
3389
3390 * end of lu7cyc
3391 end
3392
3393 *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3394
3395 subroutine lu7elm( m, n, jelm, v,
3396 $ lena, luparm, parmlu,
3397 $ lenl, lenu, lrow, nrank,
3398 $ a, indc, indr, ip, iq, lenr, locc, locr,
3399 $ inform, diag )
3400
3401 implicit double precision (a-h,o-z)
3402 integer luparm(30)
3403 double precision parmlu(30), a(lena), v(m)
3404 integer*4 indc(lena), indr(lena), ip(m), iq(n), lenr(m)
3405 integer locc(n), locr(m)
3406
3407 * ------------------------------------------------------------------
3408 * lu7elm eliminates the subdiagonal elements of a vector v(*),
3409 * where L*v = y for some vector y.
3410 * If jelm > 0, y has just become column jelm of the matrix A.
3411 * lu7elm should not be called unless m is greater than nrank.
3412 *
3413 * 09 May 1988: First f77 version.
3414 * No longer calls lu7for at end. lu8rpc, lu8mod do so.
3415 * ------------------------------------------------------------------
3416
3417 intrinsic abs
3418 parameter ( zero = 0.0d+0 )
3419
3420 small = parmlu(3)
3421 nrank1 = nrank + 1
3422 diag = zero
3423
3424 * Compress row file if necessary.
3425
3426 minfre = m - nrank
3427 nfree = lena - lenl - lrow
3428 if (nfree .ge. minfre) go to 100
3429 call lu1rec( m, .true., luparm, lrow, lena, a, indr, lenr, locr )
3430 nfree = lena - lenl - lrow
3431 if (nfree .lt. minfre) go to 970
3432
3433 * Pack the subdiagonals of v into L, and find the largest.
3434
3435 100 vmax = zero
3436 kmax = 0
3437 l = lena - lenl + 1
3438
3439 do 200 k = nrank1, m
3440 i = ip(k)
3441 vi = abs( v(i) )
3442 if (vi .le. small) go to 200
3443 l = l - 1
3444 a(l) = v(i)
3445 indc(l) = i
3446 if (vmax .ge. vi ) go to 200
3447 vmax = vi
3448 kmax = k
3449 lmax = l
3450 200 continue
3451
3452 if (kmax .eq. 0) go to 900
3453
3454 * ------------------------------------------------------------------
3455 * Remove vmax by overwriting it with the last packed v(i).
3456 * Then set the multipliers in L for the other elements.
3457 * ------------------------------------------------------------------