/[ascend]/trunk/ascend4/solver/rankiba2.c
ViewVC logotype

Annotation of /trunk/ascend4/solver/rankiba2.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations) (download) (as text)
Fri Oct 29 20:54:12 2004 UTC (17 years, 8 months ago) by aw0a
File MIME type: text/x-csrc
File size: 39443 byte(s)
Setting up web subdirectory in repository
1 aw0a 1 /*
2     * ranki_ba2 implementation functions.
3     * By: Benjamin Andrew Allan
4     * Created: 11/96
5     * Copyright 1996 Benjamin A. Allan
6     *
7     * Version: $Revision: 1.5 $
8     * Version control file: $RCSfile: rankiba2.c,v $
9     * Date last modified: $Date: 1997/07/18 12:15:24 $
10     * Last modified by: $Author: mthomas $
11     *
12     * The functions and algorithms implemented here should be exactly numerically
13     * equivalent to ranki_kw2, only the data structures for pivots and
14     * elimination differing.
15     * Here we will work in _really_ sparse elimination and a linked list
16     * representation of the diagonal until factorization is finished.
17     * This beast (with good reordering) should be competitive with ma28,
18     * even though it uses mtx which is much more flexible.
19     * It should beat the living daylights out of lu1sol in application,
20     * given that lu1sol can't solve multiple rhs and this will.
21     *
22     * The fundamental notion that makes this factorization work is that
23     * in matrix algebra and LU in particular
24     * permutation is really irrelevant EXCEPT
25     * when executing the triangular backsolve (or equivalent) during
26     * the elimination of stuff under the diagonal.
27     * The already pivoted rows must be used in the correct sequence, and
28     * the nonzeros to be eliminated must be eliminated in the order that
29     * introduces no fill after elimination.
30     * Since we can use fancy data structures to track the absolute minimum
31     * of sequencing information, we can in fact do all the real work
32     * based on any consistent column/row indexing. The most convenient,
33     * and the one used is the org_ index.
34     * No sorting happens until the very latest time it is needed,
35     * which insures that only the minimum required sorting ever happens.
36     * Dual access to the nonzeros (by orgcol and by linked list) makes
37     * things really fast at a cost of memory = k*sys->capacity ints.
38     * k is a small 2 digit integer.
39     */
40    
41    
42     #ifndef RBADEBUG
43     #define RBADEBUG 0
44     #endif
45     /* turns on more spew than you can possibly imagine.
46     * has user (ballan) hardcoded file name dependencies.
47     * Once the code is proven to work as kw2 on singular
48     * systems, all the RBA stuff should be stripped.
49     */
50    
51     /* LINQR_DROP_TOLERANCE defined in main. */
52    
53     #define EMPTY -1
54     /* the end of lists of pivots (possibly rejected)
55     * or of nonzeros in elimination.
56     * any nzdata with an orgcol of EMPTY is not
57     * to be eliminated because it is not real.
58     */
59    
60     #define INSERTED(a) ((a).orgcol >= 0)
61     /* tell if an element a is active.
62     * works for nzlistentries and pivlistentry.
63     */
64    
65     struct NZListEntry {
66     int32 leftnz; /* link to next nz to the left in the imaginary row. */
67     int32 rightnz; /* link to nz to the right in the imaginary row. */
68     /* left and right nz links are the orgcols, or EMPTY */
69     int32 pseudo_curcol; /* pseudo column number. = actual curcol right of pivot,
70     * = calculated from relative_location in elimination
71     * region left of pivot, and would be actual curcol
72     * if this was the older rankikw implementations.
73     */
74     int32 orgcol; /* org col of this nz. if <0, element not inserted. */
75     /*
76     * Efficiency note:
77     * at the present time (11/5/96) the code uses (extensively in debug mode)
78     * EMPTY to denote the beginning and ending of nz lists.
79     * This is unnecessarily wasteful and should be #if'd out in
80     * favor counters which keep track of the list lengths. then
81     * all that is needed to print the list is the head (or tail) and
82     * the expected length.
83     */
84     };
85     /* data structure for tracking nonzeros in the row being eliminated. */
86    
87     struct PivListEntry {
88     int32 orgrow; /* org row of pivot for this orgcol */
89     int32 prev; /* org col of "prev" pivot. prev -> toward upper left */
90     int32 next; /* org col of "next" pivot. next -> toward lower right */
91     int32 relative_location;
92     /* DEFAULT should be INT_MIN, which signals the org_col was
93     * never used or attempted as a pivot.
94     * For accepted pivots:
95     * relative_location is the location in the final diagonal
96     * we will obtain relative to the first accepted pivot.
97     * The first accepted pivot has relative_location 0.
98     * Pivots picked up just marching down the diagonal have
99     * increasingly negative relative_location.
100     * Pivots picked up after an elimination call will have
101     * increasingly positive relative_location since they are
102     * "dragged" to the upper left in the ranki scheme.
103     * The whole point of this list data structure choice is to eliminate
104     * the explicit data vector rotation that kills ranki_kw and jz on
105     * larger problems.
106     *
107     * For reject pivots:
108     * The rejected rows/cols accumulate at the bottom-right of the
109     * "diagonal." Since they aren't really used, the relative_location
110     * is meaningless, therefore it is DIAG_MIN just so we can distinguish
111     * between rejects and never-touched cols.
112     */
113     };
114     /* depending on efficiency, we may want to break up the pivlistentry
115     * type into simple a bunch of parallel integer vectors.
116     * As is, it makes the memory management easy and keeps most
117     * accesses relatively page local.
118     */
119    
120     #define DIAG_MIN (INT_MIN+10)
121     /*
122     * Define a gap of 10 for good measure between what we consider
123     * 'lower-rightmost' and the most negative 32 bit integer.
124     */
125    
126     #define EDSC 1234567890
127     #define EDEC 987654321
128     static
129     struct elimination_data {
130     long startcheck;
131     struct PivListEntry *pivdata; /* org col indexed */
132     struct NZListEntry *nzdata; /* org col indexed */
133     size_t capacity; /* total entries allocated in pivdata */
134     size_t length; /* entries currently in use (possibly) */
135     long endcheck;
136     } g_ba_elimdata = {EDSC,
137     NULL, NULL, (size_t)0, (size_t)0,
138     EDEC};
139     /*
140     * This provides a workspace for the pivot list while it is being
141     * constructed during factorization. The contents of this
142     * data space are indeterminate between calls to rankiba2_factor,
143     * particularly if a floating point exception occurred.
144     * pvidata is an array that is indexed by org_col of the matrix
145     * being factored.
146     * The reused nzdata space is also here. Between eliminations,
147     * nzdata has certain expectation values that we maintain.
148     * Between factorizations, nzdata content is undefined due to
149     * possibility of floating point exceptions.
150     */
151    
152     /*
153     * Conceptually we are growing a pivot list during factoring:
154     * eliminated & dragged pivots <- (first pivot) -> simply taken pivots
155     * where (first pivot) has a relative location 0.
156     * The pivot list is reduced to a vector only at the end of factorization,
157     * for easy use by the triangular solution functions.
158     * The matrix is not permuted to its final form until the factorization is
159     * completed since we are doing everything off of the pivot list and
160     * org row/col indices.
161     * The yet to be pivoted columns and rows WILL be permuted to keep the
162     * pivot search and rank defect handling the same.
163     */
164    
165     #if RBADEBUG
166     FILE *gscr = NULL;
167     #endif
168     /* file ptr for debugging */
169    
170     /*
171     * Allocates g_ba_elimdata and members to the size given.
172     * If size given is 0, frees any allocated memory.
173     * If init !=0, will do any appropriate initializations.
174     * Note that g_ba_elimdata is a memory recycle structure,
175     * so that it only grows in size, it never shrinks because
176     * someone is likely to come along and ask it to be big
177     * again. If memory is at a premium, it might pay to
178     * call reset periodically instead of only at shutdown.
179     *
180     * Returns 0 if successful.
181     * Returns 1 if insufficient memory.
182     * If memory corruption is detected, we will reinit from
183     * scratch, possibly discarding memory, and return 0 after
184     * whining about the error.
185     */
186     static
187     int reset_elimination_data(int32 size, int init)
188     {
189     int i;
190     struct NZListEntry *tmpn = NULL;
191     struct PivListEntry *tmpp = NULL;
192    
193     /* check for errors */
194     if (g_ba_elimdata.startcheck != EDSC || g_ba_elimdata.endcheck != EDEC ) {
195     FPRINTF(stderr,"g_ba_elimdata corrupted! Reset may leak memory\n");
196     g_ba_elimdata.pivdata = NULL;
197     g_ba_elimdata.nzdata = NULL;
198     g_ba_elimdata.capacity = (size_t)0;
199     g_ba_elimdata.length = (size_t)0;
200     g_ba_elimdata.startcheck = EDSC;
201     g_ba_elimdata.endcheck = EDEC;
202     }
203     /* check for free */
204     if (size == (size_t)0) {
205     if (g_ba_elimdata.capacity != (size_t)0) {
206     if (g_ba_elimdata.nzdata != NULL) {
207     ascfree(g_ba_elimdata.nzdata);
208     }
209     if (g_ba_elimdata.pivdata != NULL) {
210     ascfree(g_ba_elimdata.pivdata);
211     }
212     g_ba_elimdata.nzdata = NULL;
213     g_ba_elimdata.pivdata = NULL;
214     g_ba_elimdata.capacity = (size_t)0;
215     g_ba_elimdata.length = (size_t)0;
216     }
217     return 0;
218     }
219     /* check for reuse/expansion */
220     if ((size_t)size <= g_ba_elimdata.capacity) {
221     g_ba_elimdata.length = size;
222     } else {
223     /* expand both */
224     if (g_ba_elimdata.capacity == (size_t)0) {
225     tmpp = (struct PivListEntry *)ascmalloc(size*sizeof(struct PivListEntry));
226     tmpn = (struct NZListEntry *)ascmalloc(size*sizeof(struct NZListEntry));
227     } else {
228     tmpp = (struct PivListEntry *)
229     ascrealloc(g_ba_elimdata.pivdata,size*sizeof(struct PivListEntry));
230     tmpn = (struct NZListEntry *)
231     ascrealloc(g_ba_elimdata.nzdata,size*sizeof(struct NZListEntry));
232     }
233     if (tmpp==NULL || tmpn==NULL) {
234     return 1;
235     } else {
236     g_ba_elimdata.pivdata = tmpp;
237     g_ba_elimdata.nzdata = tmpn;
238     g_ba_elimdata.length = size;
239     g_ba_elimdata.capacity = size;
240     }
241     }
242     if (init) {
243     for (i = 0; i <size; i++) {
244     g_ba_elimdata.pivdata[i].relative_location = INT_MIN;
245     g_ba_elimdata.nzdata[i].orgcol = EMPTY;
246     }
247     }
248     return 0;
249     }
250    
251     /*
252     * Assumes nothing about leftorg.
253     * Returns orgcol of leftmost element.
254     * newi and orgcol must correspond to an element
255     * definitely NOT already in the elimination nzlist.
256     */
257     static
258     int32 rankiba_firstinsert_nz(int32 leftorg,
259     int32 newi,
260     int32 orgcol,
261     struct NZListEntry *nzdata,
262     int32 *rightorg)
263     {
264     int32 pcc,searchcol;
265    
266     nzdata[orgcol].pseudo_curcol = newi;
267     nzdata[orgcol].orgcol = orgcol;
268    
269     if (leftorg >= 0) {
270     /* not first element */
271     pcc = nzdata[leftorg].pseudo_curcol;
272     #if RBADEBUG
273     assert(pcc!=newi);
274     #endif
275     if (pcc > newi) {
276     /* newi is new left edge. right edge unchanged */
277     #if RBADEBUG
278     assert(nzdata[leftorg].leftnz == EMPTY);
279     #endif
280     nzdata[leftorg].leftnz = orgcol;
281     nzdata[orgcol].rightnz = leftorg;
282     nzdata[orgcol].leftnz = EMPTY;
283     return orgcol;
284     } else {
285     /* hunt right for insertion point */
286     searchcol = leftorg;
287     while (searchcol != EMPTY &&
288     (pcc = nzdata[searchcol].pseudo_curcol) < newi) {
289     searchcol = nzdata[searchcol].rightnz;
290     }
291    
292     #if RBADEBUG
293     assert(pcc!=newi);
294     #endif
295     if (searchcol == EMPTY) {
296     /* newi is new right edge. left edge unchanged */
297     #if RBADEBUG
298     assert( nzdata[*rightorg].rightnz == EMPTY);
299     #endif
300     nzdata[*rightorg].rightnz = orgcol;
301     nzdata[orgcol].leftnz = *rightorg;
302     nzdata[orgcol].rightnz = EMPTY;
303     *rightorg = orgcol;
304     } else {
305     /* search col is now the element to the right of
306     * where newi should be inserted.
307     */
308     /* point new element at neighbors */
309     nzdata[orgcol].rightnz = searchcol;
310     nzdata[orgcol].leftnz = nzdata[searchcol].leftnz;
311     /* repoint neighbors */
312     nzdata[searchcol].leftnz = orgcol;
313     nzdata[nzdata[orgcol].leftnz].rightnz = orgcol;
314     }
315     return leftorg; /* unchanged left */
316     }
317     #ifndef NDEBUG
318     Asc_Panic(2, "rankiba_firstinsert_nz", "fix me");
319     abort(); /* NOTREACHED */
320     #endif
321     } else {
322     /* first element in elimination section. */
323     /* the nzlinks will be -1 still, though perhaps we
324     * could avoid the init task by setting them here.
325     */
326     nzdata[orgcol].leftnz = EMPTY;
327     nzdata[orgcol].rightnz = EMPTY;
328     *rightorg = orgcol;
329     return orgcol;
330     }
331     }
332    
333     #if RBADEBUG
334     /* checks that we haven't already added it */
335     static
336     int onright(int32 addtail, int32 orgcol, struct NZListEntry *nzdata)
337     {
338     while (addtail!=EMPTY) {
339     if (nzdata[addtail].orgcol == orgcol) return 1;
340     addtail = nzdata[addtail].rightnz;
341     }
342     return 0;
343     }
344     /*
345     * write, starting on left and moving right, an nzlist.
346     * assumes EMPTY is used to terminate lists.
347     */
348     static
349     void right_nzlist(FILE *fp, struct NZListEntry *nzd, int32 oc, real64 *d)
350     {
351     assert(fp!=NULL && nzd != NULL);
352     while (oc!= EMPTY) {
353     FPRINTF(fp,"nz[%d]: lnz %4d, rnz %4d, pcc %4d, oc %4d %12.8g\n",
354     oc,nzd[oc].leftnz,nzd[oc].rightnz,nzd[oc].pseudo_curcol,nzd[oc].orgcol,
355     d[oc]);
356     oc = nzd[oc].rightnz;
357     }
358     }
359     /*
360     * write, starting on left and moving right, an nzlist.
361     * assumes EMPTY is used to terminate lists.
362     */
363     static
364     void right_pivlist(FILE *fp, struct PivListEntry *nzd, int32 oc)
365     {
366     assert(fp!=NULL && nzd != NULL);
367     while (oc!= EMPTY) {
368     FPRINTF(fp,"piv[%d]: prev %d, next %d, orow %d, relloc %d\n",
369     oc,nzd[oc].prev,nzd[oc].next,nzd[oc].orgrow,nzd[oc].relative_location);
370     oc = nzd[oc].next;
371     if (oc == nzd[oc].next) {
372     FPRINTF(fp,"right_pivlist miscalled\n");
373     break;
374     }
375     }
376     }
377     #endif
378    
379     /*
380     * assumes leftorg > 0.
381     * Further assumes that we will never try to add anything on the
382     * right edge of the list and will die if we do.
383     * Returns orgcol of leftmost element.
384     * newi and orgcol must correspond to an element
385     * definitely NOT already in the elimination nzlist.
386     */
387     static
388     int32 rankiba_insert_nz(int32 leftorg,
389     int32 newi,
390     int32 orgcol,
391     struct NZListEntry *nzdata)
392     {
393     int32 pcc,searchcol;
394    
395     #if RBADEBUG
396     assert(!onright(leftorg,orgcol,nzdata));
397     #endif
398     nzdata[orgcol].pseudo_curcol = newi;
399     nzdata[orgcol].orgcol = orgcol;
400    
401     pcc = nzdata[leftorg].pseudo_curcol;
402     #if RBADEBUG
403     assert(pcc!=newi);
404     #endif
405     if (pcc > newi) {
406     /* newi is new left edge. right edge unchanged */
407     #if RBADEBUG
408     assert(nzdata[leftorg].leftnz == EMPTY);
409     #endif
410     nzdata[leftorg].leftnz = orgcol;
411     nzdata[orgcol].rightnz = leftorg;
412     nzdata[orgcol].leftnz = EMPTY;
413     return orgcol;
414     } else {
415     /* hunt right for insertion point. we must find it before
416     * reaching the end of the row (per assumptions) or we
417     * eventually will seg. fault by reading outside nzdata.
418     */
419     searchcol = leftorg;
420     while ( pcc < newi) {
421     searchcol = nzdata[searchcol].rightnz;
422     #if RBADEBUG
423     assert(searchcol!=EMPTY);
424     #endif
425     pcc = nzdata[searchcol].pseudo_curcol;
426     }
427     #if RBADEBUG
428     assert(pcc!=newi); /* shouldn't call this if nz already there */
429     assert(searchcol != EMPTY);
430     #endif
431     /* search col is now the element to the right of
432     * where newi should be inserted.
433     */
434     /* point new element at neighbors */
435     nzdata[orgcol].rightnz = searchcol;
436     nzdata[orgcol].leftnz = nzdata[searchcol].leftnz;
437     /* repoint neighbors */
438     nzdata[nzdata[orgcol].leftnz].rightnz = orgcol;
439     nzdata[searchcol].leftnz = orgcol;
440     return leftorg; /* unchanged left */
441     }
442     }
443    
444     /* We insert (lifo) the rightside element at the left of the
445     * rightside nzlist
446     * assuming the element is not there. We do not assume any
447     * element IS there.
448     * User should set nzdata[orgcol].pseudo_curcol before calling
449     * by using SETPCCRIGHT.
450     */
451     static
452     int32 rankiba_addright_nz(int32 addtail,
453     int32 orgcol,
454     struct NZListEntry *nzdata
455     )
456     {
457     nzdata[orgcol].orgcol = orgcol;
458     if (addtail != EMPTY) {
459     /* link existing tail to new element */
460     #if RBADEBUG
461     assert(nzdata[addtail].leftnz == EMPTY);
462     assert(!onright(addtail,orgcol,nzdata));
463     #endif
464     nzdata[addtail].leftnz = orgcol;
465     }
466     nzdata[orgcol].leftnz = EMPTY; /* always adding to left */
467     nzdata[orgcol].rightnz = addtail; /* works even for addtail = EMPTY */
468     return orgcol;
469     }
470    
471     #define RELLOC(pd,ocol) ((pd)[(ocol)].relative_location)
472     /* returns the relative location (in pivot list pd) given the orgcol.
473     * Columns not yet IN pivot list will have RELLOC = INT_MIN.
474     * Columns rejected from pivot list will have RELLOC = DIAG_MIN.
475     * orgcol must be between 0 and sys->capacity.
476     */
477    
478     /*
479     * eliminate_row2_ba does no permutation of any sort.
480     * sp, pdata,nzdata should be of size sys->capacity.
481     * pdata and nzdata are assumed to be orgcol indexed
482     * as are tmp and pivots.
483     * On normal exits (no floating point exceptions)
484     * all nzdata entries will have orgcol == EMPTY,
485     * as it was supposed to on entry.
486     * On entry, must have row > rng->low.
487     */
488     #if RBADEBUG
489     static int dbg =0;
490     static
491     int check_nzdata( struct NZListEntry *nzd,int len)
492     {
493     int i,errs=0;
494     for (i=0;i<len;i++) {
495     if( nzd[i].orgcol != EMPTY) {
496     errs++;
497     FPRINTF(stderr,"foo at %d: l=%d r=%d p=%d o=%d\n",i,
498     nzd[i].leftnz,nzd[i].rightnz,nzd[i].pseudo_curcol,nzd[i].orgcol);
499     nzd[i].orgcol=EMPTY;
500     }
501     }
502     return errs;
503     }
504     #endif
505    
506     static
507     void eliminate_row2_ba( mtx_matrix_t mtx,
508     mtx_matrix_t upper_mtx,
509     const mtx_range_t *rng,
510     const int32 row, /* row to eliminate */
511     real64 *tmp, /* temporary array */
512     real64 *pivots, /* prior pivots array */
513     const real64 dtol, /* drop tolerance */
514     const int32 tail_relloc,/* not used if row = rng->low */
515     mtx_sparse_t *sp, /* scratch space */
516     struct PivListEntry *pdata,
517     struct NZListEntry *nzdata
518     )
519     {
520    
521     #define PCCLEFT(rloc) (low + (tail_relloc - (rloc)))
522     /* Returns the pseudo curcol index for a column given that
523     * a) it has already been pivotted,
524     * b) rloc is the RELLOC of its orgcol index.
525     * low and tail_relloc are variables declared/passed in this
526     * function.
527     */
528     #define SETPCCRIGHT(nzd,m,ocol) \
529     ((nzd)[(ocol)].pseudo_curcol = mtx_org_to_col((m),(ocol)))
530     /* Returns the pseudo curcol index for a column given that
531     * a) it has NOT been pivotted,
532     * b) ocol is the orgcol index.
533     * m is the matrix and nzd is the nzdata.
534     * As is obvious, in right side columns, pseudo and actual
535     * cur cols are the same.
536     */
537    
538     /* new */
539     mtx_coord_t org;
540     const real64 *data; /* tmp to help out the optimizer */
541     int32 i,k,len,newi;
542     int32 elimwithrow; /* orgcol of next nz to eliminate. from it get row */
543     int32 useorgrow; /* orgrow of row to add next during elimination */
544     int32 left = EMPTY; /* orgcol of leftmost (in pcc coord) nz to eliminate */
545     int32 right = EMPTY; /* orgcol of rightmost (in pcc coord) nz to eliminate */
546     int32 addtail = EMPTY; /* orgcol of head of rightside collist.
547     * since order of the columns right of origin
548     * matters not, it's a lifo list. */
549    
550     /* possibly helpful picture : (N is a nonzero)
551     * row to be eliminated might look initially like:
552     * |---------------N--------N---------||---N-------N------------|
553     * low(pcc) left(org) right ^ addtail high(pcc)
554     * leftpcc (org) origin
555     * as fill is created, left may move leftward.
556     * right, once established doesn't change by row ops.
557     * all N from low to origin will be eliminated.
558     */
559     /* old */
560     int32 j,low,high;
561     double tmpval;
562    
563     /* pseudo curcol limits */
564     low = rng->low; /* don't change this. pccleft needs it */
565     high = rng->high;
566    
567     #if RBADEBUG
568     FPRINTF(stderr,"Elim Row: %d\n",row);
569     if (check_nzdata(nzdata,mtx_order(mtx))) FPRINTF(stderr,"dirty data1\n");
570     /*
571     if (mtx_row_to_org(mtx,row) == 844 ||
572     mtx_row_to_org(mtx,row) == 913
573     ) dbg=1;
574     */
575     #endif
576     /*
577     * Move all non-zeros from current row to full array.
578     * All operations are done over mtx_ALL_COLS.
579     *
580     * Note: because of an addrow over mtx_ALL_COLS, entries
581     * of tmp outside rng may have nonzeros put in them if
582     * sys->factors has nonzeros in the outlying columns.
583     * If enough of these pile up out there during elimination
584     * of a block and the compiler treats double overflow as
585     * a signalable error, we will have a floating point
586     * exception.
587     * Currently sys->factors is a copy of a region from
588     * the sys->coef matrix, so we should not have anything
589     * out there to pile up. If we make a variant which
590     * does not copy the coef matrix but uses it directly,
591     * this code needs to be revisited.
592     */
593     /* get nonzeros in the row to be eliminated
594     * and expand row into nzdata structure
595     * Since we're working from sparsity patterns exclusively,
596     * initialization of tmp doesn't matter.
597     */
598     mtx_steal_org_row_sparse(mtx,row,sp,mtx_ALL_COLS);
599    
600     len = sp->len;
601     for (i=0; i < len; i++) {
602     /* the mtx sparse steal does not copy 0s, so don't check. */
603     /* maybe use the droptol here. */
604     j = sp->idata[i]; /* j is orgcol */
605     tmp[j] = sp->data[i];
606     k = RELLOC(pdata,j); /* k is pivot list relative location */
607     if (k > DIAG_MIN) {
608     newi = PCCLEFT(k);
609     left = rankiba_firstinsert_nz(left, newi, j, nzdata, &right);
610     } else {
611     SETPCCRIGHT(nzdata,mtx,j);
612     addtail = rankiba_addright_nz(addtail, j, nzdata);
613     }
614     }
615     /* done with sp at this point. will use again later. */
616    
617     if (left == EMPTY) {
618     /* punt. there's nothing to be done.
619     * clean up data structures and get out. only addtail might be dirty.
620     */
621     org.row = mtx_row_to_org(mtx,row);
622     #if (!RBADEBUG)
623     /* standard L refill method */
624     for (j = addtail; j != EMPTY; j = nzdata[j].rightnz) {
625     /* don't fill on cancellations */
626     if (fabs(tmp[j]) > dtol) {
627     org.col = j;
628     mtx_fill_org_value(mtx,&org,tmp[j]);
629     }
630     nzdata[j].orgcol = EMPTY;
631     }
632     #else
633     /* gross comparable, debuggable L refill method */
634     /* a nasty nasty loop to put stuff back in curcol order so we can do
635     * direct comparisons by diffing files.
636     */
637     {
638     int32 maxcurcol, tmpmax,maxold;
639    
640     if (addtail == EMPTY) {
641     if (check_nzdata(nzdata,mtx_order(mtx))) {
642     FPRINTF(stderr,"dirty data2\n");
643     }
644     return; /*nothing to do */
645     }
646     maxold = INT_MAX;
647     while (1) {
648     maxcurcol = -1;
649     for (j = addtail; j != EMPTY; j = nzdata[j].rightnz) {
650     tmpmax = mtx_org_to_col(mtx,j);
651     if (tmpmax > maxcurcol && tmpmax < maxold) {
652     maxcurcol = tmpmax;
653     }
654     }
655     if (maxcurcol == -1) {
656     /* everything printed. exit loop */
657     break;
658     }
659     maxold = maxcurcol;
660     j = mtx_col_to_org(mtx,maxcurcol);
661    
662     /* don't fill on cancellations */
663     if (fabs(tmp[j]) > dtol) {
664     org.col = j;
665     mtx_fill_org_value(mtx,&org,tmp[j]);
666     FPRINTF(gscr,"fillingL: or %d oc %d %24.18g\n",
667     org.row,org.col,tmp[j]);
668     }
669     nzdata[j].orgcol = EMPTY;
670     }
671     }
672     #endif
673    
674     #if (RBADEBUG)
675     if (check_nzdata(nzdata,mtx_order(mtx))) {
676     FPRINTF(stderr,"dirty data3\n");
677     }
678     #endif
679     return;
680     }
681     /* now we can safely assume there is at least one nonzero in the
682     * elimination section of the row.
683     */
684     /*
685     * Eliminates nzs from pivot, one by one, filling tmp with multipliers
686     * We now operate on the entire row, since we are not writing the
687     * multipliers back to the matrix.
688     */
689     for (elimwithrow = right ;
690     elimwithrow != EMPTY ;
691     elimwithrow = nzdata[elimwithrow].leftnz ) {
692     if (tmp[elimwithrow] != D_ZERO) {
693     /* Something to do for this row. things not cancelled */
694     useorgrow = pdata[elimwithrow].orgrow;
695     tmpval = tmp[elimwithrow] / pivots[elimwithrow]; /* Compute multiplier */
696    
697     #if RBADEBUG
698     if (dbg) {
699     FPRINTF(stderr,"elimwithrow %d useorgrow %d tmpval %24.18g\n",
700     elimwithrow,useorgrow,tmpval);
701     }
702     #endif
703    
704     /*
705     * tmpval is now the multiplier. We use it to eliminate the row,
706     * but backpatch it to tmp, *after* we do the elimination, as
707     * adding row over all columns will stomp on tmp[elimwithrow]
708     * because we don't want to pay for the if test that would
709     * avoid stomping tmp[elimwithrow].
710     */
711     /* was:
712     * mtx_cur_vec_add_row(mtx,tmp,j,-tmpval,mtx_ALL_COLS,FALSE);
713     */
714     mtx_org_row_sparse(mtx,
715     mtx_org_to_row(mtx,useorgrow),
716     sp,mtx_ALL_COLS,mtx_IGNORE_ZEROES);
717     data = sp->data;
718     len = sp->len;
719     for (i=0; i < len; i++) {
720     /* the mtx sparse fetch does not copy 0s, so don't check. */
721     /* Maybe use the droptol here?
722     * droptol here would amount to clearing epsilons from A
723     * before factoring.
724     */
725     j = sp->idata[i];
726     if (INSERTED(nzdata[j])) {
727     /* add on top of existing stuff */
728     tmp[j] -= tmpval * data[i];
729     #if RBADEBUG
730     if (dbg) {
731     FPRINTF(stderr,"accum: ");
732     }
733     #endif
734     } else {
735     /* create fill */
736     k = RELLOC(pdata,j);
737     tmp[j] = -tmpval * data[i];
738     if ( k > DIAG_MIN ) {
739     /* stuff in elimination region. insert fill to be eliminated */
740     newi = PCCLEFT(k);
741     left = rankiba_insert_nz(left, newi, j, nzdata);
742     #if RBADEBUG
743     if (dbg) {
744     FPRINTF(stderr,"2elim: ");
745     }
746     #endif
747     } else {
748     /* insert new potential pivot */
749     SETPCCRIGHT(nzdata,mtx,j);
750     addtail = rankiba_addright_nz(addtail, j, nzdata);
751     #if RBADEBUG
752     if (dbg) {
753     FPRINTF(stderr,"right: ");
754     }
755     #endif
756     }
757     }
758     #if RBADEBUG
759     if (dbg) {
760     FPRINTF(stderr,"tmp[%d] = %12.7g\n",j,tmp[j]);
761     }
762     #endif
763     }
764     #if RBADEBUG
765     if (dbg) {
766     right_nzlist(stderr,nzdata,left,tmp);
767     }
768     #endif
769     tmp[elimwithrow] = tmpval; /* patch the diagonal */
770     }
771     }
772     #if RBADEBUG
773     if (dbg) {
774     dbg=0;
775     }
776     #endif
777    
778     /*
779     * Fill up the upper triangular matrix.
780     * refill the pcc range [low .. row-1].
781     * OLD:
782     * Remember that we have to zero all nnz's in tmp, effectively.
783     */
784     org.row = mtx_row_to_org(mtx,row);
785     for (j = left; j != EMPTY; j = nzdata[j].rightnz) {
786     if (tmp[j] != D_ZERO) {
787     org.col = j;
788     mtx_fill_org_value(upper_mtx,&org,tmp[j]);
789    
790     #if (RBADEBUG)
791     FPRINTF(gscr,"fillingU: or %d oc %d (cc %d) %24.18g\n",
792     org.row,org.col,
793     PCCLEFT(RELLOC(pdata,j)),tmp[j]);
794     #endif
795     }
796     nzdata[j].orgcol = EMPTY;
797     #if (RBADEBUG)
798     FPRINTF(gscr,"nzdata[%4d] EMPTYd\n",j);
799     #endif
800     }
801     /*
802     * refill the range [row .. high]. We *wont* keep
803     * the diagonal as this is sorted out by the pivot
804     * array, but here we must keep it, sigh.
805     * At this stage we don't know that the diagonal is the
806     * pivot, as that is determined after elimination done.
807     * Outer loop must delete the ultimate pivot element.
808     * Odds are good, however, that it is the current diagonal,
809     * so put in the diagonal last.
810     */
811     #if (!RBADEBUG)
812     /* Standard refill L */
813     for (j=addtail; j != EMPTY; j = nzdata[j].rightnz) {
814     /* don't fill on cancellations */
815     if (fabs(tmp[j]) > dtol) {
816     org.col = j;
817     mtx_fill_org_value(mtx,&org,tmp[j]);
818     }
819     nzdata[j].orgcol = EMPTY;
820     }
821     #else
822     /* Nonstandard refill L comparable to rankikw via diff on output */
823     /* a nasty nasty loop to put stuff back in curcol order */
824     {
825     int32 maxcurcol, tmpmax,maxold;
826    
827     if (addtail == EMPTY) {
828     if (check_nzdata(nzdata,mtx_order(mtx))) {
829     FPRINTF(stderr,"dirty data4\n");
830     }
831     return; /*nothing to do */
832     }
833     maxold = INT_MAX;
834     while (1) {
835     maxcurcol = -1;
836     for (j = addtail; j != EMPTY; j = nzdata[j].rightnz) {
837     tmpmax = mtx_org_to_col(mtx,j);
838     /* bugs? */
839     nzdata[j].orgcol = EMPTY;
840     if (tmpmax > maxcurcol && tmpmax < maxold) {
841     maxcurcol = tmpmax;
842     }
843     }
844     if (maxcurcol == -1) {
845     /* everything printed. exit loop */
846     break;
847     }
848     maxold = maxcurcol;
849     j = mtx_col_to_org(mtx,maxcurcol);
850    
851     /* don't fill on cancellations */
852     if (fabs(tmp[j]) > dtol) {
853     org.col = j;
854     mtx_fill_org_value(mtx,&org,tmp[j]);
855     FPRINTF(gscr,"fillingL: or %d oc %d %24.18g\n",
856     org.row,org.col,tmp[j]);
857     }
858     nzdata[j].orgcol = EMPTY;
859     FPRINTF(gscr,"nzdata[%4d] EMPTYd\n",j);
860     }
861     }
862     #endif
863    
864     #if (RBADEBUG)
865     if (check_nzdata(nzdata,mtx_order(mtx))) {
866     FPRINTF(stderr,"dirty data5\n");
867     }
868     #endif
869     }
870    
871     /*
872     * This function is the same as rankikw2_factor except
873     * that it uses the 2 matrices, one to store L and one for U,
874     * with minimum permutation.
875     * As such it uses eliminate_row2_ba rather than eliminate_row2.
876     * We assume there is NO incidence outside the region to be factored
877     * in sys->factors.
878     *
879     * There is no rankiba_factor.
880     */
881    
882     static
883     void rankiba2_factor(linsolqr_system_t sys)
884     {
885    
886     /* old stuff */
887     mtx_coord_t nz, org;
888     mtx_matrix_t mtx; /* Acopy converted to L */
889     mtx_matrix_t upper_mtx; /* U */
890     int32 last_row; /* last row not yet factored (cur coords) */
891     int defect; /* rank defect encountered */
892     mtx_range_t pivot_candidates; /* columns (cur) to take pivot search over */
893     real64 *tmp; /* elimination work space */
894     real64 pivot, pivotsize; /* pivotsize = fabs(pivot) always */
895     real64 *pivots; /* pivot vector (cur) when all done, orgrow during */
896    
897     /* added */
898     int32 pivlisthead = EMPTY, pivlisttail = EMPTY, rejectpivtail = EMPTY;
899     /* org_col indices of current pivot data structure entry points. */
900     int32 oldhead, oldtail; /* org_col scratch indices */
901     struct PivListEntry *pdata;
902     struct NZListEntry *nzdata;
903     mtx_sparse_t *scratchrow = NULL;
904     int32 i,j,high;
905     int32 tail_relloc;
906    
907     /**
908     ** tmp and pivots are cur indexed, tmp by cols, pivots by rows.
909     ** rather than allocating tmp every time, which can get expensive,
910     ** we allocate it once *elsewhere* and zero it all here.
911     ** Because we don't know the state of it on entry (due to exceptions,
912     ** etc, the rezeroing is unavoidable.
913     ** Eliminate_row2 is responsible for returning the working region
914     ** zeroed.
915     **
916     ** Actually, during factorization, pivots is indexed by orgcol.
917     ** Afterwards we fix it up to cur row.
918     **
919     **
920     **/
921    
922     tmp = sys->ludata->tmp; /* uninitialized -- sparsity in charge */
923     scratchrow = mtx_create_sparse(sys->capacity);
924     if (scratchrow == NULL) {
925     FPRINTF(stderr,"rankiba2_factor: (%s) Insufficient memory (1)\n",__FILE__);
926     return;
927     }
928     pivots = sys->ludata->pivlist;
929     mtx = sys->factors;
930     upper_mtx = sys->inverse;
931     defect = 0;
932    
933     sys->smallest_pivot = DBL_MAX;
934     last_row = sys->rng.high; /* last_row decreases as singularity increases */
935     high = last_row; /* high stays constant from here */
936     pivot_candidates.high = sys->rng.high;
937    
938     if (reset_elimination_data(sys->capacity,1)) {
939     FPRINTF(stderr,"rankiba2_factor: (%s) Insufficient memory (2)\n",__FILE__);
940     mtx_destroy_sparse(scratchrow);
941     return;
942     }
943    
944     #if RBADEBUG
945     gscr = fopen("/usr1/ballan/tmp/lu/rba","w+");
946     #endif
947    
948     pdata = g_ba_elimdata.pivdata;
949     nzdata = g_ba_elimdata.nzdata;
950     assert(pdata!=NULL);
951     assert(nzdata!=NULL);
952    
953    
954     for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
955    
956     #if (RBADEBUG && 0)
957     mtx_write_region_human_orgrows(gscr,mtx,mtx_ENTIRE_MATRIX);
958     #endif
959    
960     pivot_candidates.low = nz.col = nz.row;
961     pivot = mtx_value(mtx,&nz);
962     pivotsize = fabs(pivot);
963    
964     if (pivotsize > sys->pivot_zero &&
965     pivotsize >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) &&
966     !col_is_a_spike(sys,nz.row) ) {
967     /* Good pivot and not a spike: continue with next row */
968     if( pivotsize < sys->smallest_pivot ) {
969     sys->smallest_pivot = pivotsize;
970     }
971     /* get unpermuted location of pivot */
972     org.col = mtx_col_to_org(mtx,nz.row);
973     org.row = mtx_row_to_org(mtx,nz.row);
974     /* insert pivot at list head */
975     if (pivlisthead >= 0) {
976     /* usual case - add to lower right */
977     oldhead = pivlisthead;
978     #if RBADEBUG
979     assert(pdata[oldhead].next == EMPTY);
980     #endif
981     pdata[oldhead].next = org.col;
982     pdata[org.col].prev = oldhead;
983     pdata[org.col].next = EMPTY; /* only for debug, really? */
984     pdata[org.col].orgrow = org.row;
985     pdata[org.col].relative_location = pdata[oldhead].relative_location - 1;
986     pivlisthead = org.col;
987     } else {
988     /* very first pivot case */
989     pdata[org.col].orgrow = org.row;
990     pdata[org.col].next = pdata[org.col].prev = EMPTY;
991     pivlisthead = pivlisttail = org.col;
992     pdata[org.col].relative_location = 0;
993     }
994     pivots[org.col] = pivot;
995     #if RBADEBUG
996     FPRINTF(gscr,"Cheap pivot col %d (org %d)\n",nz.row,org.col);
997     #endif
998     /* we will have to sort pivots back to cur order at end using tmp */
999     ++(nz.row);
1000     continue;
1001     }
1002     /* else we need to possibly eliminate and search for pivot */
1003    
1004     /* pivots for rows nz.row back to sys->rng.low are stored in pivots,
1005     * but now in the corresponding orgcol positions determined by pdata.
1006     */
1007     /**
1008     *** Row is a spike row or will
1009     *** be when a necessary column
1010     *** exchange occurs. Or maybe row is singular.
1011     **/
1012     if (nz.row == 176) {
1013     nz.row = 176;
1014     }
1015     if (pivlisttail != EMPTY) {
1016     tail_relloc = RELLOC(pdata,pivlisttail);
1017     eliminate_row2_ba(mtx,upper_mtx,&(sys->rng),nz.row,
1018     tmp,pivots,sys->dtol,tail_relloc,
1019     scratchrow,pdata,nzdata);
1020     }
1021     /* pivot will be leftmost of those that pass ptol and eps, or 0.0. */
1022     if (!defect) {
1023     /* no singular cols, and eliminate moved all left of diag crap out, so */
1024     pivotsize = mtx_get_pivot_col(mtx,&nz,mtx_ALL_COLS,
1025     &pivot,sys->ptol,sys->pivot_zero);
1026     } else {
1027     /* unfortunately, can't use mtx_ALL_COLS in search now because we must
1028     * ignore columns that were dragged out with previous singular rows.
1029     * The columns in pivot range have not been 'disordered' from
1030     * the sequence they would have in the old ranki implementations,
1031     * so this search will work and get nice close neighbor if
1032     * available.
1033     */
1034     pivotsize = mtx_get_pivot_col(mtx,&nz,&pivot_candidates,
1035     &pivot, sys->ptol,sys->pivot_zero);
1036     }
1037     #if RBADEBUG
1038     FPRINTF(gscr,"Pivot column found %d (org %d)\n",
1039     nz.col, mtx_col_to_org(mtx,nz.col));
1040     #endif
1041    
1042     if ( pivotsize < sys->pivot_zero ) { /* pivot is an epsilon */
1043     /*
1044     * Dependent row, drag to the end. The upper_mtx is a slave
1045     * of mtx, and will be dragged automatically.
1046     * If we expect to routinely handle bad matrices,
1047     * this needs to be reimplemented to work on linked list stuff,
1048     * in which case pivot selection gets hairy.
1049     */
1050     mtx_drag(mtx,nz.row,last_row);
1051     /* rejected rows and columns are still explicitly on the lower right */
1052     /* rejectpivtail
1053     * is the orgcol of the upper left most of rejected pivots (and
1054     * also the most recently rejected).
1055     */
1056     org.col = mtx_col_to_org(mtx,last_row);
1057     org.row = mtx_row_to_org(mtx,last_row);
1058     if (rejectpivtail == EMPTY) {
1059     pdata[org.col].next = pdata[org.col].prev = EMPTY;
1060     pdata[org.col].orgrow = org.row;
1061     pdata[org.col].relative_location = DIAG_MIN;
1062     } else {
1063     oldtail = rejectpivtail;
1064     #if RBADEBUG
1065     assert(pdata[oldtail].prev == EMPTY);
1066     #endif
1067     pdata[oldtail].prev = org.col;
1068     pdata[org.col].next = oldtail;
1069     pdata[org.col].prev = EMPTY;
1070     pdata[org.col].orgrow = org.row;
1071     pdata[org.col].relative_location = DIAG_MIN;
1072     }
1073     rejectpivtail = org.col;
1074     --last_row;
1075     defect = 1;
1076     } else {
1077     /* Independent row: nz contains selected pivot */
1078     /* Move pivot to diagonal */
1079     mtx_swap_cols(mtx,nz.row,nz.col); /* this Fixes U as well */
1080     /* hah! killed it! killed the quadratic drag! */
1081     /* instead, insert at upper left of linked list */
1082     org.col = mtx_col_to_org(mtx,nz.row);
1083     org.row = mtx_row_to_org(mtx,nz.row);
1084     if (pivlisttail != EMPTY) {
1085     oldtail = pivlisttail;
1086     #if RBADEBUG
1087     assert(pdata[oldtail].prev == EMPTY);
1088     #endif
1089     pdata[oldtail].prev = org.col;
1090     pdata[org.col].next = oldtail;
1091     pdata[org.col].prev = EMPTY;
1092     pdata[org.col].relative_location = pdata[oldtail].relative_location + 1;
1093     pdata[org.col].orgrow = org.row;
1094     pivlisttail = org.col;
1095     } else {
1096     /* very first pivot case */
1097     pdata[org.col].orgrow = org.row;
1098     pdata[org.col].next = pdata[org.col].prev = EMPTY;
1099     pivlisthead = pivlisttail = org.col;
1100     pdata[org.col].relative_location = 0;
1101     }
1102     if( pivotsize < sys->smallest_pivot ) {
1103     sys->smallest_pivot = pivotsize;
1104     }
1105     ++(nz.row);
1106     }
1107     pivots[org.col] = pivot;
1108     }
1109     /* done with math */
1110    
1111     /* need to repermute matrices, pivots here */
1112     /* rejects should already have a proper matrix location,
1113     * but we need to run down the diagonal and fix things.
1114     */
1115     i = sys->rng.low;
1116     /* assuming square region */
1117     for (j = pivlisttail; j != EMPTY; j = pdata[j].next) {
1118     /* fix row order */
1119     if (pdata[j].orgrow != mtx_row_to_org(mtx,i)) {
1120     mtx_swap_rows(mtx,i,mtx_org_to_row(mtx,pdata[j].orgrow));
1121     }
1122     /* fix row order. by construction, j = pdata[j].orgcol */
1123     if (j != mtx_col_to_org(mtx,i)) {
1124     mtx_swap_cols(mtx,i,mtx_org_to_col(mtx,j));
1125     }
1126     i++;
1127     }
1128     assert((i-1)==last_row);
1129     /* mtx ok. fix pivots. tmp is out of use, so copy from org pivots
1130     * to cur locations in tmp, and then copy back the tmp range
1131     * in a separate loop. pivots is initially in orgcol order.
1132     */
1133     for (j = pivlisttail; j != EMPTY; j = pdata[j].next) {
1134     /* fixing pivots */
1135     #if RBADEBUG
1136     assert( mtx_org_to_row(mtx,pdata[j].orgrow) >= sys->rng.low );
1137     assert( mtx_org_to_row(mtx,pdata[j].orgrow) <= last_row );
1138     #endif
1139     tmp[mtx_org_to_row(mtx,pdata[j].orgrow)] = pivots[j];
1140     }
1141     for (j = rejectpivtail; j != EMPTY; j = pdata[j].next) {
1142     /* fixing rejected pivots */
1143     #if RBADEBUG
1144     assert( mtx_org_to_row(mtx,pdata[j].orgrow) >last_row );
1145     assert( mtx_org_to_row(mtx,pdata[j].orgrow) <= sys->rng.high);
1146     #endif
1147     tmp[mtx_org_to_row(mtx,pdata[j].orgrow)] = pivots[j];
1148     }
1149     for (i = sys->rng.low; i <= high; i++) {
1150     pivots[i] = tmp[i];
1151     }
1152    
1153     #if (RBADEBUG)
1154     /* get rid of this */
1155     {
1156     FILE *fp;
1157     int32 cc;
1158     fp = fopen("/usr1/ballan/tmp/lu/ba2p","w+");
1159     FPRINTF(fp,"ba2 recorded pivot sequence:\n");
1160     for (cc= sys->rng.low; cc <= last_row; cc++) {
1161     FPRINTF(fp,"orgrow,orgcol = %d,%d,%24.18g\n",
1162     mtx_row_to_org(mtx,cc),
1163     mtx_col_to_org(mtx,cc), pivots[cc]);
1164     }
1165     fclose(fp);
1166     }
1167     #endif
1168    
1169     zero_diagonal_elements(mtx,sys->rng.low,sys->rng.high);
1170     sys->rank = last_row - sys->rng.low + 1;
1171     mtx_destroy_sparse(scratchrow);
1172     #if (RBADEBUG)
1173     {
1174     FILE *fp;
1175     fp = fopen("/usr1/ballan/tmp/lu/ba2m","w+");
1176     mtx_write_region_human_rows(fp,mtx,mtx_ENTIRE_MATRIX);
1177     fclose(fp);
1178     fp = fopen("/usr1/ballan/tmp/lu/ba2um","w+");
1179     mtx_write_region_human_rows(fp,upper_mtx,mtx_ENTIRE_MATRIX);
1180     fclose(fp);
1181     }
1182     #endif
1183     #if (RBADEBUG)
1184     fclose(gscr);
1185     #endif
1186     }
1187    

john.pye@anu.edu.au
ViewVC Help
Powered by ViewVC 1.1.22