/[ascend]/trunk/ascend4/solver/linsolqr.c
ViewVC logotype

Annotation of /trunk/ascend4/solver/linsolqr.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations) (download) (as text)
Fri Oct 29 20:54:12 2004 UTC (18 years, 7 months ago) by aw0a
File MIME type: text/x-csrc
File size: 168133 byte(s)
Setting up web subdirectory in repository
1 aw0a 1 /*
2     * linsol II: Ascend Sparse Linear Solvers
3     * by Benjamin Allan
4     * Created: 2/6/90
5     * Version: $Revision: 1.26 $
6     * Version control file: $RCSfile: linsolqr.c,v $
7     * Date last modified: $Date: 2000/01/25 02:26:58 $
8     * Last modified by: $Author: ballan $
9     *
10     * This file is part of the SLV solver.
11     *
12     * Copyright (C) 1994 Benjamin Andrew Allan
13     *
14     * Based on linsol:
15     * Copyright (C) 1990 Karl Michael Westerberg
16     * Copyright (C) 1993 Joseph Zaher
17     * Copyright (C) 1994 Joseph Zaher, Benjamin Andrew Allan
18     * and on sqr.pas v1.5:
19     * Copyright (C) 1994 Boyd Safrit
20     *
21     * The SLV solver is free software; you can redistribute
22     * it and/or modify it under the terms of the GNU General Public License as
23     * published by the Free Software Foundation; either version 2 of the
24     * License, or (at your option) any later version.
25     *
26     * The SLV solver is distributed in hope that it will be
27     * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
28     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29     * General Public License for more details.
30     *
31     * You should have received a copy of the GNU General Public License
32     * along with the program; if not, write to the Free Software Foundation,
33     * Inc., 675 Mass Ave, Cambridge, MA 02139 USA. Check the file named
34     * COPYING. COPYING is found in ../compiler.
35     */
36    
37     /*
38     * Implementation Notes:
39     * 9/95: there are many places in linsolqr.c where lower level matrix
40     * operations have been replaced by higher level ones. These used to
41     * be flagged as such so that people who wanted to construct newer
42     * operations from the lowest level mtx operators could see how to
43     * use the lowlevel operators. Since it has been decided that the old
44     * linsol will NOT be discarded completely (we like having easy
45     * benchmarks to look better than), the user wishing to code new
46     * operations is directed back to linsol to see how lowlevel operations
47     * can be expensively done.
48     *
49     * 10/95: (BAA) I attempted to optimize number_drag through use of
50     * pointer arithmetic. Testing the resulting code without optimization
51     * turned on gave about 20% improvement over the original on an alpha
52     * using the native compiler. However, with optimization on the array
53     * subscripted version which is left here won by ~40%.
54     * The results for gcc were similar: pointer 10% better unoptimized,
55     * subscripted 30% better when -O2.
56     * For both compilers -O3 was worse! gcc was ~1.8x slower than the
57     * native cc both optimized and not.
58     * I have not tested a loop unrolled drag, but I expect unrolling is
59     * exactly what the optimizer is doing, so why bother?
60     */
61    
62     #include <math.h>
63     #include <stdarg.h>
64     #include "utilities/ascConfig.h"
65     #include "utilities/ascMalloc.h"
66     #include "utilities/ascPanic.h"
67     #include "utilities/mem.h"
68     #include "utilities/set.h"
69     #include "general/tm_time.h"
70     #include "solver/mtx.h"
71     #include "solver/linsolqr.h"
72    
73     #ifdef __WIN32__
74     #define copysign _copysign
75     #endif /* __WIN32__ */
76    
77     #define LINSOLQR_DEBUG FALSE
78     /* qr code debugging flag */
79     #define LINSOL_DEBUG FALSE
80     /* linsol debugging flag */
81    
82     #define D_ZERO (real64)0.0
83     #define ZERO (int32)0
84     #define D_ONE (real64)1.0
85    
86     /* housekeeping flags */
87     #define BUILD_DEAD_CODE 0
88     #undef BUILD_KIRK_CODE
89    
90    
91     #define RBADEBUG 0
92     /* turns on spew that will generate output files
93     * comparable to and in the same location as for rankiba2
94     * user specific hardcoded pathnames are involved.
95     * don't try this at home.
96     */
97     #if RBADEBUG
98     extern FILE *gscr;
99     #endif
100    
101     /*
102     * timing messages disable flag.
103     */
104     int g_linsolqr_timing = 1;
105    
106     /***************************************************************************\
107     Data structures for linsol
108     Any time you modify these, please verify linsolqr_size is still correct.
109     \***************************************************************************/
110     struct rhs_list {
111     real64 *rhs; /* Vector of rhs values */
112     real64 *varvalue; /* Solution of the linear system */
113     boolean solved; /* ? has the rhs been solved */
114     boolean transpose; /* ? transpose the coef matrix */
115     struct rhs_list *next;
116     };
117    
118     struct qr_fill_t {
119     int32 cnt; /* number of real nonzeros in a column */
120     int32 col; /* column number prior to being sorted */
121     int32 fill; /* fill if this (or if col?) were the pivot column */
122     int32 overlap; /* scratch for fillable calculation. */
123     };
124     /* fill counting structure for sparse qr */
125    
126     struct lu_auxdata { /* all indexed by cur row or cur col number */
127     real64 *pivlist; /* vector of pivots (diagonal of L for ranki) */
128     real64 *tmp; /* row elimination, dependency buffer */
129     int32 cap; /* current capacity of the vectors if not null */
130     };
131     /* structure for lu algorithms */
132    
133     struct qr_auxdata { /* all indexed by cur row or cur col number */
134     mtx_matrix_t hhvects; /* slave matrix of sys->factors for holding u's */
135     mtx_sparse_t sp; /* sparse data space */
136     real64 *alpha; /* column norms for the Q\R factor matrix */
137     real64 *sigma; /* column norms for the S (R inverse) matrix */
138     real64 *tau; /* column tau of the Householder transforms */
139     real64 *hhcol; /* Householder transform scratch vector */
140     real64 *hhrow; /* Householder transform scratch vector */
141     real64 nu; /* current condition number of inverse */
142     real64 anu; /* condition number of coef */
143     real64 asubnu; /* condition number of active region in coef */
144     struct qr_fill_t *fill; /* fill counting structure */
145     mtx_region_t facreg; /* region over which qr is done if nonsquare */
146     int32 cap; /* current capacity of these vectors if not null */
147     };
148     /* structure for CondQR algorithms */
149    
150     struct linsolqr_header {
151     int integrity;
152     enum factor_class fclass; /* Type of factoring expected */
153     enum factor_method fmethod; /* Method factoring expected */
154     enum reorder_method rmethod; /* Type of most recent reordering */
155     int32 capacity; /* Capacity of arrays */
156     mtx_matrix_t coef; /* Coefficient matrix */
157     struct rhs_list *rl; /* List of rhs vectors */
158     int rlength; /* Length of rhs list */
159     real64 pivot_zero; /* Smallest acceptable pivot */
160     real64 ptol; /* Pivot selection tolerance */
161     real64 ctol; /* Condition selection tolerance */
162     real64 dtol; /* Matrix entry drop tolerance */
163     mtx_matrix_t factors; /* Matrix with UL and dependence info (ranki),
164     or L and dependence info (ranki2),
165     or R and dependence info (qr methods) */
166     mtx_matrix_t inverse; /* Matrix with U multipliers (ranki2) or R^-1
167     (condqr), in both cases slave of factors */
168     boolean factored; /* ? has the matrix been factored */
169     boolean rowdeps; /* ? have row dependencies been calc'ed */
170     boolean coldeps; /* ? have col dependencies been calc'ed */
171     mtx_range_t rng; /* Pivot range */
172     mtx_region_t reg; /* Bounding region given */
173     int32 rank; /* Rank of the matrix */
174     real64 smallest_pivot; /* Smallest pivot accepted */
175     struct qr_auxdata *qrdata; /* Data vectors for qr methods */
176     struct lu_auxdata *ludata; /* Data vectors for lu methods */
177     };
178     /* linsol main structure */
179    
180     /***************************************************************************\
181     string functions for linsol io
182     \***************************************************************************/
183    
184     char *linsolqr_rmethods() {
185     static char names[]="Natural, SPK1, TSPK1";
186     return names;
187     }
188    
189     char *linsolqr_fmethods() {
190     static char names[] =
191     "SPK1/RANKI,SPK1/RANKI+ROW,Fast-SPK1/RANKI,Fast-SPK1/RANKI+ROW,Fastest-SPK1/MR-RANKI,CondQR,CPQR";
192     return names;
193     }
194    
195     /** **/
196     enum reorder_method linsolqr_rmethod_to_enum(char *name) {
197     if (strcmp(name,"SPK1")==0) return spk1;
198     if (strcmp(name,"TSPK1")==0) return tspk1;
199     if (strcmp(name,"Natural")==0) return natural;
200     return unknown_r;
201     }
202    
203     enum factor_method linsolqr_fmethod_to_enum(char *name) {
204     if (strcmp(name,"KIRK-STUFF")==0) return ranki_ka;
205     if (strcmp(name,"SPK1/RANKI")==0) return ranki_kw;
206     if (strcmp(name,"SPK1/RANKI+ROW")==0) return ranki_jz;
207     if (strcmp(name,"Fast-SPK1/RANKI")==0) return ranki_kw2;
208     if (strcmp(name,"Fast-SPK1/RANKI+ROW")==0) return ranki_jz2;
209     if (strcmp(name,"Fastest-SPK1/MR-RANKI")==0) return ranki_ba2;
210     if (strcmp(name,"CondQR")==0) return cond_qr;
211     if (strcmp(name,"CPQR")==0) return plain_qr;
212     return unknown_f;
213     }
214    
215     enum factor_class linsolqr_fmethod_to_fclass(enum factor_method fm)
216     {
217     switch (fm) {
218     /* ranki-like things */
219     case ranki_kw:
220     case ranki_jz:
221     case ranki_ba2:
222     case ranki_kw2:
223     case ranki_jz2:
224     case ranki_ka:
225     return ranki;
226     /* implemented qr things */
227     case plain_qr:
228     return s_qr;
229     /* other stuff unimplemented. all fall through */
230     case cond_qr:
231     case opt_qr:
232     case ls_qr:
233     case gauss_ba2:
234     case symmetric_lu:
235     case unknown_f:
236     default:
237     return unknown_c;
238     }
239     }
240    
241     /** **/
242     char *linsolqr_enum_to_rmethod(enum reorder_method meth) {
243     switch (meth) {
244     case spk1: return "SPK1";
245     case tspk1: return "TSPK1";
246     case natural: return "Natural";
247     default: return "Unknown reordering method.";
248     }
249     }
250    
251     char *linsolqr_enum_to_fmethod(enum factor_method meth) {
252     switch (meth) {
253     case ranki_kw: return "SPK1/RANKI";
254     case ranki_jz: return "SPK1/RANKI+ROW";
255     case ranki_ba2: return "Fastest-SPK1/MR-RANKI";
256     case ranki_kw2: return "Fast-SPK1/RANKI";
257     case ranki_jz2: return "Fast-SPK1/RANKI+ROW";
258     case ranki_ka: return "KIRK-STUFF";
259     case cond_qr: return "CondQR";
260     case plain_qr: return "CPQR";
261     default: return "Unknown factorization method.";
262     }
263     }
264    
265     /** **/
266     char *linsolqr_rmethod_description(enum reorder_method meth) {
267     static char sspk1[]="SPK1 reordering ala Stadtherr.";
268     static char stspk1[]="SPK1 reordering column wise ala Stadtherr.";
269     static char nat[]="Ordering as received from user";
270     switch (meth) {
271     case spk1: return sspk1;
272     case tspk1: return stspk1;
273     case natural: return nat;
274     default: return "Unknown reordering method.";
275     }
276     }
277    
278     char *linsolqr_fmethod_description(enum factor_method meth) {
279     static char ex_ranki[] =
280     "SPK1 reordering with RANKI LU factoring ala Stadtherr.";
281     static char ex_rankib[] =
282     "SPK1 reordering with RANKI LU factoring ala Stadtherr, but fast.";
283     static char ex_rankij[]="SPK1/RANKI LU with pseudo-complete pivoting";
284     static char ex_rankik[]="KIRK-STUFF/RANKI LU with pseudo-complete pivoting";
285     static char ex_condqr[]="Sparse QR with condition controlled pivoting.";
286     static char ex_plainqr[]="Sparse QR with column pivoting.";
287     switch (meth) {
288     case ranki_kw: return ex_ranki;
289     case ranki_kw2: return ex_ranki;
290     case ranki_ba2: return ex_rankib;
291     case ranki_jz: return ex_rankij;
292     case ranki_jz2: return ex_rankij;
293     case ranki_ka: return ex_rankik;
294     case cond_qr: return ex_condqr;
295     case plain_qr: return ex_plainqr;
296     default: return "Unknown factorization method.";
297     }
298     }
299    
300     /***************************************************************************\
301     internal check routines
302     \***************************************************************************/
303     #define OK ((int)439828676)
304     #define DESTROYED ((int)839276847)
305     #define CHECK_SYSTEM(a) check_system((a),__FILE__,__LINE__)
306     static int check_system(linsolqr_system_t sys, char *file, int line)
307     /**
308     *** Checks the system handle. Return 0 if ok, 1 otherwise.
309     **/
310     {
311     if( ISNULL(sys) ) {
312     FPRINTF(stderr,"ERROR: (linsolqr) check_system\n");
313     FPRINTF(stderr," NULL system handle.\n");
314     return 1;
315     }
316    
317     switch( sys->integrity ) {
318     case OK:
319     return 0;
320     case DESTROYED:
321     FPRINTF(stderr,"ERROR: (linsolqr) check_system, line %d\n",line);
322     FPRINTF(stderr," System was recently destroyed.\n");
323     break;
324     default:
325     FPRINTF(stderr,"ERROR: (linsolqr) check_system, line %d\n",line);
326     FPRINTF(stderr," System was reused or never created.\n");
327     break;
328     }
329     return 1;
330     }
331    
332     /***************************************************************************\
333     external calls (and some of their internals)
334     \***************************************************************************/
335     static void destroy_rhs_list(struct rhs_list *rl)
336     /**
337     *** Destroys rhs list.
338     **/
339     {
340     while( NOTNULL(rl) ) {
341     struct rhs_list *p;
342     p = rl;
343     rl = rl->next;
344     if( NOTNULL(p->varvalue) )
345     ascfree( (POINTER)(p->varvalue) );
346     ascfree( (POINTER)p );
347     }
348     }
349    
350     static struct rhs_list *find_rhs(struct rhs_list *rl,real64 *rhs)
351     /**
352     *** Searches for rhs in rhs list, returning it or NULL if not found.
353     **/
354     {
355     for( ; NOTNULL(rl) ; rl = rl->next )
356     if( rl->rhs == rhs )
357     break;
358     return(rl);
359     }
360    
361     static struct lu_auxdata *create_ludata()
362     /**
363     *** Creates an empty zeroed ludata struct.
364     *** Filled up by insure_lu_capacity.
365     **/
366     {
367     struct lu_auxdata *d;
368     d=(struct lu_auxdata *)asccalloc(1,sizeof(struct lu_auxdata));
369     return d;
370     }
371    
372     static struct qr_auxdata *create_qrdata()
373     /**
374     *** Creates an empty zeroed qrdata struct.
375     *** Filled up by insure_qr_capacity.
376     **/
377     {
378     struct qr_auxdata *d;
379     d=(struct qr_auxdata *)asccalloc(1,sizeof(struct qr_auxdata));
380     return d;
381     }
382    
383     static void destroy_ludata(struct lu_auxdata *d) {
384     /**
385     *** Conditionally destroys a ludata struct and its parts.
386     **/
387     if (NOTNULL(d)) {
388     if (NOTNULL(d->pivlist)) ascfree(d->pivlist);
389     d->pivlist=NULL;
390     if (NOTNULL(d->tmp)) ascfree(d->tmp);
391     d->tmp=NULL;
392     ascfree(d);
393     }
394     }
395    
396     static void destroy_qrdata(struct qr_auxdata *d) {
397     /**
398     *** Conditionally destroys a qrdata struct and its parts.
399     **/
400     if (NOTNULL(d)) {
401     if (NOTNULL(d->alpha)) ascfree(d->alpha);
402     if (NOTNULL(d->sigma)) ascfree(d->sigma);
403     if (NOTNULL(d->tau)) ascfree(d->tau);
404     if (NOTNULL(d->hhcol)) ascfree(d->hhcol);
405     if (NOTNULL(d->hhrow)) ascfree(d->hhrow);
406     if (NOTNULL(d->fill)) ascfree(d->fill);
407    
408     ascfree(d);
409     }
410     }
411    
412     linsolqr_system_t linsolqr_create()
413     {
414     linsolqr_system_t sys;
415    
416     sys = (linsolqr_system_t)ascmalloc( sizeof(struct linsolqr_header) );
417     sys->fclass = unknown_c;
418     sys->fmethod = unknown_f;
419     sys->rmethod = unknown_r;
420     sys->integrity = OK;
421     sys->capacity = 0;
422     sys->coef = NULL;
423     sys->rl = NULL;
424     sys->rlength = 0;
425     sys->pivot_zero = 1e-12; /* default value */
426     sys->ptol = 0.1; /* default value */
427     sys->ctol = 0.1; /* default value */
428     sys->dtol = LINQR_DROP_TOLERANCE; /* default value */
429     sys->factors = NULL;
430     sys->inverse = NULL;
431     sys->factored = FALSE;
432     sys->rowdeps = FALSE;
433     sys->coldeps = FALSE;
434     sys->rng.low = sys->rng.high = -1;
435     sys->reg.row.low = sys->reg.row.high = -1;
436     sys->reg.col.low = sys->reg.col.high = -1;
437     sys->rank = -1;
438     sys->smallest_pivot = MAXDOUBLE;
439     sys->qrdata = NULL;
440     sys->ludata = NULL;
441     return(sys);
442     }
443    
444     void linsolqr_destroy(linsolqr_system_t sys)
445     {
446     if(CHECK_SYSTEM(sys)) {
447     FPRINTF(stderr,"Bad linsolqr_system_t found. Not destroyed.\n");
448     return;
449     }
450     if( NOTNULL(sys->coef) ) {
451     FPRINTF(stderr,"Notice: (linsolqr) non-NULL coefficient matrix\n");
452     FPRINTF(stderr," in linsolqr system not being\n");
453     FPRINTF(stderr," destroyed with linsolqr_system_t.\n");
454     }
455     if( NOTNULL(sys->inverse) )
456     mtx_destroy(sys->inverse);
457     if( NOTNULL(sys->factors) )
458     mtx_destroy(sys->factors);
459     destroy_rhs_list(sys->rl);
460     destroy_qrdata(sys->qrdata);
461     destroy_ludata(sys->ludata);
462     sys->integrity = DESTROYED;
463     ascfree( (POINTER)sys );
464     }
465    
466     void linsolqr_set_matrix(linsolqr_system_t sys,mtx_matrix_t mtx)
467     {
468     if(CHECK_SYSTEM(sys)) {
469     FPRINTF(stderr,"Bad linsolqr_system_t found. coef mtx not set.\n");
470     return;
471     }
472     sys->coef = mtx;
473     }
474    
475    
476     void linsolqr_set_region(linsolqr_system_t sys,mtx_region_t region)
477     {
478     if(CHECK_SYSTEM(sys)) {
479     FPRINTF(stderr,"Bad linsolqr_system_t found. coef mtx not set.\n");
480     return;
481     }
482     sys->reg = region;
483     }
484    
485    
486     mtx_matrix_t linsolqr_get_matrix(linsolqr_system_t sys)
487     {
488     CHECK_SYSTEM(sys);
489     return( sys->coef );
490     }
491    
492     mtx_region_t *linsolqr_get_region(linsolqr_system_t sys)
493     {
494     return &(sys->reg);
495     }
496    
497     void linsolqr_add_rhs(linsolqr_system_t sys,
498     real64 *rhs,
499     boolean transpose)
500     {
501     struct rhs_list *rl;
502    
503     if(CHECK_SYSTEM(sys)) {
504     FPRINTF(stderr,"Bad linsolqr_system_t found. rhs not added.\n");
505     return;
506     }
507     rl = find_rhs(sys->rl,rhs);
508     if( NOTNULL(rl) ) {
509     return;
510     } else {
511     if( NOTNULL(rhs) ) {
512     rl = (struct rhs_list *)ascmalloc( sizeof(struct rhs_list) );
513     rl->rhs = rhs;
514     rl->varvalue = NULL;
515     rl->solved = FALSE;
516     rl->transpose = transpose;
517     rl->next = sys->rl;
518     sys->rl = rl;
519     ++(sys->rlength);
520     if (sys->capacity>0) {
521     rl->varvalue =
522     (real64 *)ascmalloc(sys->capacity*sizeof(real64));
523     }
524     }
525     }
526     }
527    
528     void linsolqr_remove_rhs(linsolqr_system_t sys,real64 *rhs)
529     {
530     struct rhs_list **q;
531    
532     if(CHECK_SYSTEM(sys)) {
533     FPRINTF(stderr,"Bad linsolqr_system_t found. rhs not removed.\n");
534     return;
535     }
536     for( q = &(sys->rl) ; NOTNULL(*q) ; q = &((*q)->next) )
537     if( (*q)->rhs == rhs )
538     break;
539     if( NOTNULL(*q) ) {
540     struct rhs_list *p;
541     p = *q;
542     *q = p->next;
543     if( NOTNULL(p->varvalue) )
544     ascfree( (POINTER)(p->varvalue) );
545     ascfree( (POINTER)p );
546     --(sys->rlength);
547     } else if( NOTNULL(rhs) ) {
548     FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_remove_rhs\n");
549     FPRINTF(stderr," Rhs does not exist.\n");
550     }
551     }
552    
553     int32 linsolqr_number_of_rhs(linsolqr_system_t sys)
554     {
555     CHECK_SYSTEM(sys);
556     return( sys->rlength );
557     }
558    
559     real64 *linsolqr_get_rhs(linsolqr_system_t sys,int n)
560     {
561     struct rhs_list *rl;
562     int count;
563    
564     CHECK_SYSTEM(sys);
565    
566     count = sys->rlength - 1 - n;
567     if( count < 0 ) return(NULL);
568     for( rl = sys->rl ; count > 0 && NOTNULL(rl) ; rl = rl->next )
569     --count;
570     return( ISNULL(rl) ? NULL : rl->rhs );
571     }
572    
573     void linsolqr_matrix_was_changed(linsolqr_system_t sys)
574     {
575     if(CHECK_SYSTEM(sys)) {
576     FPRINTF(stderr,
577     "Bad linsolqr_system_t found. matrix change message ignored.\n");
578     return;
579     }
580     sys->rowdeps = sys->coldeps = sys->factored = FALSE;
581     }
582    
583     void linsolqr_rhs_was_changed(linsolqr_system_t sys, real64 *rhs)
584     {
585     struct rhs_list *rl;
586    
587     if(CHECK_SYSTEM(sys)) {
588     FPRINTF(stderr,"Bad linsolqr_system_t found. rhs change ignored.\n");
589     return;
590     }
591     rl = find_rhs(sys->rl,rhs);
592     if( NOTNULL(rl) ) {
593     rl->solved = FALSE;
594     } else if( NOTNULL(rhs) ) {
595     FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_rhs_was_modified\n");
596     FPRINTF(stderr," Rhs does not exist.\n");
597     }
598     }
599    
600     void linsolqr_set_pivot_zero(linsolqr_system_t sys,real64 pivot_zero)
601     {
602     if(CHECK_SYSTEM(sys)) {
603     FPRINTF(stderr,"Bad linsolqr_system_t found. set_pivot_zero ignored.\n");
604     return;
605     }
606     if( pivot_zero < D_ZERO ) {
607     FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_pivot_zero\n");
608     FPRINTF(stderr," Invalid pivot zero of %g\n",pivot_zero);
609     } else {
610     sys->pivot_zero = pivot_zero;
611     linsolqr_matrix_was_changed(sys);
612     }
613     }
614    
615     real64 linsolqr_pivot_zero(linsolqr_system_t sys)
616     {
617     CHECK_SYSTEM(sys);
618     return( sys->pivot_zero );
619     }
620    
621     void linsolqr_set_pivot_tolerance(linsolqr_system_t sys, real64 ptol)
622     {
623     if(CHECK_SYSTEM(sys)) {
624     FPRINTF(stderr,"Bad linsolqr_system_t found. set_pivot_tol ignored.\n");
625     return;
626     }
627     if( ptol < D_ZERO || ptol > D_ONE ) {
628     FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_pivot_tolerance\n");
629     FPRINTF(stderr," Invalid pivot tolerance of %g\n",ptol);
630     } else {
631     sys->ptol = ptol;
632     linsolqr_matrix_was_changed(sys);
633     }
634     }
635    
636     void linsolqr_set_condition_tolerance(linsolqr_system_t sys, real64 ctol)
637     {
638     if(CHECK_SYSTEM(sys)) {
639     FPRINTF(stderr,
640     "Bad linsolqr_system_t found. set_condition_tolerance ignored.\n");
641     return;
642     }
643     if( ctol < D_ZERO || ctol > D_ONE ) {
644     FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_condition_tolerance\n");
645     FPRINTF(stderr," Invalid condition tolerance of %g\n",ctol);
646     } else {
647     sys->ctol = ctol;
648     linsolqr_matrix_was_changed(sys);
649     }
650     }
651    
652     void linsolqr_set_drop_tolerance(linsolqr_system_t sys, real64 dtol)
653     {
654     if(CHECK_SYSTEM(sys)) {
655     FPRINTF(stderr,
656     "Bad linsolqr_system_t found. set_drop_tolerance ignored.\n");
657     return;
658     }
659     if( dtol < D_ZERO || dtol > D_ONE ) {
660     FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_drop_tolerance\n");
661     FPRINTF(stderr," Invalid drop tolerance of %g\n",dtol);
662     } else {
663     sys->dtol = dtol;
664     linsolqr_matrix_was_changed(sys);
665     }
666     }
667    
668     real64 linsolqr_pivot_tolerance(linsolqr_system_t sys)
669     {
670     CHECK_SYSTEM(sys);
671     return( sys->ptol );
672     }
673    
674     real64 linsolqr_condition_tolerance(linsolqr_system_t sys)
675     {
676     CHECK_SYSTEM(sys);
677     return( sys->ctol );
678     }
679    
680     real64 linsolqr_drop_tolerance(linsolqr_system_t sys)
681     {
682     CHECK_SYSTEM(sys);
683     return( sys->dtol );
684     }
685    
686     extern enum factor_class linsolqr_fclass(linsolqr_system_t sys)
687     {
688     CHECK_SYSTEM(sys);
689     return( sys->fclass );
690     }
691     extern enum factor_method linsolqr_fmethod(linsolqr_system_t sys)
692     {
693     CHECK_SYSTEM(sys);
694     return( sys->fmethod );
695     }
696     extern enum reorder_method linsolqr_rmethod(linsolqr_system_t sys)
697     {
698     CHECK_SYSTEM(sys);
699     return( sys->rmethod );
700     }
701    
702     int32 linsolqr_rank(linsolqr_system_t sys)
703     {
704     CHECK_SYSTEM(sys);
705     if( !sys->factored ) {
706     FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_rank\n");
707     FPRINTF(stderr," System not factored yet.\n");
708     }
709     return(sys->rank);
710     }
711    
712     real64 linsolqr_smallest_pivot(linsolqr_system_t sys)
713     {
714     CHECK_SYSTEM(sys);
715     #if LINSOL_DEBUG
716     if( !sys->factored ) {
717     FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_smallest_pivot\n");
718     FPRINTF(stderr," System not factored yet.\n");
719     }
720     #endif
721     return(sys->smallest_pivot);
722     }
723    
724     /***************************************************************************\
725     Commonly used internal functions for sparse linear solvers based on mtx.
726     void insure_capacity(sys)
727     void insure_lu_capacity(sys)
728     void insure_qr_capacity(sys)
729     void forward_substitute(sys,rvec,transpose)
730     void backward_substitute(sys,rvec,transpose)
731     macro SQR(x)
732     int find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi)
733     \***************************************************************************/
734    
735     static real64 *raise_capacity(real64 *vec,
736     int32 oldcap,
737     int32 newcap)
738     /**
739     *** Raises the capacity of the array and returns a new array.
740     *** It is assumed that oldcap < newcap. vec is destroyed or
741     *** returned as appropriate.
742     *** If !NDEBUG, the vector expanded is also set to 0.
743     **/
744     {
745     real64 *newvec=NULL;
746     int i;
747     if (newcap < oldcap) {
748     #ifndef NDEBUG
749     for (i = 0; i < newcap; i++) {
750     newvec[i] = 0.0;
751     }
752     #endif
753     return vec;
754     }
755     if (NOTNULL(vec)) {
756     /* don't call realloc on null with newcap 0 or it frees */
757     newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64)));
758     } else {
759     newvec=(newcap > 0 ?
760     (real64 *)ascmalloc( newcap * sizeof(real64) ) : NULL);
761     }
762     #ifndef NDEBUG
763     for (i = 0; i < newcap; i++) {
764     newvec[i] = 0.0;
765     }
766     #endif
767     return newvec;
768     }
769    
770     static int32 *raise_qr_int_capacity(int32 *vec,
771     int32 oldcap,
772     int32 newcap)
773     /**
774     *** Raises the capacity of the index array and returns a new array.
775     *** It is assumed that oldcap < newcap. vec is destroyed or
776     *** returned as appropriate. vec returned is zeroed.
777     *** calling with newcap=0 does not force deallocation.
778     **/
779     {
780     int32 *newvec=NULL;
781     if (newcap < oldcap)
782     return vec;
783     if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
784     newvec=(int32 *)ascrealloc(vec,(newcap * sizeof(int32)));
785     mtx_zero_int32(vec,newcap);
786     } else {
787     newvec= (newcap > 0 ?
788     (int32 *)asccalloc( newcap , sizeof(int32) ) : NULL);
789     }
790     return newvec;
791     }
792    
793     static real64 *raise_qr_real_capacity(real64 *vec,
794     int32 oldcap,
795     int32 newcap)
796     /**
797     *** Raises the capacity of the real array and returns a new array.
798     *** It is assumed that oldcap < newcap. vec is destroyed or
799     *** returned as appropriate. vec returned is zeroed.
800     *** calling with newcap=0 does not force deallocation.
801     **/
802     {
803     real64 *newvec=NULL;
804     if (newcap < oldcap)
805     return vec;
806     if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
807     newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64)));
808     mtx_zero_real64(vec,newcap);
809     } else {
810     newvec= (newcap > 0 ?
811     (real64 *)asccalloc( newcap , sizeof(real64) ) : NULL);
812     }
813     return newvec;
814     }
815    
816     static struct qr_fill_t *raise_qr_fill_capacity(struct qr_fill_t *vec,
817     int32 oldcap,
818     int32 newcap)
819     /**
820     *** Raises the capacity of the fill array and returns a new array.
821     *** It is assumed that oldcap < newcap. vec is destroyed or
822     *** returned as appropriate. vec returned is zeroed.
823     *** calling with newcap=0 does not force deallocation.
824     **/
825     {
826     struct qr_fill_t *newvec=NULL;
827     if (newcap < oldcap)
828     return vec;
829     if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
830     newvec=(struct qr_fill_t *)
831     ascrealloc(vec,(newcap * sizeof(struct qr_fill_t)));
832     mtx_zero_char((char *)vec,newcap*sizeof(struct qr_fill_t));
833     } else {
834     newvec= (newcap > 0 ?
835     (struct qr_fill_t *)asccalloc( newcap , sizeof(struct qr_fill_t) ) :
836     NULL);
837     }
838     return newvec;
839     }
840    
841     static void insure_capacity(linsolqr_system_t sys)
842     /**
843     *** Insures that the capacity of all of the solution vectors
844     *** for each rhs is large enough.
845     *** The above implies a malloc if varvalue is null.
846     *** Assumes varvalue are at sys->capacity already.
847     **/
848     {
849     int32 req_cap;
850     req_cap = mtx_capacity(sys->coef);
851    
852     if( req_cap > sys->capacity ) {
853     struct rhs_list *rl;
854    
855     for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
856     rl->varvalue = raise_capacity(rl->varvalue,sys->capacity,req_cap);
857     sys->capacity = req_cap;
858     }
859     }
860    
861     static void insure_lu_capacity(linsolqr_system_t sys)
862     /**
863     *** Insures that the capacity of all of the ludata vectors.
864     *** The above implies a malloc if vector is null.
865     *** If not null, implies an extension if needed.
866     **/
867     {
868     int32 req_cap;
869     if (!sys || !(sys->coef) || !(sys->ludata)) {
870     FPRINTF(stderr,"linsolqr: insure_lu_capacity called with NULL pointer");
871     return;
872     }
873     req_cap = mtx_capacity(sys->coef);
874    
875     if( req_cap > sys->ludata->cap ) {
876     sys->ludata->pivlist =
877     raise_capacity(sys->ludata->pivlist,sys->ludata->cap,req_cap);
878     sys->ludata->tmp =
879     raise_capacity(sys->ludata->tmp,sys->ludata->cap,req_cap);
880     sys->ludata->cap = req_cap;
881     }
882     }
883    
884     static void insure_qr_capacity(linsolqr_system_t sys)
885     /**
886     *** Insures that the capacity of all of the qrdata vectors
887     *** is large enough.
888     *** The above implies a calloc if vector is null.
889     *** If not null, implies an extension and zeroing of the vector.
890     *** Also zeroes the simple elements of the qrdata.
891     **/
892     {
893     int32 req_cap;
894     if (!sys || !(sys->coef) || !(sys->qrdata)) {
895     FPRINTF(stderr,"linsolqr: insure_qr_capacity called with NULL pointer");
896     return;
897     }
898     req_cap = mtx_capacity(sys->coef);
899    
900     if( req_cap > sys->qrdata->cap ) {
901     sys->qrdata->alpha =
902     raise_qr_real_capacity(sys->qrdata->alpha,sys->qrdata->cap,req_cap);
903     sys->qrdata->sigma =
904     raise_qr_real_capacity(sys->qrdata->sigma,sys->qrdata->cap,req_cap);
905     sys->qrdata->tau =
906     raise_qr_real_capacity(sys->qrdata->tau,sys->qrdata->cap,req_cap);
907     sys->qrdata->hhcol =
908     raise_qr_real_capacity(sys->qrdata->hhcol,sys->qrdata->cap,req_cap);
909     sys->qrdata->hhrow =
910     raise_qr_real_capacity(sys->qrdata->hhrow,sys->qrdata->cap,req_cap);
911    
912     sys->qrdata->sp.data =
913     raise_qr_real_capacity(sys->qrdata->sp.data,sys->qrdata->cap,req_cap);
914     sys->qrdata->sp.idata =
915     raise_qr_int_capacity(sys->qrdata->sp.idata,sys->qrdata->cap,req_cap);
916    
917     sys->qrdata->fill =
918     raise_qr_fill_capacity(sys->qrdata->fill,sys->qrdata->cap,req_cap);
919    
920     sys->qrdata->cap = req_cap;
921     sys->qrdata->sp.cap = req_cap;
922     sys->qrdata->sp.len = 0;
923     }
924     sys->qrdata->nu=D_ZERO;
925     sys->qrdata->anu=D_ZERO;
926     sys->qrdata->asubnu=D_ZERO;
927     }
928    
929     static void forward_substitute(linsolqr_system_t sys,
930     real64 *arr,
931     boolean transpose)
932     /**
933     *** Forward substitute. It is assumed that the L (or U) part of
934     *** sys->factors is computed. This function converts c to x in place. The
935     *** values are stored in arr indexed by original row number (or original
936     *** column number).
937     ***
938     *** transpose = FALSE: transpose = TRUE:
939     *** T
940     *** L x = c U x = c
941     ***
942     *** The following formula holds:
943     *** 0<=k<r ==> x(k) = [c(k) - L(k,(0..k-1)) dot x(0..k-1)] / L(k,k)
944     *** or
945     *** 0<=k<r ==> x(k) = [c(k) - U((0..k-1),k) dot x(0..k-1)] / U(k,k)
946     ***
947     *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k]
948     *** and in matrix.
949     **/
950     {
951     mtx_range_t dot_rng;
952     mtx_coord_t nz;
953     real64 sum, *pivlist;
954     mtx_matrix_t mtx;
955     int32 dotlim;
956     boolean nonzero_found=FALSE;
957    
958     mtx=sys->factors;
959     pivlist=sys->ludata->pivlist;
960     dot_rng.low = sys->rng.low;
961     dotlim=dot_rng.low+sys->rank;
962     if (transpose) { /* arr is indexed by original column number */
963     for( nz.col=dot_rng.low; nz.col < dotlim; ++(nz.col) ) {
964     register int32 org_col;
965    
966     dot_rng.high = nz.col - 1;
967     org_col = mtx_col_to_org(mtx,nz.col);
968     if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
969     if (nonzero_found) {
970     sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE);
971     /* arr[org_col] = (arr[org_col] - sum) / D_ONE */;
972     arr[org_col] -=sum;
973     }
974     }
975    
976     } else { /* arr is indexed by original row number */
977     for( nz.row=dot_rng.low; nz.row < dotlim; ++(nz.row) ) {
978     register int32 org_row;
979    
980     dot_rng.high = nz.row - 1;
981     org_row = mtx_row_to_org(mtx,nz.row);
982     if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
983     if (nonzero_found) {
984     sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE);
985     /*
986     nz.col = nz.row;
987     arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz);
988     */
989     arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row];
990     }
991     }
992     }
993     }
994    
995     static void backward_substitute(linsolqr_system_t sys,
996     real64 *arr,
997     boolean transpose)
998     /**
999     *** Backward substitute. It is assumed that the U (or L) part of
1000     *** sys->factors is computed. This function converts rhs to c in place. The
1001     *** values are stored in arr indexed by original row number (or original
1002     *** column number).
1003     ***
1004     *** transpose = FALSE: transpose = TRUE:
1005     *** T
1006     *** U c = rhs L c = rhs
1007     ***
1008     *** The following formula holds:
1009     *** transpose=FALSE: (the usual for J.dx=-f where rhs is -f)
1010     *** 0<=k<r ==> c(k) = [rhs(k) - U(k,(k+1..r-1)) dot c(k+1..r-1)] / U(k,k)
1011     *** working up from the bottom.
1012     *** or
1013     *** transpose=TRUE:
1014     *** 0<=k<r ==> c(k) = [rhs(k) - L((k+1..r-1),k) dot c(k+1..r-1)] / L(k,k)
1015     *** working right to left.
1016     ***
1017     *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k]
1018     *** and in matrix.
1019     **/
1020     {
1021     mtx_range_t dot_rng;
1022     mtx_coord_t nz;
1023     real64 sum, *pivlist;
1024     mtx_matrix_t mtx;
1025     int32 dotlim;
1026     boolean nonzero_found=FALSE; /* once TRUE, substitution must be done
1027     over remaining rows/cols */
1028    
1029     dot_rng.high = sys->rng.low + sys->rank - 1;
1030     dotlim=sys->rng.low;
1031     mtx=sys->factors;
1032     pivlist=sys->ludata->pivlist;
1033     if (transpose) { /* arr is indexed by original column number */
1034     for( nz.col = dot_rng.high ; nz.col >= dotlim ; --(nz.col) ) {
1035     register int32 org_col;
1036    
1037     dot_rng.low = nz.col + 1;
1038    
1039     org_col = mtx_col_to_org(mtx,nz.col);
1040     if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
1041     if (nonzero_found) {
1042     sum= mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE);
1043     /*
1044     reminders:
1045     nz.row = nz.col;
1046     arr[org_col] = (arr[org_col] - sum) / mtx_value(mtx,&nz);
1047     */
1048     arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col];
1049     }
1050     }
1051     } else { /* arr is indexed by original row number */
1052     /* we are working from the bottom up */
1053     for( nz.row = dot_rng.high ; nz.row >= dotlim ; --(nz.row) ) {
1054     register int32 org_row;
1055    
1056     dot_rng.low = nz.row + 1;
1057     org_row = mtx_row_to_org(mtx,nz.row);
1058    
1059     if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
1060     if (nonzero_found) {
1061     sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE);
1062     /*
1063     reminders:
1064     nz.row = nz.col;
1065     arr[org_row] = (arr[org_row] - sum) / D_ONE;
1066     */
1067     arr[org_row] -= sum;
1068     }
1069     }
1070     }
1071     }
1072    
1073     #ifdef SQR
1074     #undef SQR
1075     #endif
1076     #define SQR(x) ((x)*(x))
1077     /*
1078     * int32 find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi);
1079     * real64 *vec,*rvec,tol;
1080     * int32 len, *ivec, *maxi;
1081     *
1082     * Search array vec of positive numbers for the first entry, k, which passes
1083     * (vec[k] >= tol*vecmax) where vecmax is the largest value in vec.
1084     * vec is an array len long. rvec, ivec are (worst case) len long.
1085     * *maxi will be set to the index of the first occurence of the largest
1086     * number in vec which is >= eps.
1087     * 0.0 <= tol <= 1.
1088     * If tol <=0, no search is done (*maxi = 0, k = 0).
1089     * Eps is an absolute number which vec values must be >= to before
1090     * being eligible for the tol test.
1091     *
1092     * Best case, highly nonlinear feature:
1093     * if on entry *maxi = len, we will do a simplified search for the
1094     * first k which satisfies tol and eps tests, based on the value
1095     * stored at vec[len-1].
1096     *
1097     * We could allocate and deallocate rvec/ivec since they are
1098     * temporaries, but it is more efficient for caller to manage that.
1099     * If tol>=1.0, rvec and ivec are ignored and may be NULL.
1100     * NOTE: we are going on value vec[i], not fabs(vec[i]).
1101     * Returns k.
1102     * Remember: GIGO.
1103     * Failure modes- if all numbers are <= 0.0, k and *maxi will be returned 0.
1104     * Failure modes- if all numbers are < eps, k and *maxi will be returned 0.
1105     *
1106     * find_pivot_index, should we implement, would search an int vector.
1107     *
1108     * Hint: if you know you have not increased any values in vec and have not
1109     * changed the value at vec[maxi] before a subsequent search, then call
1110     * with len = previous maxi+1 to reduce the search time.
1111     */
1112     static int32 find_pivot_number(const real64 *vec,
1113     const int32 len,
1114     const real64 tol,
1115     const real64 eps,
1116     int32 * const ivec,
1117     real64 * const rvec,
1118     int32 * const maxi)
1119     {
1120     int32 j,k;
1121     if (tol <= D_ZERO ) {
1122     *maxi = 0;
1123     return 0;
1124     }
1125     if (tol >= D_ONE) {
1126     register real64 biggest = MAX(eps,D_ZERO);
1127     /* get the biggest, period */
1128     k = 0;
1129     for (j=0; j < len; j++) {
1130     if (vec[j] > biggest) {
1131     biggest = vec[j];
1132     k = j;
1133     }
1134     }
1135     *maxi = k;
1136     } else {
1137     int32 bigi;
1138     bigi = 0;
1139     rvec[0] = eps;
1140     ivec[0] = 0;
1141     if (*maxi==len) {
1142     /* cheap search against eps and tol, no list. */
1143     register real64 thold;
1144     thold = tol * vec[len-1];
1145     if (thold >= eps) {
1146     for (k = 0; k < len && vec[k] < thold; k++);
1147     } else {
1148     for (k = 0; k < len && vec[k] < eps; k++);
1149     }
1150     /* adjust maxi to still point at last location, as indicated by len */
1151     (*maxi)--;
1152     } else {
1153     real64 rlast;
1154     rlast = eps;
1155     /* create short list */
1156     for (j=0; j < len; j++) {
1157     while (j < len && vec[j] <= rlast) j++; /* skip to next max */
1158     if (j < len) {
1159     ivec[bigi] = j;
1160     rvec[bigi] = vec[j];
1161     bigi++; /* bigi ends up being len of rvec or, if vec all<= eps, 0 */
1162     rlast = rvec[bigi-1];
1163     }
1164     }
1165     if (bigi == 0) {
1166     *maxi = k = 0;
1167     } else {
1168     register real64 thold;
1169     /* Note: if bigi is enormous, we should do a binary search,
1170     not linear. */
1171     *maxi = ivec[bigi-1];
1172     thold = tol * rvec[bigi-1];
1173     /* get pivot from shortlist, searching backward */
1174     if (thold >= eps) {
1175     for (k = bigi-1; k >= 0 && rvec[k] >= thold; k--);
1176     } else {
1177     for (k = bigi-1; k >= 0 && rvec[k] >= eps; k--);
1178     }
1179     /* translate pivot to vec index */
1180     k = ivec[k+1];
1181     }
1182     }
1183     }
1184     return k;
1185     }
1186    
1187     static void calc_dependent_rows_ranki1(linsolqr_system_t sys)
1188     {
1189     mtx_coord_t nz;
1190     real64 value;
1191     mtx_range_t colrange;
1192     mtx_range_t rowrange;
1193     real64 *lc;
1194     mtx_matrix_t mtx;
1195    
1196     sys->rowdeps = TRUE;
1197     if( ( (sys->reg.row.low == sys->rng.low) &&
1198     ( sys->reg.row.high == sys->rng.low+sys->rank-1 )
1199     ) || sys->rank==0 )
1200     return;
1201    
1202     lc = sys->ludata->tmp;
1203     colrange.low = sys->rng.low;
1204     colrange.high = colrange.low + sys->rank - 1;
1205     rowrange.low = sys->rng.high;
1206     rowrange.high = sys->rng.low+sys->rank;
1207     mtx=sys->factors;
1208    
1209     nz.row = sys->reg.row.low;
1210     for( ; nz.row <= sys->reg.row.high; nz.row++ ) {
1211     if( nz.row == sys->rng.low ) {
1212     nz.row = rowrange.high-1;
1213     continue;
1214     }
1215     mtx_zero_real64(lc,(sys->capacity));
1216     mtx_org_row_vec(mtx,nz.row,lc,&colrange);
1217     if( nz.row < rowrange.high || nz.row > rowrange.low )
1218     backward_substitute(sys,lc,TRUE);
1219     forward_substitute(sys,lc,TRUE);
1220     mtx_clear_row(mtx,nz.row,&colrange);
1221     for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) {
1222     value = lc[mtx_col_to_org(mtx,nz.col)];
1223     if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
1224     }
1225     }
1226     }
1227    
1228    
1229     static void calc_dependent_cols_ranki1(linsolqr_system_t sys)
1230     {
1231     mtx_coord_t nz;
1232     real64 value;
1233     mtx_range_t rowrange;
1234     mtx_range_t colrange;
1235     real64 *lc;
1236     mtx_matrix_t mtx;
1237    
1238     sys->coldeps = TRUE;
1239     if( ( (sys->reg.col.low == sys->rng.low) &&
1240     ( sys->reg.col.high == sys->rng.low+sys->rank-1 )
1241     ) || sys->rank==0 )
1242     return;
1243    
1244     lc = sys->ludata->tmp;
1245     rowrange.low = sys->rng.low;
1246     rowrange.high = rowrange.low + sys->rank - 1;
1247     colrange.high = sys->rng.low+sys->rank;
1248     colrange.low = sys->rng.high;
1249     mtx=sys->factors;
1250    
1251     nz.col = sys->reg.col.low;
1252     for( ; nz.col <= sys->reg.col.high; nz.col++ ) {
1253     if( nz.col == sys->rng.low ) {
1254     nz.col = colrange.high-1;
1255     continue;
1256     }
1257     mtx_zero_real64(lc,sys->capacity);
1258     mtx_org_col_vec(mtx,nz.col,lc,&rowrange);
1259     if( nz.col < colrange.high || nz.col > colrange.low )
1260     backward_substitute(sys,lc,FALSE);
1261     forward_substitute(sys,lc,FALSE);
1262     mtx_clear_col(mtx,nz.col,&rowrange);
1263     for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) {
1264     value = lc[mtx_row_to_org(mtx,nz.row)];
1265     if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
1266     }
1267     }
1268     }
1269    
1270     /**
1271     *** Given a matrix and a diagonal range, sets the diagonal elements to 0
1272     *** but does not delete them in the range low to high inclusive.
1273     *** worst cost= k*(nonzeros in rows of range treated).
1274     *** likely cost= k*(nonzeros in column pivoted rows of range treated)
1275     *** since you can smartly put in the diagonal last and find
1276     *** it first on many occasions.
1277     *** This is a good bit cheaper than deleting the elements unless one
1278     *** expects thousands of solves.
1279     **/
1280     static void zero_diagonal_elements(mtx_matrix_t mtx,
1281     int32 low, int32 high)
1282     {
1283     mtx_coord_t nz;
1284     for (nz.row = low; nz.row <= high; nz.row++) {
1285     nz.col = nz.row;
1286     mtx_set_value(mtx,&nz,0.0);
1287     }
1288     }
1289    
1290     static void zero_unpivoted_vars(linsolqr_system_t sys,
1291     real64 *varvalues,
1292     boolean transpose)
1293     /**
1294     *** Sets the values of unpivoted variables to zero.
1295     **/
1296     {
1297     int32 ndx,order;
1298     mtx_matrix_t mtx;
1299    
1300     mtx = sys->factors;
1301     order = mtx_order(mtx);
1302     for( ndx = 0 ; ndx < sys->rng.low ; ++ndx )
1303     if (transpose)
1304     varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO;
1305     else
1306     varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO;
1307    
1308     for( ndx = sys->rng.low + sys->rank ; ndx < order ; ++ndx )
1309     if (transpose)
1310     varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO;
1311     else
1312     varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO;
1313     }
1314    
1315     #if LINSOL_DEBUG
1316     static void debug_out_factors(FILE *fp,linsolqr_system_t sys)
1317     /**
1318     *** Outputs permutation and values of the nonzero elements in the
1319     *** factor matrix square region given by sys->rng.
1320     **/
1321     {
1322     mtx_region_t reg;
1323     reg.row.low=reg.col.low=sys->rng.low;
1324     reg.row.high=reg.col.high=sys->rng.high;
1325     mtx_write_region(fp,sys->factors,&reg);
1326     }
1327     #endif /* LINSOL_DEBUG */
1328    
1329     /***************************************************************************\
1330     Reordering functions for SPK1, and possibly for other schemes to be
1331     implemented later.
1332     The stuff here is almost, but not quite, black magic. Don't tinker with it.
1333     \***************************************************************************/
1334    
1335     /*********************************
1336     begin of spk1 stuff
1337     *********************************/
1338     struct reorder_list { /* List of rows/columns and their counts. */
1339     int32 ndx;
1340     int32 count;
1341     struct reorder_list *next;
1342     };
1343    
1344     struct reorder_vars {
1345     mtx_matrix_t mtx;
1346     mtx_region_t reg; /* Current active region */
1347     int32 colhigh; /* Original value of reg.col.high */
1348     struct reorder_list *tlist; /* Array of (enough) list elements */
1349     int32 *rowcount; /* rowcount[reg.row.low .. reg.row.high] */
1350     };
1351    
1352     static void adjust_row_count(struct reorder_vars *vars,int32 removed_col)
1353     /**
1354     *** Adjusts the row counts to account for the (to be) removed column.
1355     **/
1356     {
1357     mtx_coord_t nz;
1358     real64 value;
1359     nz.col = removed_col;
1360     nz.row = mtx_FIRST;
1361     while( value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row)),
1362     nz.row != mtx_LAST )
1363     --(vars->rowcount[nz.row]);
1364     }
1365    
1366     static void assign_row_and_col(struct reorder_vars *vars,
1367     int32 row,
1368     int32 col)
1369     /**
1370     *** Assigns the given row to the given column, moving the row and column
1371     *** to the beginning of the active region and removing them (readjusting
1372     *** the region). The row counts are NOT adjusted. If col == mtx_NONE,
1373     *** then no column is assigned and the row is moved to the end of the
1374     *** active block instead. Otherwise, it is assumed that the column
1375     *** is active.
1376     **/
1377     {
1378     if( col == mtx_NONE ) {
1379     mtx_swap_rows(vars->mtx,row,vars->reg.row.high);
1380     vars->rowcount[row] = vars->rowcount[vars->reg.row.high];
1381     --(vars->reg.row.high);
1382     } else {
1383     mtx_swap_rows(vars->mtx,row,vars->reg.row.low);
1384     vars->rowcount[row] = vars->rowcount[vars->reg.row.low];
1385     mtx_swap_cols(vars->mtx,col,vars->reg.col.low);
1386     ++(vars->reg.row.low);
1387     ++(vars->reg.col.low);
1388     }
1389     }
1390    
1391     static void push_column_on_stack(struct reorder_vars *vars,int32 col)
1392     /**
1393     *** Pushes the given column onto the stack. It is assumed that the
1394     *** column is active. Row counts are adjusted.
1395     **/
1396     {
1397     adjust_row_count(vars,col);
1398     mtx_swap_cols(vars->mtx,col,vars->reg.col.high);
1399     --(vars->reg.col.high);
1400     }
1401    
1402     static int32 pop_column_from_stack(struct reorder_vars *vars)
1403     /**
1404     *** Pops the column on the "top" of the stack off of the stack and
1405     *** returns the column index, where it now lies in the active region.
1406     *** If the stack is empty, mtx_NONE is returned. Row counts are NOT
1407     *** adjusted (this together with a subsequent assignment of this column
1408     *** ==> no row count adjustment necessary).
1409     **/
1410     {
1411     if( vars->reg.col.high < vars->colhigh )
1412     return(++(vars->reg.col.high));
1413     else
1414     return( mtx_NONE );
1415     }
1416    
1417     static void assign_null_rows(struct reorder_vars *vars)
1418     /**
1419     *** Assigns empty rows, moving them to the assigned region. It is
1420     *** assumed that row counts are correct. Columns are assigned off the
1421     *** stack.
1422     **/
1423     {
1424     int32 row;
1425    
1426     for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1427     if( vars->rowcount[row] == 0 )
1428     assign_row_and_col(vars , row , pop_column_from_stack(vars));
1429     }
1430    
1431     static void forward_triangularize(struct reorder_vars *vars)
1432     /**
1433     *** Forward triangularizes the region, assigning singleton rows with their
1434     *** one and only incident column until there are no more. The row counts
1435     *** must be correct, and they are updated.
1436     **/
1437     {
1438     boolean change;
1439     mtx_coord_t nz;
1440     real64 value;
1441    
1442     do {
1443     change = FALSE;
1444     for( nz.row = vars->reg.row.low ;
1445     nz.row <= vars->reg.row.high && vars->rowcount[nz.row] != 1;
1446     ++nz.row ) ;
1447     if( nz.row <= vars->reg.row.high ) {
1448     /* found singleton row */
1449     nz.col = mtx_FIRST; /* this is somehow coming back with nz.col -1 */
1450     value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col));
1451     adjust_row_count(vars,nz.col);
1452     assign_row_and_col(vars,nz.row,nz.col);
1453     change = TRUE;
1454     }
1455     } while( change );
1456     }
1457    
1458     static int32 select_row(struct reorder_vars *vars)
1459     /**
1460     *** Selects a row and returns its index. It is assumed that there is a
1461     *** row. Row counts must be correct. vars->tlist will be used.
1462     **/
1463     {
1464     int32 min_row_count;
1465     int32 max_col_count;
1466     int32 row;
1467     int32 i, nties=-2; /* # elements currently defined in vars->tlist */
1468     int32 sum;
1469     mtx_coord_t nz;
1470     real64 value;
1471     mtx_matrix_t mtx;
1472     mtx_range_t *colrng, *rowrng;
1473    
1474     /* Set to something > any possible value */
1475     min_row_count = vars->reg.col.high-vars->reg.col.low+2;
1476     for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1477     if( vars->rowcount[row] <= min_row_count ) {
1478     if( vars->rowcount[row] < min_row_count ) {
1479     min_row_count = vars->rowcount[row];
1480     nties = 0;
1481     }
1482     vars->tlist[nties++].ndx = row;
1483     }
1484     /**
1485     *** vars->tlist[0..nties-1] is a list of row numbers which tie for
1486     *** minimum row count.
1487     **/
1488    
1489     max_col_count = -1; /* < any possible value */
1490     i = 0;
1491     mtx = vars->mtx;
1492     colrng=&(vars->reg.col);
1493     rowrng=&(vars->reg.row);
1494     while( i < nties ) {
1495    
1496     sum = 0;
1497     nz.row = vars->tlist[i].ndx;
1498     nz.col = mtx_FIRST;
1499     while( value = mtx_next_in_row(mtx,&nz,colrng),
1500     nz.col != mtx_LAST )
1501     sum += mtx_nonzeros_in_col(mtx,nz.col,rowrng);
1502     if( sum > max_col_count ) {
1503     max_col_count = sum;
1504     row = nz.row;
1505     }
1506     i++;
1507     }
1508     /**
1509     *** Now row contains the row with the minimum row count, which has the
1510     *** greatest total column count of incident columns among all rows with
1511     *** the same (minimum) row count. Select it.
1512     **/
1513     return(row);
1514     }
1515    
1516     static void spk1_reorder(struct reorder_vars *vars)
1517     /**
1518     *** Reorders the assigned matrix vars->mtx within the specified bounding
1519     *** block region vars->reg. The region is split into 6 subregions during
1520     *** reordering: the rows are divided in two, and the columns divided in
1521     *** three. Initially everything is in the active subregion. Ultimately,
1522     *** everything will be assigned.
1523     ***
1524     *** <-- assigned -->|<-- active-->|<-- on stack -->|
1525     *** ----+----------------+-------------+----------------+
1526     *** a | | | |
1527     *** s | | | |
1528     *** s | | | |
1529     *** i | (SQUARE) | | |
1530     *** g | | | |
1531     *** n | | | |
1532     *** e | | | |
1533     *** d | | | |
1534     *** ----+----------------+-------------+----------------+
1535     *** a | | | |
1536     *** c | | ACTIVE | |
1537     *** t | | REGION | |
1538     *** i | | | |
1539     *** v | | | |
1540     *** e | | | |
1541     *** ----+----------------+-------------+----------------+
1542     ***
1543     *** The algorithm is roughly as follows:
1544     *** (1) select a row (by some criterion).
1545     *** (2) push columns incident on that row onto the stack in decreasing
1546     *** order of their length.
1547     *** (3) pop first column off the stack and assign it to the selected
1548     *** row.
1549     *** (4) forward-triangularize (assign singleton rows with their one
1550     *** and only incident column, until no longer possible).
1551     ***
1552     *** (1)-(4) should be repeated until the active subregion becomes empty.
1553     ***
1554     *** Everything above was written as though the entire matrix is
1555     *** involved. In reality, only the relevant square region is involved.
1556     **/
1557     {
1558     int32 row, size;
1559     int32 *rowcount_array_origin;
1560     mtx_matrix_t mtx;
1561    
1562     size = MAX(vars->reg.row.high,vars->reg.col.high) + 1;
1563     vars->tlist = size > 0 ? (struct reorder_list *)
1564     ascmalloc( size*sizeof(struct reorder_list) ) : NULL;
1565     vars->rowcount = rowcount_array_origin = size > 0 ? (int32 *)
1566     ascmalloc( size*sizeof(int32) ) : NULL;
1567     mtx = vars->mtx;
1568    
1569     vars->colhigh = vars->reg.col.high;
1570     /* Establish row counts */
1571     for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1572     vars->rowcount[row] =
1573     mtx_nonzeros_in_row(mtx,row,&(vars->reg.col));
1574    
1575     while(TRUE) {
1576     struct reorder_list *head;
1577     int32 nelts; /* # elements "allocated" from vars->tlist */
1578     mtx_coord_t nz;
1579     real64 value;
1580    
1581     forward_triangularize(vars);
1582     assign_null_rows(vars);
1583     if( vars->reg.row.low>vars->reg.row.high ||
1584     vars->reg.col.low>vars->reg.col.high ) {
1585     /* Active region is now empty, done */
1586     if( NOTNULL(vars->tlist) )
1587     ascfree( vars->tlist );
1588     if( NOTNULL(rowcount_array_origin) )
1589     ascfree( rowcount_array_origin );
1590     return;
1591     }
1592    
1593     head = NULL;
1594     nelts = 0;
1595     nz.row = select_row(vars);
1596     nz.col = mtx_FIRST;
1597     while( value = mtx_next_in_row(mtx,&nz,&(vars->reg.col)),
1598     nz.col != mtx_LAST ) {
1599     struct reorder_list **q,*p;
1600    
1601     p = &(vars->tlist[nelts++]);
1602     p->ndx = mtx_col_to_org(mtx,nz.col);
1603     p->count = mtx_nonzeros_in_col(mtx,nz.col,&(vars->reg.row));
1604     for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) );
1605     p->next = *q;
1606     *q = p;
1607     }
1608     /**
1609     *** We now have a list of columns which intersect the selected row.
1610     *** The list is in order of decreasing column count.
1611     **/
1612    
1613     /* Push incident columns on stack */
1614     for( ; NOTNULL(head) ; head = head->next )
1615     push_column_on_stack(vars,mtx_org_to_col(mtx,head->ndx));
1616    
1617     /* Pop column off stack and assign to selected row */
1618     assign_row_and_col(vars , nz.row , pop_column_from_stack(vars));
1619     }
1620     /* Not reached. */
1621     }
1622    
1623     /*********************************
1624     end of spk1 stuff
1625     *********************************/
1626     /*********************************
1627     begin of tspk1 stuff
1628     *********************************/
1629     struct creorder_list { /* List of columns/rows and their counts. */
1630     int32 ndx;
1631     int32 count;
1632     struct creorder_list *next;
1633     };
1634    
1635     struct creorder_vars {
1636     mtx_matrix_t mtx;
1637     mtx_region_t reg; /* Current active region */
1638     int32 rowhigh; /* Original value of reg.row.high */
1639     struct creorder_list *tlist; /* Array of (enough) list elements */
1640     int32 *colcount; /* colcount[reg.col.low .. reg.col.high] */
1641     };
1642    
1643     static void adjust_col_count(struct creorder_vars *vars,int32 removed_row)
1644     /**
1645     *** Adjusts the column counts to account for the (to be) removed row.
1646     **/
1647     {
1648     mtx_coord_t nz;
1649     real64 value;
1650     nz.row = removed_row;
1651     nz.col = mtx_FIRST;
1652     while( value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col)),
1653     nz.col != mtx_LAST )
1654     --(vars->colcount[nz.col]);
1655     }
1656    
1657     static void assign_col_and_row(struct creorder_vars *vars,
1658     int32 col,
1659     int32 row)
1660     /**
1661     *** Assigns the given row to the given column, moving the row and column
1662     *** to the beginning of the active region and removing them (readjusting
1663     *** the region). The col counts are NOT adjusted. If col == mtx_NONE,
1664     *** then no column is assigned and the col is moved to the end of the
1665     *** active block instead. Otherwise, it is assumed that the row
1666     *** is active.
1667     **/
1668     {
1669     if( row == mtx_NONE ) {
1670     mtx_swap_cols(vars->mtx,col,vars->reg.col.high);
1671     vars->colcount[col] = vars->colcount[vars->reg.col.high];
1672     --(vars->reg.col.high);
1673     } else {
1674     mtx_swap_cols(vars->mtx,col,vars->reg.col.low);
1675     vars->colcount[col] = vars->colcount[vars->reg.col.low];
1676     mtx_swap_rows(vars->mtx,row,vars->reg.row.low);
1677     ++(vars->reg.col.low);
1678     ++(vars->reg.row.low);
1679     }
1680     }
1681    
1682     static void push_row_on_stack(struct creorder_vars *vars,int32 row)
1683     /**
1684     *** Pushes the given row onto the stack. It is assumed that the
1685     *** row is active. Col counts are adjusted.
1686     **/
1687     {
1688     adjust_col_count(vars,row);
1689     mtx_swap_rows(vars->mtx,row,vars->reg.row.high);
1690     --(vars->reg.row.high);
1691     }
1692    
1693     static int32 pop_row_from_stack(struct creorder_vars *vars)
1694     /**
1695     *** Pops the row on the "top" of the stack off of the stack and
1696     *** returns the row index, where it now lies in the active region.
1697     *** If the stack is empty, mtx_NONE is returned. Col counts are NOT
1698     *** adjusted (this together with a subsequent assignment of this row
1699     *** ==> no col count adjustment necessary).
1700     **/
1701     {
1702     if( vars->reg.row.high < vars->rowhigh )
1703     return(++(vars->reg.row.high));
1704     else
1705     return( mtx_NONE );
1706     }
1707    
1708     static void assign_null_cols(struct creorder_vars *vars)
1709     /**
1710     *** Assigns empty cols, moving them to the assigned region. It is
1711     *** assumed that col counts are correct. Rows are assigned off the
1712     *** stack.
1713     **/
1714     {
1715     int32 col;
1716    
1717     for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1718     if( vars->colcount[col] == 0 )
1719     assign_col_and_row(vars , col , pop_row_from_stack(vars));
1720     }
1721    
1722     static void cforward_triangularize(struct creorder_vars *vars)
1723     /**
1724     *** Forward triangularizes the region, assigning singleton columns with their
1725     *** one and only incident row until there are no more. The column counts
1726     *** must be correct, and they are updated.
1727     **/
1728     {
1729     boolean change;
1730    
1731     do {
1732     mtx_coord_t nz;
1733     real64 value;
1734     change = FALSE;
1735     for( nz.col = vars->reg.col.low ;
1736     nz.col <= vars->reg.col.high && vars->colcount[nz.col] != 1;
1737     ++nz.col ) ;
1738     if( nz.col <= vars->reg.col.high ) {
1739     /* found singleton col */
1740     nz.row = mtx_FIRST;
1741     value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row));
1742     adjust_col_count(vars,nz.row);
1743     assign_col_and_row(vars,nz.col,nz.row);
1744     change = TRUE;
1745     }
1746     } while( change );
1747     }
1748    
1749     static int32 select_col(struct creorder_vars *vars)
1750     /**
1751     *** Selects a col and returns its index. It is assumed that there is a
1752     *** col. Col counts must be correct. vars->tlist will be used.
1753     **/
1754     {
1755     int32 min_col_count;
1756     int32 max_row_count;
1757     int32 col;
1758     int32 i, nties=-2; /* # elements currently defined in vars->tlist */
1759     int32 sum;
1760     mtx_coord_t nz;
1761     real64 value;
1762     mtx_matrix_t mtx;
1763     mtx_range_t *colrng, *rowrng;
1764    
1765     /* Set to something > any possible value */
1766     min_col_count = vars->reg.row.high-vars->reg.row.low+2;
1767     for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1768     if( vars->colcount[col] <= min_col_count ) {
1769     if( vars->colcount[col] < min_col_count ) {
1770     min_col_count = vars->colcount[col];
1771     nties = 0;
1772     }
1773     vars->tlist[nties++].ndx = col;
1774     }
1775     /**
1776     *** vars->tlist[0..nties-1] is a list of row numbers which tie for
1777     *** minimum col count.
1778     **/
1779    
1780     max_row_count = -1; /* < any possible value */
1781     i = 0;
1782     mtx = vars->mtx;
1783     rowrng=&(vars->reg.row);
1784     colrng=&(vars->reg.col);
1785     while( i < nties ) {
1786    
1787     sum = 0;
1788     nz.row = mtx_FIRST;
1789     nz.col = vars->tlist[i].ndx;
1790     while( value = mtx_next_in_col(mtx,&nz,rowrng),
1791     nz.row != mtx_LAST )
1792     sum += mtx_nonzeros_in_row(mtx,nz.row,colrng);
1793     if( sum > max_row_count ) {
1794     max_row_count = sum;
1795     col = nz.col;
1796     }
1797     i++;
1798     }
1799     /**
1800     *** Now col contains the col with the minimum col count, which has the
1801     *** greatest total row count of incident rows among all cols with
1802     *** the same (minimum) col count. Select it.
1803     **/
1804     return(col);
1805     }
1806    
1807     static void tspk1_reorder(struct creorder_vars *vars)
1808     /**
1809     *** Transpose the picture and explanation that follows:
1810     *** Reorders the assigned matrix vars->mtx within the specified bounding
1811     *** block region vars->reg. The region is split into 6 subregions during
1812     *** reordering: the rows are divided in two, and the columns divided in
1813     *** three. Initially everything is in the active subregion. Ultimately,
1814     *** everything will be assigned.
1815     ***
1816     *** <-- assigned -->|<-- active-->|<-- on stack -->|
1817     *** ----+----------------+-------------+----------------+
1818     *** a | | | |
1819     *** s | | | |
1820     *** s | | | |
1821     *** i | (SQUARE) | | |
1822     *** g | | | |
1823     *** n | | | |
1824     *** e | | | |
1825     *** d | | | |
1826     *** ----+----------------+-------------+----------------+
1827     *** a | | | |
1828     *** c | | ACTIVE | |
1829     *** t | | REGION | |
1830     *** i | | | |
1831     *** v | | | |
1832     *** e | | | |
1833     *** ----+----------------+-------------+----------------+
1834     ***
1835     *** The algorithm is roughly as follows:
1836     *** (1) select a row (by some criterion).
1837     *** (2) push columns incident on that row onto the stack in decreasing
1838     *** order of their length.
1839     *** (3) pop first column off the stack and assign it to the selected
1840     *** row.
1841     *** (4) forward-triangularize (assign singleton rows with their one
1842     *** and only incident column, until no longer possible).
1843     ***
1844     *** (1)-(4) should be repeated until the active subregion becomes empty.
1845     ***
1846     *** Everything above was written as though the entire matrix is
1847     *** involved. In reality, only the relevant square region is involved.
1848     **/
1849     {
1850     int32 col, size;
1851     int32 *colcount_array_origin;
1852     mtx_matrix_t mtx;
1853    
1854     size = vars->reg.col.high - vars->reg.col.low + 1;
1855     size = MAX(size,vars->reg.row.high - vars->reg.row.low + 1);
1856     vars->tlist = size > 0 ? (struct creorder_list *)
1857     ascmalloc( size*sizeof(struct creorder_list) ) : NULL;
1858     colcount_array_origin = size > 0 ? (int32 *)
1859     ascmalloc( size*sizeof(int32) ) : NULL;
1860     vars->colcount =
1861     NOTNULL(colcount_array_origin) ?
1862     colcount_array_origin - vars->reg.col.low : NULL;
1863     mtx = vars->mtx;
1864    
1865     vars->rowhigh = vars->reg.row.high;
1866     /* Establish col counts */
1867     for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1868     vars->colcount[col] =
1869     mtx_nonzeros_in_col(mtx,col,&(vars->reg.row));
1870    
1871     while(TRUE) {
1872     struct creorder_list *head;
1873     int32 nelts; /* # elements "allocated" from vars->tlist */
1874     mtx_coord_t nz;
1875     real64 value;
1876    
1877     cforward_triangularize(vars);
1878     assign_null_cols(vars);
1879     if( vars->reg.col.low > vars->reg.col.high ||
1880     vars->reg.row.low > vars->reg.row.high ) {
1881     /* Active region is now empty, done */
1882     if( NOTNULL(vars->tlist) )
1883     ascfree( vars->tlist );
1884     if( NOTNULL(colcount_array_origin) )
1885     ascfree( colcount_array_origin );
1886     return;
1887     }
1888    
1889     head = NULL;
1890     nelts = 0;
1891     nz.row = mtx_FIRST;
1892     nz.col = select_col(vars);
1893     while( value = mtx_next_in_col(mtx,&nz,&(vars->reg.row)),
1894     nz.row != mtx_LAST ) {
1895     struct creorder_list **q,*p;
1896    
1897     p = &(vars->tlist[nelts++]);
1898     p->ndx = mtx_row_to_org(mtx,nz.row);
1899     p->count = mtx_nonzeros_in_row(mtx,nz.row,&(vars->reg.col));
1900     for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) );
1901     p->next = *q;
1902     *q = p;
1903     }
1904     /**
1905     *** We now have a list of columns which intersect the selected row.
1906     *** The list is in order of decreasing column count.
1907     **/
1908    
1909     /* Push incident rows on stack */
1910     for( ; NOTNULL(head) ; head = head->next )
1911     push_row_on_stack(vars,mtx_org_to_row(mtx,head->ndx));
1912    
1913     /* Pop row off stack and assign to selected col */
1914     assign_col_and_row(vars , nz.col , pop_row_from_stack(vars));
1915     }
1916     /* Not reached. */
1917     }
1918    
1919     /*********************************
1920     end of tspk1 stuff
1921     *********************************/
1922    
1923     static boolean nonempty_row(mtx_matrix_t mtx,int32 row)
1924     /**
1925     *** ? row not empty in mtx.
1926     **/
1927     {
1928     mtx_coord_t nz;
1929     real64 value;
1930     value = mtx_next_in_row(mtx, mtx_coord(&nz,row,mtx_FIRST), mtx_ALL_COLS);
1931     return( nz.col != mtx_LAST );
1932     }
1933    
1934     static boolean nonempty_col(mtx_matrix_t mtx,int32 col)
1935     /**
1936     *** ? column not empty in mtx.
1937     **/
1938     {
1939     mtx_coord_t nz;
1940     real64 value;
1941     value = mtx_next_in_col(mtx, mtx_coord(&nz,mtx_FIRST,col), mtx_ALL_ROWS);
1942     return( nz.row != mtx_LAST );
1943     }
1944    
1945     static void determine_pivot_range(linsolqr_system_t sys)
1946     /**
1947     *** Calculates sys->rng from sys->coef.
1948     **/
1949     {
1950     sys->reg.row.low = sys->reg.row.high = -1;
1951     sys->reg.col.low = sys->reg.col.high = -1;
1952    
1953     for( sys->rng.high=mtx_order(sys->coef) ; --(sys->rng.high) >= 0 ; ) {
1954     if( nonempty_row(sys->coef,sys->rng.high) &&
1955     sys->reg.row.high < 0 )
1956     sys->reg.row.high = sys->rng.high;
1957     if( nonempty_col(sys->coef,sys->rng.high) &&
1958     sys->reg.col.high < 0 )
1959     sys->reg.col.high = sys->rng.high;
1960     if( nonempty_row(sys->coef,sys->rng.high) &&
1961     nonempty_col(sys->coef,sys->rng.high) )
1962     break;
1963     }
1964    
1965     for( sys->rng.low=0 ; sys->rng.low <= sys->rng.high ; ++(sys->rng.low) ) {
1966     if( nonempty_row(sys->coef,sys->rng.low) &&
1967     sys->reg.row.low < 0 )
1968     sys->reg.row.low = sys->rng.low;
1969     if( nonempty_col(sys->coef,sys->rng.low) &&
1970     sys->reg.col.low < 0 )
1971     sys->reg.col.low = sys->rng.low;
1972     if( nonempty_row(sys->coef,sys->rng.low) &&
1973     nonempty_col(sys->coef,sys->rng.low) )
1974     break;
1975     }
1976     }
1977    
1978     static void square_region(linsolqr_system_t sys,mtx_region_t *region)
1979     /**
1980     *** Get the largest square confined to the diagonal within the region given
1981     *** and set sys->rng accordingly.
1982     **/
1983     {
1984     sys->reg = *region;
1985     sys->rng.low = MAX(region->row.low,region->col.low);
1986     sys->rng.high = MIN(region->row.high,region->col.high);
1987     }
1988    
1989     static int ranki_reorder(linsolqr_system_t sys,mtx_region_t *region)
1990     /**
1991     *** The region to reorder is first isolated by truncating the region
1992     *** provided to the largest square region confined to the matrix diagonal.
1993     *** It is presumed it will contain no empty rows or columns and will
1994     *** provide the basis of candidate pivots when factoring.
1995     **/
1996     {
1997     struct reorder_vars vars;
1998     CHECK_SYSTEM(sys);
1999     if (sys->fclass != ranki) {
2000     FPRINTF(stderr,"linsolqr_reorder: spk1 reorder called on system\n");
2001     FPRINTF(stderr," with inappropriate factor class\n");
2002     return 1;
2003     }
2004     if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2005     else square_region(sys,region);
2006    
2007     vars.mtx = sys->coef;
2008     vars.reg.row.low = vars.reg.col.low = sys->rng.low;
2009     vars.reg.row.high = vars.reg.col.high = sys->rng.high;
2010     spk1_reorder(&vars);
2011     return 0;
2012     }
2013    
2014     static int tranki_reorder(linsolqr_system_t sys,mtx_region_t *region)
2015     /**
2016     *** The region to reorder is first isolated by truncating the region
2017     *** provided to the largest square region confined to the matrix diagonal.
2018     *** It is presumed it will contain no empty rows or columns and will
2019     *** provide the basis of candidate pivots when factoring.
2020     **/
2021     {
2022     struct creorder_vars vars;
2023     CHECK_SYSTEM(sys);
2024     if ( !(sys->fclass==ranki || sys->fclass==s_qr) ) {
2025     FPRINTF(stderr,"linsolqr_reorder: tspk1 reorder called on system\n");
2026     FPRINTF(stderr," with inappropriate factor method\n");
2027     return 1;
2028     }
2029     if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2030     else square_region(sys,region);
2031    
2032     vars.mtx = sys->coef;
2033     vars.reg.row.low = vars.reg.col.low = sys->rng.low;
2034     vars.reg.row.high = vars.reg.col.high = sys->rng.high;
2035     tspk1_reorder(&vars);
2036     return 0;
2037     }
2038    
2039     /***************************************************************************\
2040     End of reordering functions for SPK1.
2041     \***************************************************************************/
2042    
2043     /***************************************************************************\
2044     RANKI implementation functions.
2045     \***************************************************************************/
2046    
2047     static void eliminate_row(mtx_matrix_t mtx,
2048     mtx_range_t *rng,
2049     int32 row, /* row to eliminate */
2050     real64 *tmp, /* temporary array */
2051     real64 *pivots) /* prior pivots array */
2052     /**
2053     *** Eliminates the given row to the left of the diagonal element, assuming
2054     *** valid pivots for all of the diagonal elements above it (the elements
2055     *** above those diagonal elements, if any exist, are assumed to be U
2056     *** elements and therefore ignored). The temporary array is used by this
2057     *** function to do its job. tmp[k], where rng->low <= k <= rng->high must
2058     *** be defined (allocated) but need not be initialized.
2059     *** pivots[k] where rng->low <= k <=rng->high must be allocated and
2060     *** pivots[rng->low] to pivots[row-1] must contain the previous pivots.
2061     **/
2062     {
2063     mtx_coord_t nz;
2064     mtx_range_t left_to_eliminate,high_cols,tmp_cols;
2065     boolean do_series=FALSE;
2066    
2067     high_cols.low = row;
2068     high_cols.high = rng->high;
2069     left_to_eliminate.low = rng->low;
2070     left_to_eliminate.high = row - 1;
2071    
2072     /* Move non-zeros left of pivot from matrix to full array */
2073     mtx_zero_real64(tmp+rng->low,(row-rng->low));
2074     nz.row = row;
2075     mtx_cur_row_vec(mtx,row,tmp,&left_to_eliminate);
2076     mtx_clear_row(mtx,row,&left_to_eliminate);
2077    
2078     /* eliminates nzs from pivot, one by one, filling tmp with multipliers */
2079     do_series= ( (row-1) >= (rng->low) );
2080     if (do_series) {
2081     mtx_add_row_series_init(mtx,row,FALSE);
2082     /* do nothing between here and the END call to _init which removes
2083     elements from row */
2084     }
2085     for( nz.row = row-1 ; nz.row >= rng->low ; --(nz.row) ) {
2086     if( tmp[nz.row] == D_ZERO )
2087     continue; /* Nothing to do for this row */
2088    
2089     nz.col = nz.row;
2090     tmp[nz.row] /= pivots[nz.row];
2091     /* tmp[nz.row] now equals multiplier */
2092    
2093     /* Perform "mtx_add_row" for full array part of the row */
2094     left_to_eliminate.high = nz.row - 1;
2095     mtx_cur_vec_add_row(mtx,tmp,nz.row,-tmp[nz.row],
2096     &left_to_eliminate,FALSE);
2097    
2098     /* Perform "mtx_add_row" on remaining part of row */
2099     mtx_add_row_series(nz.row,-tmp[nz.row],&high_cols);
2100     }
2101     if (do_series) {
2102     mtx_add_row_series_init(mtx,mtx_NONE,TRUE);
2103     }
2104    
2105     mtx_fill_cur_row_vec(mtx,row,tmp,mtx_range(&tmp_cols,rng->low,row-1));
2106    
2107     }
2108    
2109    
2110     /* not in use it seems */
2111     #if BUILD_DEAD_CODE /* yes, baa */
2112     static int32 top_of_spike(linsolqr_system_t sys,int32 col)
2113     /**
2114     *** Determines the top row (row of lowest index) in a possible spike
2115     *** above the diagonal element in the given column. If there is no spike,
2116     *** then (row = ) col is returned.
2117     **/
2118     {
2119     mtx_range_t above_diagonal;
2120     mtx_coord_t nz;
2121     real64 value;
2122     int32 top_row;
2123    
2124     above_diagonal.low = sys->rng.low;
2125     above_diagonal.high = col-1;
2126     top_row = nz.col = col;
2127     nz.row = mtx_FIRST;
2128     while( value = mtx_next_in_col(sys->factors,&nz,&above_diagonal),
2129     nz.row != mtx_LAST )
2130     if( nz.row < top_row )
2131     top_row = nz.row;
2132     return( top_row );
2133     }
2134     #endif /* BUILD_DEAD_CODE */
2135    
2136     static boolean col_is_a_spike(linsolqr_system_t sys,int32 col)
2137     /**
2138     *** Determines if the col is a spike, characterized by having any
2139     *** nonzeros above the diagonal. By construction there shouldn't
2140     *** be any numeric 0 nonzeros above the diagonal, so we don't prune
2141     *** them out here.
2142     **/
2143     {
2144     mtx_range_t above_diagonal;
2145     mtx_coord_t nz;
2146    
2147     above_diagonal.low = sys->rng.low;
2148     above_diagonal.high = col-1;
2149     nz.col = col;
2150     nz.row = mtx_FIRST;
2151     /* this loop is cheaper than it looks */
2152     while( (void)mtx_next_in_col(sys->factors,&nz,&above_diagonal),
2153     nz.row != mtx_LAST ) {
2154     if( nz.row < col ) return TRUE;
2155     }
2156     return( FALSE );
2157     }
2158    
2159     /* see implementation notes before modifying this function! */
2160     static void number_drag(real64 *vec, int32 rfrom, int32 rto)
2161     {
2162     int32 i;
2163     real64 tmp;
2164     if (rto <rfrom) {
2165     tmp=vec[rfrom];
2166     for (i=rfrom; i> rto; i--) {
2167     vec[i]=vec[i-1]; /* rotate right */
2168     }
2169     vec[rto]=tmp;
2170     return;
2171     }
2172     if (rto > rfrom) {
2173     tmp=vec[rfrom];
2174     for (i=rfrom; i< rto; i++) {
2175     vec[i]=vec[i+1]; /* rotate left */
2176     }
2177     vec[rto]=tmp;
2178     }
2179     }
2180    
2181     static void rankikw_factor(linsolqr_system_t sys)
2182     /**
2183     *** (the following description also applies to rankijz_factor which is
2184     *** different only in pivot selection strategy.)
2185     *** This is the heart of the linear equation solver. This function
2186     *** factorizes the matrix into a lower (L) and upper (U) triangular
2187     *** matrix. sys->smallest_pivot and sys->rank are calculated. The
2188     *** RANKI method is utilized. At the end of elimination, the matrix A
2189     *** is factored into A = U L, where U and L are stored as follows:
2190     ***
2191     *** <----- r ----> <- n-r ->
2192     *** +--------------+---------+
2193     *** | | |
2194     *** | U | |
2195     *** | | |
2196     *** | L | | r
2197     *** | | |
2198     *** +--------------+---------+
2199     *** | | |
2200     *** | | 0 | n-r
2201     *** | | |
2202     *** +--------------+---------+
2203     ***
2204     *** The rows and columns have been permuted so that all of the pivoted
2205     *** original rows and columns are found in the first r rows and columns
2206     *** of the region. The last n-r rows and columns are unpivoted. U has
2207     *** 1's on its diagonal, whereas L's diagonal is stored in the matrix.
2208     ***
2209     *** Everything above was written as though the entire matrix is
2210     *** involved. In reality, only the relevant square region is involved.
2211     **/
2212     {
2213     mtx_coord_t nz;
2214     int32 last_row;
2215     mtx_range_t pivot_candidates;
2216     real64 *tmp;
2217     real64 pivot, *pivots;
2218     int32 length;
2219     mtx_matrix_t mtx;
2220    
2221     length = sys->rng.high - sys->rng.low + 1;
2222     tmp = sys->ludata->tmp;
2223     /* eliminate row takes care of zeroing the relevant region and won't
2224     change values outside of it. */
2225     pivots=sys->ludata->pivlist;
2226     mtx=sys->factors;
2227    
2228     sys->smallest_pivot = MAXDOUBLE;
2229     last_row = pivot_candidates.high = sys->rng.high;
2230     for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
2231    
2232     pivot_candidates.low = nz.col = nz.row;
2233     pivots[nz.row]=pivot = mtx_value(mtx,&nz);
2234     pivot = fabs(pivot);
2235     if( pivot > sys->pivot_zero &&
2236     pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) &&
2237     !col_is_a_spike(sys,nz.row) ) {
2238     /* Good pivot and not a spike: continue with next row */
2239     if( pivot < sys->smallest_pivot )
2240     sys->smallest_pivot = pivot;
2241     ++(nz.row);
2242     continue;
2243     }
2244     /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2245     /**
2246     *** Row is a spike row or will
2247     *** be when a necessary column
2248     *** exchange occurs.
2249     **/
2250     eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots);
2251     /* pivot will be largest of those available. get size and value */
2252     pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row]));
2253     if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */
2254     /* Dependent row, drag to the end */
2255     mtx_drag(mtx,nz.row,last_row);
2256     number_drag(pivots,nz.row,last_row);
2257     --last_row;
2258     #undef KAA_DEBUG
2259     #ifdef KAA_DEBUG
2260     FPRINTF(stderr,"Warning: Row %d is dependent with pivot %20.8g\n",
2261     nz.row,pivot);
2262     #endif /* KAA_DEBUG */
2263     } else {
2264     /* Independent row: nz contains best pivot */
2265     /* Move pivot to diagonal */
2266     mtx_swap_cols(mtx,nz.row,nz.col);
2267     mtx_drag( mtx , nz.row , sys->rng.low );
2268     number_drag(pivots,nz.row,sys->rng.low);
2269     if( pivot < sys->smallest_pivot )
2270     sys->smallest_pivot = pivot;
2271     ++(nz.row);
2272     }
2273     }
2274    
2275     sys->rank = last_row - sys->rng.low + 1;
2276     }
2277    
2278    
2279     static void rankijz_factor(linsolqr_system_t sys)
2280     /**
2281     *** This is an alternate pivoting strategy introduced by Joe Zaher.
2282     *** it looks down and across for good pivots rather than just across.
2283     ***
2284     **/
2285     {
2286     real64 biggest;
2287     mtx_coord_t nz, best;
2288     mtx_region_t candidates;
2289     real64 *tmp;
2290     real64 pivot, *pivots;
2291     int32 length;
2292     mtx_matrix_t mtx;
2293    
2294     length = sys->rng.high - sys->rng.low + 1;
2295     tmp = sys->ludata->tmp;
2296     /* eliminate row takes care of zeroing the relevant region and won't
2297     change values outside of it. */
2298     pivots=sys->ludata->pivlist;
2299     mtx=sys->factors;
2300    
2301     sys->smallest_pivot = MAXDOUBLE;
2302     candidates.row.high = sys->rng.high;
2303     candidates.col.high = sys->rng.high;
2304     for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) {
2305     nz.col = nz.row;
2306     pivots[nz.row] = pivot = mtx_value(mtx,&nz);
2307     pivot = fabs(pivot);
2308     candidates.row.low = nz.row;
2309     candidates.col.low = nz.row;
2310     if( !col_is_a_spike(sys,nz.row) ) {
2311     best.col = nz.row;
2312     mtx_col_max(mtx,&best,&(candidates.row),&biggest);
2313     if( fabs(biggest) >= sys->pivot_zero ) {
2314     if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) {
2315     mtx_swap_rows(mtx,nz.row,best.row);
2316     pivots[nz.row] = biggest;
2317     pivot = fabs(biggest);
2318     }
2319     if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot;
2320     ++(nz.row);
2321     continue;
2322     }
2323     }
2324     /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2325     /**
2326     *** Row is a spike row or will
2327     *** be when a necessary column
2328     *** exchange occurs.
2329     **/
2330     eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots);
2331     /* pivot will be largest of those available. get size and value */
2332     pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row]));
2333     /* Move pivot to diagonal */
2334     if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2335     /* Dependent row, drag nz to lower right */
2336     mtx_drag(mtx,nz.row,candidates.row.high);
2337     number_drag(pivots,nz.row,candidates.row.high);
2338     --(candidates.row.high);
2339     } else {
2340     /* Independent row, drag nz to upper left */
2341     mtx_swap_cols(mtx,nz.row,nz.col);
2342     mtx_drag( mtx , nz.row , sys->rng.low );
2343     number_drag(pivots,nz.row,sys->rng.low);
2344     if( pivot < sys->smallest_pivot )
2345     sys->smallest_pivot = pivot;
2346     ++(nz.row);
2347     }
2348     }
2349    
2350     sys->rank = candidates.row.high - sys->rng.low + 1;
2351     }
2352    
2353    
2354     #define KAA_DEBUG 0
2355     #if KAA_DEBUG
2356     int ranki_entry(linsolqr_system_t sys,mtx_region_t *region)
2357     #else
2358     static int ranki_entry(linsolqr_system_t sys,mtx_region_t *region)
2359     #endif
2360     /**
2361     *** The region to factor is first isolated by truncating the region
2362     *** provided to the largest square region confined to the matrix diagonal.
2363     *** It is presumed it will contain no empty rows or columns and that it has
2364     *** been previously reordered using linsolqr_reorder(sys,region,spk1)
2365     *** or a sane variant of spk1.
2366     *** This is the entry point for all ranki based strategies, regardless of
2367     *** pivot selection subtleties.
2368     **/
2369     {
2370     struct rhs_list *rl;
2371     double time;
2372    
2373     CHECK_SYSTEM(sys);
2374     if( sys->factored )
2375     return 0;
2376     switch(sys->fmethod) {
2377     case ranki_kw:
2378     case ranki_jz:
2379     case ranki_ka: /* add new methods here */
2380     break;
2381     default:
2382     return 1;
2383     }
2384    
2385     if(ISNULL(sys->ludata))
2386     return 1;
2387    
2388     if( NOTNULL(sys->inverse) )
2389     mtx_destroy(sys->inverse);
2390     sys->inverse=NULL;
2391     if( NOTNULL(sys->factors) )
2392     mtx_destroy(sys->factors);
2393     if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2394     else square_region(sys,region);
2395    
2396     sys->factors = mtx_copy_region(sys->coef,region);
2397     sys->rank = -1;
2398     sys->smallest_pivot = MAXDOUBLE;
2399     for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
2400     rl->solved = FALSE;
2401     insure_capacity(sys);
2402     insure_lu_capacity(sys);
2403    
2404     time = tm_cpu_time();
2405     switch(sys->fmethod) {
2406     case ranki_ka:
2407     case ranki_kw:
2408     rankikw_factor(sys);
2409     break;
2410     case ranki_jz:
2411     default:
2412     rankijz_factor(sys);
2413     }
2414     /* no longer done automatically as noone usually cares.
2415     calc_dependent_rows_ranki1(sys);
2416     calc_dependent_cols_ranki1(sys);
2417     */
2418     sys->factored = TRUE;
2419    
2420     #undef KAA_DEBUG
2421     #if KAA_DEBUG
2422     time = tm_cpu_time() - time;
2423     FPRINTF(stderr,"Time for Inversion = %f\n",time);
2424     FPRINTF(stderr,"Non-zeros in Inverse = %d\n",
2425     mtx_nonzeros_in_region(sys->factors,region));
2426     #endif
2427     return 0;
2428     }
2429    
2430     static int ranki_solve(linsolqr_system_t sys, struct rhs_list *rl)
2431     {
2432     backward_substitute(sys,rl->varvalue,rl->transpose);
2433     forward_substitute(sys,rl->varvalue,rl->transpose);
2434     zero_unpivoted_vars(sys,rl->varvalue,rl->transpose);
2435     return 0;
2436     }
2437    
2438    
2439     /*
2440     *********************************************************************
2441     * Start of the 2 bodied factorization schemes.
2442     *
2443     * ranki2_entry will be used to access these routines.
2444     * there is now code called:
2445     * rankikw2_factor
2446     * rankijz2_factor
2447     * eliminate_row2
2448     * ranki2_entry
2449     * forward_substitute2
2450     * backward_substitute2
2451     * ranki2_solve
2452     *
2453     * We have fixed the calc_col_dependent and calc_row_dependent
2454     * routines.
2455     *********************************************************************
2456     */
2457    
2458     extern int32 mtx_number_ops;
2459    
2460     /* this function does no permutation of any sort */
2461     static
2462     void eliminate_row2(mtx_matrix_t mtx,
2463     mtx_matrix_t upper_mtx,
2464     mtx_range_t *rng,
2465     int32 row, /* row to eliminate */
2466     real64 *tmp, /* temporary array */
2467     real64 *pivots, /* prior pivots array */
2468     real64 dtol /* drop tolerance */
2469     )
2470     {
2471     mtx_coord_t nz;
2472     int j,low,high;
2473     double tmpval;
2474    
2475     /*
2476     * Move all non-zeros from current row to full array.
2477     * The full array would have been initialized before,
2478     * we must put it back in the clean state when we leave.
2479     * All operations are done over mtx_ALL_COLS.
2480     *
2481     * Note: because of an addrow over mtx_ALL_COLS, entries
2482     * of tmp outside rng may have nonzeros put in them if
2483     * sys->factors has nonzeros in the outlying columns.
2484     * If enough of these pile up out there during elimination
2485     * of a block and the compiler treats double overflow as
2486     * a signalable error, we will have a floating point
2487     * exception.
2488     * Currently sys->factors is a copy of a region from
2489     * the sys->coef matrix, so we should not have anything
2490     * out there to pile up. If we make a variant which
2491     * does not copy the coef matrix but uses it directly,
2492     * this code needs to be revisited.
2493     */
2494     mtx_steal_cur_row_vec(mtx,row,tmp,mtx_ALL_COLS);
2495    
2496     /*
2497     * Eliminates nzs from pivot, one by one, filling tmp with multipliers
2498     * We now operate on the entire row, since we are not writing the
2499     * multipliers back to the matrix.
2500     */
2501    
2502     low = rng->low;
2503     high = rng->high;
2504     for (j = row-1 ; j >= low ; --(j) ) {
2505     if (tmp[j] == D_ZERO)
2506     continue; /* Nothing to do for this row */
2507     tmpval = tmp[j]/pivots[j]; /* Compute multiplier */
2508    
2509     /*
2510     * tmpval is now the multiplier. We use it to eliminate the row,
2511     * but backpatch it to tmp, *after* we do the elimination, as
2512     * mtx_cur_vec_add_row over all columns will stomp on tmp[j]
2513     */
2514     mtx_cur_vec_add_row(mtx,tmp,j,-tmpval,mtx_ALL_COLS,FALSE);
2515     tmp[j] = tmpval; /* patch the diagonal */
2516     }
2517    
2518     /*
2519     * Fill up the upper triangular matrix.
2520     * refill the range [low .. row-1]. Remember that we have
2521     * to zero all nnz's in tmp.
2522     */
2523     nz.row = row;
2524     for (j=low;j<row;j++) {
2525     if (tmp[j] != D_ZERO) {
2526     nz.col = j;
2527     mtx_fill_value(upper_mtx,&nz,tmp[j]);
2528     #if RBADEBUG
2529     FPRINTF(gscr,"fillingU: or %d oc %d (cc %d) %24.18g\n",
2530     mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), nz.col, tmp[j]);
2531     #endif
2532     tmp[j] = D_ZERO;
2533     }
2534     }
2535     /*
2536     * refill the range [row .. high]. We *wont* keep
2537     * the diagonal as this is sorted out by the pivot
2538     * array, but here we must keep it, sigh.
2539     * At this stage we don't know that the diagonal is the
2540     * pivot, as that is determined after elimination done.
2541     * Outer loop must delete the ultimate pivot element.
2542     * Odds are good, however, that it is the current diagonal,
2543     * so put in the diagonal last.
2544     */
2545