/[ascend]/trunk/base/generic/linear/linsolqr.c
ViewVC logotype

Annotation of /trunk/base/generic/linear/linsolqr.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1321 - (hide annotations) (download) (as text)
Tue Mar 6 12:23:24 2007 UTC (13 years, 6 months ago) by jpye
File MIME type: text/x-csrc
File size: 164645 byte(s)
As promised, also deleted comment about how good linsol made us look
1 aw0a 1 /*
2     * linsol II: Ascend Sparse Linear Solvers
3     * by Benjamin Allan
4     * Created: 2/6/90
5     * Version: $Revision: 1.26 $
6     * Version control file: $RCSfile: linsolqr.c,v $
7     * Date last modified: $Date: 2000/01/25 02:26:58 $
8     * Last modified by: $Author: ballan $
9     *
10     * This file is part of the SLV solver.
11     *
12     * Copyright (C) 1994 Benjamin Andrew Allan
13     *
14     * Based on linsol:
15     * Copyright (C) 1990 Karl Michael Westerberg
16     * Copyright (C) 1993 Joseph Zaher
17     * Copyright (C) 1994 Joseph Zaher, Benjamin Andrew Allan
18     * and on sqr.pas v1.5:
19     * Copyright (C) 1994 Boyd Safrit
20     *
21     * The SLV solver is free software; you can redistribute
22     * it and/or modify it under the terms of the GNU General Public License as
23     * published by the Free Software Foundation; either version 2 of the
24     * License, or (at your option) any later version.
25     *
26     * The SLV solver is distributed in hope that it will be
27     * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
28     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29     * General Public License for more details.
30     *
31     * You should have received a copy of the GNU General Public License
32     * along with the program; if not, write to the Free Software Foundation,
33     * Inc., 675 Mass Ave, Cambridge, MA 02139 USA. Check the file named
34     * COPYING. COPYING is found in ../compiler.
35     */
36    
37     /*
38     * Implementation Notes:
39     * 9/95: there are many places in linsolqr.c where lower level matrix
40     * operations have been replaced by higher level ones. These used to
41     * be flagged as such so that people who wanted to construct newer
42     * operations from the lowest level mtx operators could see how to
43 jpye 1321 * use the lowlevel operators.
44    
45     Later note (JP, 2007): The earlier version of linsolqr, named linsol,
46     has been removed completely. There was some comment about it here but
47     it's not here any more.
48 aw0a 1 *
49     * 10/95: (BAA) I attempted to optimize number_drag through use of
50     * pointer arithmetic. Testing the resulting code without optimization
51     * turned on gave about 20% improvement over the original on an alpha
52     * using the native compiler. However, with optimization on the array
53     * subscripted version which is left here won by ~40%.
54     * The results for gcc were similar: pointer 10% better unoptimized,
55     * subscripted 30% better when -O2.
56     * For both compilers -O3 was worse! gcc was ~1.8x slower than the
57     * native cc both optimized and not.
58     * I have not tested a loop unrolled drag, but I expect unrolling is
59     * exactly what the optimizer is doing, so why bother?
60     */
61    
62     #include <math.h>
63     #include <stdarg.h>
64 johnpye 399 #include <utilities/ascConfig.h>
65 johnpye 1309 #include <utilities/error.h>
66 johnpye 399 #include <utilities/ascMalloc.h>
67     #include <utilities/ascPanic.h>
68     #include <utilities/mem.h>
69     #include <utilities/set.h>
70     #include <general/tm_time.h>
71     #include "mtx.h"
72     #include "linsolqr.h"
73     #include <general/mathmacros.h>
74 aw0a 1
75     #ifdef __WIN32__
76     #define copysign _copysign
77     #endif /* __WIN32__ */
78    
79     #define LINSOLQR_DEBUG FALSE
80     /* qr code debugging flag */
81     #define LINSOL_DEBUG FALSE
82     /* linsol debugging flag */
83    
84     #define D_ZERO (real64)0.0
85     #define ZERO (int32)0
86     #define D_ONE (real64)1.0
87    
88     /* housekeeping flags */
89     #define BUILD_DEAD_CODE 0
90     #undef BUILD_KIRK_CODE
91    
92    
93     #define RBADEBUG 0
94     /* turns on spew that will generate output files
95     * comparable to and in the same location as for rankiba2
96     * user specific hardcoded pathnames are involved.
97     * don't try this at home.
98     */
99     #if RBADEBUG
100     extern FILE *gscr;
101     #endif
102    
103     /*
104     * timing messages disable flag.
105     */
106     int g_linsolqr_timing = 1;
107    
108     /***************************************************************************\
109     Data structures for linsol
110     Any time you modify these, please verify linsolqr_size is still correct.
111     \***************************************************************************/
112     struct rhs_list {
113     real64 *rhs; /* Vector of rhs values */
114     real64 *varvalue; /* Solution of the linear system */
115     boolean solved; /* ? has the rhs been solved */
116     boolean transpose; /* ? transpose the coef matrix */
117     struct rhs_list *next;
118     };
119    
120     struct qr_fill_t {
121     int32 cnt; /* number of real nonzeros in a column */
122     int32 col; /* column number prior to being sorted */
123     int32 fill; /* fill if this (or if col?) were the pivot column */
124     int32 overlap; /* scratch for fillable calculation. */
125     };
126     /* fill counting structure for sparse qr */
127    
128     struct lu_auxdata { /* all indexed by cur row or cur col number */
129     real64 *pivlist; /* vector of pivots (diagonal of L for ranki) */
130     real64 *tmp; /* row elimination, dependency buffer */
131     int32 cap; /* current capacity of the vectors if not null */
132     };
133     /* structure for lu algorithms */
134    
135     struct qr_auxdata { /* all indexed by cur row or cur col number */
136     mtx_matrix_t hhvects; /* slave matrix of sys->factors for holding u's */
137     mtx_sparse_t sp; /* sparse data space */
138     real64 *alpha; /* column norms for the Q\R factor matrix */
139     real64 *sigma; /* column norms for the S (R inverse) matrix */
140     real64 *tau; /* column tau of the Householder transforms */
141     real64 *hhcol; /* Householder transform scratch vector */
142     real64 *hhrow; /* Householder transform scratch vector */
143     real64 nu; /* current condition number of inverse */
144     real64 anu; /* condition number of coef */
145     real64 asubnu; /* condition number of active region in coef */
146     struct qr_fill_t *fill; /* fill counting structure */
147     mtx_region_t facreg; /* region over which qr is done if nonsquare */
148     int32 cap; /* current capacity of these vectors if not null */
149     };
150     /* structure for CondQR algorithms */
151    
152     struct linsolqr_header {
153     int integrity;
154     enum factor_class fclass; /* Type of factoring expected */
155     enum factor_method fmethod; /* Method factoring expected */
156     enum reorder_method rmethod; /* Type of most recent reordering */
157     int32 capacity; /* Capacity of arrays */
158     mtx_matrix_t coef; /* Coefficient matrix */
159     struct rhs_list *rl; /* List of rhs vectors */
160     int rlength; /* Length of rhs list */
161     real64 pivot_zero; /* Smallest acceptable pivot */
162     real64 ptol; /* Pivot selection tolerance */
163     real64 ctol; /* Condition selection tolerance */
164     real64 dtol; /* Matrix entry drop tolerance */
165     mtx_matrix_t factors; /* Matrix with UL and dependence info (ranki),
166     or L and dependence info (ranki2),
167     or R and dependence info (qr methods) */
168     mtx_matrix_t inverse; /* Matrix with U multipliers (ranki2) or R^-1
169     (condqr), in both cases slave of factors */
170     boolean factored; /* ? has the matrix been factored */
171     boolean rowdeps; /* ? have row dependencies been calc'ed */
172     boolean coldeps; /* ? have col dependencies been calc'ed */
173     mtx_range_t rng; /* Pivot range */
174     mtx_region_t reg; /* Bounding region given */
175     int32 rank; /* Rank of the matrix */
176     real64 smallest_pivot; /* Smallest pivot accepted */
177     struct qr_auxdata *qrdata; /* Data vectors for qr methods */
178     struct lu_auxdata *ludata; /* Data vectors for lu methods */
179     };
180     /* linsol main structure */
181    
182     /***************************************************************************\
183     string functions for linsol io
184     \***************************************************************************/
185    
186     char *linsolqr_rmethods() {
187     static char names[]="Natural, SPK1, TSPK1";
188     return names;
189     }
190    
191     char *linsolqr_fmethods() {
192     static char names[] =
193     "SPK1/RANKI,SPK1/RANKI+ROW,Fast-SPK1/RANKI,Fast-SPK1/RANKI+ROW,Fastest-SPK1/MR-RANKI,CondQR,CPQR";
194     return names;
195     }
196    
197     /** **/
198     enum reorder_method linsolqr_rmethod_to_enum(char *name) {
199     if (strcmp(name,"SPK1")==0) return spk1;
200     if (strcmp(name,"TSPK1")==0) return tspk1;
201     if (strcmp(name,"Natural")==0) return natural;
202     return unknown_r;
203     }
204    
205     enum factor_method linsolqr_fmethod_to_enum(char *name) {
206     if (strcmp(name,"KIRK-STUFF")==0) return ranki_ka;
207     if (strcmp(name,"SPK1/RANKI")==0) return ranki_kw;
208     if (strcmp(name,"SPK1/RANKI+ROW")==0) return ranki_jz;
209     if (strcmp(name,"Fast-SPK1/RANKI")==0) return ranki_kw2;
210     if (strcmp(name,"Fast-SPK1/RANKI+ROW")==0) return ranki_jz2;
211     if (strcmp(name,"Fastest-SPK1/MR-RANKI")==0) return ranki_ba2;
212     if (strcmp(name,"CondQR")==0) return cond_qr;
213     if (strcmp(name,"CPQR")==0) return plain_qr;
214     return unknown_f;
215     }
216    
217     enum factor_class linsolqr_fmethod_to_fclass(enum factor_method fm)
218     {
219     switch (fm) {
220     /* ranki-like things */
221     case ranki_kw:
222     case ranki_jz:
223     case ranki_ba2:
224     case ranki_kw2:
225     case ranki_jz2:
226     case ranki_ka:
227     return ranki;
228     /* implemented qr things */
229     case plain_qr:
230     return s_qr;
231     /* other stuff unimplemented. all fall through */
232     case cond_qr:
233     case opt_qr:
234     case ls_qr:
235     case gauss_ba2:
236     case symmetric_lu:
237     case unknown_f:
238     default:
239     return unknown_c;
240     }
241     }
242    
243     /** **/
244     char *linsolqr_enum_to_rmethod(enum reorder_method meth) {
245     switch (meth) {
246     case spk1: return "SPK1";
247     case tspk1: return "TSPK1";
248     case natural: return "Natural";
249     default: return "Unknown reordering method.";
250     }
251     }
252    
253     char *linsolqr_enum_to_fmethod(enum factor_method meth) {
254     switch (meth) {
255     case ranki_kw: return "SPK1/RANKI";
256     case ranki_jz: return "SPK1/RANKI+ROW";
257     case ranki_ba2: return "Fastest-SPK1/MR-RANKI";
258     case ranki_kw2: return "Fast-SPK1/RANKI";
259     case ranki_jz2: return "Fast-SPK1/RANKI+ROW";
260     case ranki_ka: return "KIRK-STUFF";
261     case cond_qr: return "CondQR";
262     case plain_qr: return "CPQR";
263     default: return "Unknown factorization method.";
264     }
265     }
266    
267     /** **/
268     char *linsolqr_rmethod_description(enum reorder_method meth) {
269     static char sspk1[]="SPK1 reordering ala Stadtherr.";
270     static char stspk1[]="SPK1 reordering column wise ala Stadtherr.";
271     static char nat[]="Ordering as received from user";
272     switch (meth) {
273     case spk1: return sspk1;
274     case tspk1: return stspk1;
275     case natural: return nat;
276     default: return "Unknown reordering method.";
277     }
278     }
279    
280     char *linsolqr_fmethod_description(enum factor_method meth) {
281     static char ex_ranki[] =
282     "SPK1 reordering with RANKI LU factoring ala Stadtherr.";
283     static char ex_rankib[] =
284     "SPK1 reordering with RANKI LU factoring ala Stadtherr, but fast.";
285     static char ex_rankij[]="SPK1/RANKI LU with pseudo-complete pivoting";
286     static char ex_rankik[]="KIRK-STUFF/RANKI LU with pseudo-complete pivoting";
287     static char ex_condqr[]="Sparse QR with condition controlled pivoting.";
288     static char ex_plainqr[]="Sparse QR with column pivoting.";
289     switch (meth) {
290     case ranki_kw: return ex_ranki;
291     case ranki_kw2: return ex_ranki;
292     case ranki_ba2: return ex_rankib;
293     case ranki_jz: return ex_rankij;
294     case ranki_jz2: return ex_rankij;
295     case ranki_ka: return ex_rankik;
296     case cond_qr: return ex_condqr;
297     case plain_qr: return ex_plainqr;
298     default: return "Unknown factorization method.";
299     }
300     }
301    
302     /***************************************************************************\
303     internal check routines
304     \***************************************************************************/
305     #define OK ((int)439828676)
306     #define DESTROYED ((int)839276847)
307     #define CHECK_SYSTEM(a) check_system((a),__FILE__,__LINE__)
308     static int check_system(linsolqr_system_t sys, char *file, int line)
309     /**
310     *** Checks the system handle. Return 0 if ok, 1 otherwise.
311     **/
312     {
313 jds 97 UNUSED_PARAMETER(file);
314    
315 aw0a 1 if( ISNULL(sys) ) {
316 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"NULL system handle.");
317 aw0a 1 return 1;
318     }
319    
320     switch( sys->integrity ) {
321     case OK:
322     return 0;
323     case DESTROYED:
324 johnpye 1309 error_reporter(ASC_PROG_ERR,file,line,__FUNCTION__,"System was recently destroyed.");
325 aw0a 1 break;
326     default:
327 johnpye 1309 error_reporter(ASC_PROG_ERR,file,line,__FUNCTION__,"System was reused or never created.");
328 aw0a 1 break;
329     }
330     return 1;
331     }
332    
333     /***************************************************************************\
334     external calls (and some of their internals)
335     \***************************************************************************/
336     static void destroy_rhs_list(struct rhs_list *rl)
337     /**
338     *** Destroys rhs list.
339     **/
340     {
341     while( NOTNULL(rl) ) {
342     struct rhs_list *p;
343     p = rl;
344     rl = rl->next;
345     if( NOTNULL(p->varvalue) )
346     ascfree( (POINTER)(p->varvalue) );
347     ascfree( (POINTER)p );
348     }
349     }
350    
351     static struct rhs_list *find_rhs(struct rhs_list *rl,real64 *rhs)
352     /**
353     *** Searches for rhs in rhs list, returning it or NULL if not found.
354     **/
355     {
356     for( ; NOTNULL(rl) ; rl = rl->next )
357     if( rl->rhs == rhs )
358     break;
359     return(rl);
360     }
361    
362     static struct lu_auxdata *create_ludata()
363     /**
364     *** Creates an empty zeroed ludata struct.
365 johnpye 1309 *** Filled up by ensure_lu_capacity.
366 aw0a 1 **/
367     {
368     struct lu_auxdata *d;
369 johnpye 709 d=ASC_NEW_CLEAR(struct lu_auxdata);
370 aw0a 1 return d;
371     }
372    
373     static struct qr_auxdata *create_qrdata()
374     /**
375     *** Creates an empty zeroed qrdata struct.
376 johnpye 1309 *** Filled up by ensure_qr_capacity.
377 aw0a 1 **/
378     {
379     struct qr_auxdata *d;
380 johnpye 709 d=ASC_NEW_CLEAR(struct qr_auxdata);
381 aw0a 1 return d;
382     }
383    
384     static void destroy_ludata(struct lu_auxdata *d) {
385     /**
386     *** Conditionally destroys a ludata struct and its parts.
387     **/
388     if (NOTNULL(d)) {
389     if (NOTNULL(d->pivlist)) ascfree(d->pivlist);
390     d->pivlist=NULL;
391     if (NOTNULL(d->tmp)) ascfree(d->tmp);
392     d->tmp=NULL;
393     ascfree(d);
394     }
395     }
396    
397     static void destroy_qrdata(struct qr_auxdata *d) {
398     /**
399     *** Conditionally destroys a qrdata struct and its parts.
400     **/
401     if (NOTNULL(d)) {
402     if (NOTNULL(d->alpha)) ascfree(d->alpha);
403     if (NOTNULL(d->sigma)) ascfree(d->sigma);
404     if (NOTNULL(d->tau)) ascfree(d->tau);
405     if (NOTNULL(d->hhcol)) ascfree(d->hhcol);
406     if (NOTNULL(d->hhrow)) ascfree(d->hhrow);
407     if (NOTNULL(d->fill)) ascfree(d->fill);
408    
409     ascfree(d);
410     }
411     }
412    
413     linsolqr_system_t linsolqr_create()
414     {
415     linsolqr_system_t sys;
416    
417     sys = (linsolqr_system_t)ascmalloc( sizeof(struct linsolqr_header) );
418     sys->fclass = unknown_c;
419     sys->fmethod = unknown_f;
420     sys->rmethod = unknown_r;
421     sys->integrity = OK;
422     sys->capacity = 0;
423     sys->coef = NULL;
424     sys->rl = NULL;
425     sys->rlength = 0;
426     sys->pivot_zero = 1e-12; /* default value */
427     sys->ptol = 0.1; /* default value */
428     sys->ctol = 0.1; /* default value */
429     sys->dtol = LINQR_DROP_TOLERANCE; /* default value */
430     sys->factors = NULL;
431     sys->inverse = NULL;
432     sys->factored = FALSE;
433     sys->rowdeps = FALSE;
434     sys->coldeps = FALSE;
435     sys->rng.low = sys->rng.high = -1;
436     sys->reg.row.low = sys->reg.row.high = -1;
437     sys->reg.col.low = sys->reg.col.high = -1;
438     sys->rank = -1;
439     sys->smallest_pivot = MAXDOUBLE;
440     sys->qrdata = NULL;
441     sys->ludata = NULL;
442     return(sys);
443     }
444    
445 johnpye 1309 linsolqr_system_t linsolqr_create_default(){
446     linsolqr_system_t L;
447     L = linsolqr_create();
448 johnpye 1317 L->fmethod = ranki_ba2;
449     L->rmethod = spk1;
450 johnpye 1309 return L;
451     }
452    
453 aw0a 1 void linsolqr_destroy(linsolqr_system_t sys)
454     {
455     if(CHECK_SYSTEM(sys)) {
456 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. Not destroyed.");
457 aw0a 1 return;
458     }
459     if( NOTNULL(sys->coef) ) {
460 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_NOTE,"linsolqr contains coef mtx which will NOT be destroyed\n");
461 aw0a 1 }
462     if( NOTNULL(sys->inverse) )
463     mtx_destroy(sys->inverse);
464     if( NOTNULL(sys->factors) )
465     mtx_destroy(sys->factors);
466     destroy_rhs_list(sys->rl);
467     destroy_qrdata(sys->qrdata);
468     destroy_ludata(sys->ludata);
469     sys->integrity = DESTROYED;
470     ascfree( (POINTER)sys );
471     }
472    
473     void linsolqr_set_matrix(linsolqr_system_t sys,mtx_matrix_t mtx)
474     {
475     if(CHECK_SYSTEM(sys)) {
476 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERROR,"Bad linsolqr_system_t found. coef mtx not set.");
477 aw0a 1 return;
478     }
479     sys->coef = mtx;
480     }
481    
482    
483     void linsolqr_set_region(linsolqr_system_t sys,mtx_region_t region)
484     {
485     if(CHECK_SYSTEM(sys)) {
486 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. coef mtx not set.");
487 aw0a 1 return;
488     }
489 johnpye 1309 CONSOLE_DEBUG("Region rows=[%d,%d], cols=[%d,%d]",region.row.low,region.row.high,region.col.low,region.col.high);
490 aw0a 1 sys->reg = region;
491     }
492    
493    
494     mtx_matrix_t linsolqr_get_matrix(linsolqr_system_t sys)
495     {
496     CHECK_SYSTEM(sys);
497     return( sys->coef );
498     }
499    
500     mtx_region_t *linsolqr_get_region(linsolqr_system_t sys)
501     {
502     return &(sys->reg);
503     }
504    
505     void linsolqr_add_rhs(linsolqr_system_t sys,
506     real64 *rhs,
507     boolean transpose)
508     {
509     struct rhs_list *rl;
510    
511     if(CHECK_SYSTEM(sys)) {
512 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. rhs not added.");
513 aw0a 1 return;
514     }
515     rl = find_rhs(sys->rl,rhs);
516     if( NOTNULL(rl) ) {
517     return;
518     } else {
519     if( NOTNULL(rhs) ) {
520     rl = (struct rhs_list *)ascmalloc( sizeof(struct rhs_list) );
521     rl->rhs = rhs;
522     rl->varvalue = NULL;
523     rl->solved = FALSE;
524     rl->transpose = transpose;
525     rl->next = sys->rl;
526     sys->rl = rl;
527     ++(sys->rlength);
528     if (sys->capacity>0) {
529     rl->varvalue =
530 johnpye 708 ASC_NEW_ARRAY(real64,sys->capacity);
531 aw0a 1 }
532     }
533     }
534     }
535    
536     void linsolqr_remove_rhs(linsolqr_system_t sys,real64 *rhs)
537     {
538     struct rhs_list **q;
539    
540     if(CHECK_SYSTEM(sys)) {
541 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. RHS not removed.");
542 aw0a 1 return;
543     }
544     for( q = &(sys->rl) ; NOTNULL(*q) ; q = &((*q)->next) )
545     if( (*q)->rhs == rhs )
546     break;
547     if( NOTNULL(*q) ) {
548     struct rhs_list *p;
549     p = *q;
550     *q = p->next;
551     if( NOTNULL(p->varvalue) )
552     ascfree( (POINTER)(p->varvalue) );
553     ascfree( (POINTER)p );
554     --(sys->rlength);
555     } else if( NOTNULL(rhs) ) {
556 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Rhs does not exist.");
557 aw0a 1 }
558     }
559    
560     int32 linsolqr_number_of_rhs(linsolqr_system_t sys)
561     {
562     CHECK_SYSTEM(sys);
563     return( sys->rlength );
564     }
565    
566     real64 *linsolqr_get_rhs(linsolqr_system_t sys,int n)
567     {
568     struct rhs_list *rl;
569     int count;
570    
571     CHECK_SYSTEM(sys);
572    
573     count = sys->rlength - 1 - n;
574     if( count < 0 ) return(NULL);
575     for( rl = sys->rl ; count > 0 && NOTNULL(rl) ; rl = rl->next )
576     --count;
577     return( ISNULL(rl) ? NULL : rl->rhs );
578     }
579    
580     void linsolqr_matrix_was_changed(linsolqr_system_t sys)
581     {
582     if(CHECK_SYSTEM(sys)) {
583 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. matrix change message ignored.");
584 aw0a 1 return;
585     }
586     sys->rowdeps = sys->coldeps = sys->factored = FALSE;
587     }
588    
589     void linsolqr_rhs_was_changed(linsolqr_system_t sys, real64 *rhs)
590     {
591     struct rhs_list *rl;
592    
593     if(CHECK_SYSTEM(sys)) {
594 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. rhs change ignored.");
595 aw0a 1 return;
596     }
597     rl = find_rhs(sys->rl,rhs);
598     if( NOTNULL(rl) ) {
599     rl->solved = FALSE;
600     } else if( NOTNULL(rhs) ) {
601 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Rhs does not exist.");
602 aw0a 1 }
603     }
604    
605     void linsolqr_set_pivot_zero(linsolqr_system_t sys,real64 pivot_zero)
606     {
607     if(CHECK_SYSTEM(sys)) {
608 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. set_pivot_zero ignored.");
609 aw0a 1 return;
610     }
611     if( pivot_zero < D_ZERO ) {
612 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Invalid pivot zero of %g",pivot_zero);
613 aw0a 1 } else {
614     sys->pivot_zero = pivot_zero;
615     linsolqr_matrix_was_changed(sys);
616     }
617     }
618    
619     real64 linsolqr_pivot_zero(linsolqr_system_t sys)
620     {
621     CHECK_SYSTEM(sys);
622     return( sys->pivot_zero );
623     }
624    
625     void linsolqr_set_pivot_tolerance(linsolqr_system_t sys, real64 ptol)
626     {
627     if(CHECK_SYSTEM(sys)) {
628 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. set_pivot_tol ignored.");
629 aw0a 1 return;
630     }
631     if( ptol < D_ZERO || ptol > D_ONE ) {
632 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Invalid pivot tolerance of %g",ptol);
633 aw0a 1 } else {
634     sys->ptol = ptol;
635     linsolqr_matrix_was_changed(sys);
636     }
637     }
638    
639     void linsolqr_set_condition_tolerance(linsolqr_system_t sys, real64 ctol)
640     {
641     if(CHECK_SYSTEM(sys)) {
642 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. set_condition_tolerance ignored.");
643 aw0a 1 return;
644     }
645     if( ctol < D_ZERO || ctol > D_ONE ) {
646 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Invalid condition tolerance of %g.",ctol);
647 aw0a 1 } else {
648     sys->ctol = ctol;
649     linsolqr_matrix_was_changed(sys);
650     }
651     }
652    
653     void linsolqr_set_drop_tolerance(linsolqr_system_t sys, real64 dtol)
654     {
655     if(CHECK_SYSTEM(sys)) {
656 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. set_drop_tolerance ignored.");
657 aw0a 1 return;
658     }
659     if( dtol < D_ZERO || dtol > D_ONE ) {
660 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Invalid drop tolerance of %g.",dtol);
661 aw0a 1 } else {
662     sys->dtol = dtol;
663     linsolqr_matrix_was_changed(sys);
664     }
665     }
666    
667     real64 linsolqr_pivot_tolerance(linsolqr_system_t sys)
668     {
669     CHECK_SYSTEM(sys);
670     return( sys->ptol );
671     }
672    
673     real64 linsolqr_condition_tolerance(linsolqr_system_t sys)
674     {
675     CHECK_SYSTEM(sys);
676     return( sys->ctol );
677     }
678    
679     real64 linsolqr_drop_tolerance(linsolqr_system_t sys)
680     {
681     CHECK_SYSTEM(sys);
682     return( sys->dtol );
683     }
684    
685     extern enum factor_class linsolqr_fclass(linsolqr_system_t sys)
686     {
687     CHECK_SYSTEM(sys);
688     return( sys->fclass );
689     }
690     extern enum factor_method linsolqr_fmethod(linsolqr_system_t sys)
691     {
692     CHECK_SYSTEM(sys);
693     return( sys->fmethod );
694     }
695     extern enum reorder_method linsolqr_rmethod(linsolqr_system_t sys)
696     {
697     CHECK_SYSTEM(sys);
698     return( sys->rmethod );
699     }
700    
701     int32 linsolqr_rank(linsolqr_system_t sys)
702     {
703     CHECK_SYSTEM(sys);
704     if( !sys->factored ) {
705 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"System not factored yet.");
706 aw0a 1 }
707     return(sys->rank);
708     }
709    
710     real64 linsolqr_smallest_pivot(linsolqr_system_t sys)
711     {
712     CHECK_SYSTEM(sys);
713     #if LINSOL_DEBUG
714     if( !sys->factored ) {
715 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"System not factored yet.");
716 aw0a 1 }
717     #endif
718     return(sys->smallest_pivot);
719     }
720    
721     /***************************************************************************\
722     Commonly used internal functions for sparse linear solvers based on mtx.
723 johnpye 1309 void ensure_capacity(sys)
724     void ensure_lu_capacity(sys)
725     void ensure_qr_capacity(sys)
726 aw0a 1 void forward_substitute(sys,rvec,transpose)
727     void backward_substitute(sys,rvec,transpose)
728     macro SQR(x)
729     int find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi)
730     \***************************************************************************/
731    
732     static real64 *raise_capacity(real64 *vec,
733     int32 oldcap,
734     int32 newcap)
735     /**
736     *** Raises the capacity of the array and returns a new array.
737     *** It is assumed that oldcap < newcap. vec is destroyed or
738     *** returned as appropriate.
739     *** If !NDEBUG, the vector expanded is also set to 0.
740     **/
741     {
742     real64 *newvec=NULL;
743 johnpye 130 #ifndef NDEBUG
744 aw0a 1 int i;
745 johnpye 130 #endif
746 aw0a 1 if (newcap < oldcap) {
747     #ifndef NDEBUG
748     for (i = 0; i < newcap; i++) {
749     newvec[i] = 0.0;
750     }
751     #endif
752     return vec;
753     }
754     if (NOTNULL(vec)) {
755     /* don't call realloc on null with newcap 0 or it frees */
756     newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64)));
757     } else {
758     newvec=(newcap > 0 ?
759 johnpye 708 ASC_NEW_ARRAY(real64,newcap ) : NULL);
760 aw0a 1 }
761     #ifndef NDEBUG
762     for (i = 0; i < newcap; i++) {
763     newvec[i] = 0.0;
764     }
765     #endif
766     return newvec;
767     }
768    
769     static int32 *raise_qr_int_capacity(int32 *vec,
770     int32 oldcap,
771     int32 newcap)
772     /**
773     *** Raises the capacity of the index array and returns a new array.
774     *** It is assumed that oldcap < newcap. vec is destroyed or
775     *** returned as appropriate. vec returned is zeroed.
776     *** calling with newcap=0 does not force deallocation.
777     **/
778     {
779     int32 *newvec=NULL;
780     if (newcap < oldcap)
781     return vec;
782     if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
783     newvec=(int32 *)ascrealloc(vec,(newcap * sizeof(int32)));
784     mtx_zero_int32(vec,newcap);
785     } else {
786 johnpye 709 newvec= (newcap > 0 ? ASC_NEW_ARRAY_CLEAR(int32,newcap) : NULL);
787 aw0a 1 }
788     return newvec;
789     }
790    
791     static real64 *raise_qr_real_capacity(real64 *vec,
792     int32 oldcap,
793     int32 newcap)
794     /**
795     *** Raises the capacity of the real array and returns a new array.
796     *** It is assumed that oldcap < newcap. vec is destroyed or
797     *** returned as appropriate. vec returned is zeroed.
798     *** calling with newcap=0 does not force deallocation.
799     **/
800     {
801     real64 *newvec=NULL;
802     if (newcap < oldcap)
803     return vec;
804     if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
805     newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64)));
806     mtx_zero_real64(vec,newcap);
807     } else {
808 johnpye 709 newvec= (newcap > 0 ? ASC_NEW_ARRAY_CLEAR(real64,newcap) : NULL);
809 aw0a 1 }
810     return newvec;
811     }
812    
813     static struct qr_fill_t *raise_qr_fill_capacity(struct qr_fill_t *vec,
814     int32 oldcap,
815     int32 newcap)
816     /**
817     *** Raises the capacity of the fill array and returns a new array.
818     *** It is assumed that oldcap < newcap. vec is destroyed or
819     *** returned as appropriate. vec returned is zeroed.
820     *** calling with newcap=0 does not force deallocation.
821     **/
822     {
823     struct qr_fill_t *newvec=NULL;
824     if (newcap < oldcap)
825     return vec;
826     if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
827     newvec=(struct qr_fill_t *)
828     ascrealloc(vec,(newcap * sizeof(struct qr_fill_t)));
829     mtx_zero_char((char *)vec,newcap*sizeof(struct qr_fill_t));
830 johnpye 709 }else{
831     newvec= (newcap > 0 ? ASC_NEW_ARRAY_CLEAR(struct qr_fill_t, newcap) : NULL);
832 aw0a 1 }
833     return newvec;
834     }
835    
836 johnpye 1309 static void ensure_capacity(linsolqr_system_t sys)
837 aw0a 1 /**
838 johnpye 1309 *** ensures that the capacity of all of the solution vectors
839 aw0a 1 *** for each rhs is large enough.
840     *** The above implies a malloc if varvalue is null.
841     *** Assumes varvalue are at sys->capacity already.
842     **/
843     {
844     int32 req_cap;
845     req_cap = mtx_capacity(sys->coef);
846    
847     if( req_cap > sys->capacity ) {
848     struct rhs_list *rl;
849    
850     for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
851     rl->varvalue = raise_capacity(rl->varvalue,sys->capacity,req_cap);
852     sys->capacity = req_cap;
853     }
854     }
855    
856 johnpye 1309 static void ensure_lu_capacity(linsolqr_system_t sys)
857 aw0a 1 /**
858 johnpye 1309 *** ensures that the capacity of all of the ludata vectors.
859 aw0a 1 *** The above implies a malloc if vector is null.
860     *** If not null, implies an extension if needed.
861     **/
862     {
863     int32 req_cap;
864     if (!sys || !(sys->coef) || !(sys->ludata)) {
865 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"called with NULL pointer");
866 aw0a 1 return;
867     }
868     req_cap = mtx_capacity(sys->coef);
869    
870     if( req_cap > sys->ludata->cap ) {
871     sys->ludata->pivlist =
872     raise_capacity(sys->ludata->pivlist,sys->ludata->cap,req_cap);
873     sys->ludata->tmp =
874     raise_capacity(sys->ludata->tmp,sys->ludata->cap,req_cap);
875     sys->ludata->cap = req_cap;
876     }
877     }
878    
879 johnpye 1309 static void ensure_qr_capacity(linsolqr_system_t sys)
880 aw0a 1 /**
881 johnpye 1309 *** ensures that the capacity of all of the qrdata vectors
882 aw0a 1 *** is large enough.
883     *** The above implies a calloc if vector is null.
884     *** If not null, implies an extension and zeroing of the vector.
885     *** Also zeroes the simple elements of the qrdata.
886     **/
887     {
888     int32 req_cap;
889     if (!sys || !(sys->coef) || !(sys->qrdata)) {
890 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"called with NULL pointer");
891 aw0a 1 return;
892     }
893     req_cap = mtx_capacity(sys->coef);
894    
895     if( req_cap > sys->qrdata->cap ) {
896     sys->qrdata->alpha =
897     raise_qr_real_capacity(sys->qrdata->alpha,sys->qrdata->cap,req_cap);
898     sys->qrdata->sigma =
899     raise_qr_real_capacity(sys->qrdata->sigma,sys->qrdata->cap,req_cap);
900     sys->qrdata->tau =
901     raise_qr_real_capacity(sys->qrdata->tau,sys->qrdata->cap,req_cap);
902     sys->qrdata->hhcol =
903     raise_qr_real_capacity(sys->qrdata->hhcol,sys->qrdata->cap,req_cap);
904     sys->qrdata->hhrow =
905     raise_qr_real_capacity(sys->qrdata->hhrow,sys->qrdata->cap,req_cap);
906    
907     sys->qrdata->sp.data =
908     raise_qr_real_capacity(sys->qrdata->sp.data,sys->qrdata->cap,req_cap);
909     sys->qrdata->sp.idata =
910     raise_qr_int_capacity(sys->qrdata->sp.idata,sys->qrdata->cap,req_cap);
911    
912     sys->qrdata->fill =
913     raise_qr_fill_capacity(sys->qrdata->fill,sys->qrdata->cap,req_cap);
914    
915     sys->qrdata->cap = req_cap;
916     sys->qrdata->sp.cap = req_cap;
917     sys->qrdata->sp.len = 0;
918     }
919     sys->qrdata->nu=D_ZERO;
920     sys->qrdata->anu=D_ZERO;
921     sys->qrdata->asubnu=D_ZERO;
922     }
923    
924     static void forward_substitute(linsolqr_system_t sys,
925     real64 *arr,
926     boolean transpose)
927     /**
928     *** Forward substitute. It is assumed that the L (or U) part of
929     *** sys->factors is computed. This function converts c to x in place. The
930     *** values are stored in arr indexed by original row number (or original
931     *** column number).
932     ***
933     *** transpose = FALSE: transpose = TRUE:
934     *** T
935     *** L x = c U x = c
936     ***
937     *** The following formula holds:
938     *** 0<=k<r ==> x(k) = [c(k) - L(k,(0..k-1)) dot x(0..k-1)] / L(k,k)
939     *** or
940     *** 0<=k<r ==> x(k) = [c(k) - U((0..k-1),k) dot x(0..k-1)] / U(k,k)
941     ***
942     *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k]
943     *** and in matrix.
944     **/
945     {
946     mtx_range_t dot_rng;
947     mtx_coord_t nz;
948     real64 sum, *pivlist;
949     mtx_matrix_t mtx;
950     int32 dotlim;
951     boolean nonzero_found=FALSE;
952    
953     mtx=sys->factors;
954     pivlist=sys->ludata->pivlist;
955     dot_rng.low = sys->rng.low;
956     dotlim=dot_rng.low+sys->rank;
957     if (transpose) { /* arr is indexed by original column number */
958     for( nz.col=dot_rng.low; nz.col < dotlim; ++(nz.col) ) {
959     register int32 org_col;
960    
961     dot_rng.high = nz.col - 1;
962     org_col = mtx_col_to_org(mtx,nz.col);
963     if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
964     if (nonzero_found) {
965     sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE);
966     /* arr[org_col] = (arr[org_col] - sum) / D_ONE */;
967     arr[org_col] -=sum;
968     }
969     }
970    
971     } else { /* arr is indexed by original row number */
972     for( nz.row=dot_rng.low; nz.row < dotlim; ++(nz.row) ) {
973     register int32 org_row;
974    
975     dot_rng.high = nz.row - 1;
976     org_row = mtx_row_to_org(mtx,nz.row);
977     if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
978     if (nonzero_found) {
979     sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE);
980     /*
981     nz.col = nz.row;
982     arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz);
983     */
984     arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row];
985     }
986     }
987     }
988     }
989    
990     static void backward_substitute(linsolqr_system_t sys,
991     real64 *arr,
992     boolean transpose)
993     /**
994     *** Backward substitute. It is assumed that the U (or L) part of
995     *** sys->factors is computed. This function converts rhs to c in place. The
996     *** values are stored in arr indexed by original row number (or original
997     *** column number).
998     ***
999     *** transpose = FALSE: transpose = TRUE:
1000     *** T
1001     *** U c = rhs L c = rhs
1002     ***
1003     *** The following formula holds:
1004     *** transpose=FALSE: (the usual for J.dx=-f where rhs is -f)
1005     *** 0<=k<r ==> c(k) = [rhs(k) - U(k,(k+1..r-1)) dot c(k+1..r-1)] / U(k,k)
1006     *** working up from the bottom.
1007     *** or
1008     *** transpose=TRUE:
1009     *** 0<=k<r ==> c(k) = [rhs(k) - L((k+1..r-1),k) dot c(k+1..r-1)] / L(k,k)
1010     *** working right to left.
1011     ***
1012     *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k]
1013     *** and in matrix.
1014     **/
1015     {
1016     mtx_range_t dot_rng;
1017     mtx_coord_t nz;
1018     real64 sum, *pivlist;
1019     mtx_matrix_t mtx;
1020     int32 dotlim;
1021     boolean nonzero_found=FALSE; /* once TRUE, substitution must be done
1022     over remaining rows/cols */
1023    
1024     dot_rng.high = sys->rng.low + sys->rank - 1;
1025     dotlim=sys->rng.low;
1026     mtx=sys->factors;
1027     pivlist=sys->ludata->pivlist;
1028     if (transpose) { /* arr is indexed by original column number */
1029     for( nz.col = dot_rng.high ; nz.col >= dotlim ; --(nz.col) ) {
1030     register int32 org_col;
1031    
1032     dot_rng.low = nz.col + 1;
1033    
1034     org_col = mtx_col_to_org(mtx,nz.col);
1035     if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
1036     if (nonzero_found) {
1037     sum= mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE);
1038     /*
1039     reminders:
1040     nz.row = nz.col;
1041     arr[org_col] = (arr[org_col] - sum) / mtx_value(mtx,&nz);
1042     */
1043     arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col];
1044     }
1045     }
1046     } else { /* arr is indexed by original row number */
1047     /* we are working from the bottom up */
1048     for( nz.row = dot_rng.high ; nz.row >= dotlim ; --(nz.row) ) {
1049     register int32 org_row;
1050    
1051     dot_rng.low = nz.row + 1;
1052     org_row = mtx_row_to_org(mtx,nz.row);
1053    
1054     if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
1055     if (nonzero_found) {
1056     sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE);
1057     /*
1058     reminders:
1059     nz.row = nz.col;
1060     arr[org_row] = (arr[org_row] - sum) / D_ONE;
1061     */
1062     arr[org_row] -= sum;
1063     }
1064     }
1065     }
1066     }
1067    
1068     #ifdef SQR
1069     #undef SQR
1070     #endif
1071     #define SQR(x) ((x)*(x))
1072     /*
1073     * int32 find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi);
1074     * real64 *vec,*rvec,tol;
1075     * int32 len, *ivec, *maxi;
1076     *
1077     * Search array vec of positive numbers for the first entry, k, which passes
1078     * (vec[k] >= tol*vecmax) where vecmax is the largest value in vec.
1079     * vec is an array len long. rvec, ivec are (worst case) len long.
1080     * *maxi will be set to the index of the first occurence of the largest
1081     * number in vec which is >= eps.
1082     * 0.0 <= tol <= 1.
1083     * If tol <=0, no search is done (*maxi = 0, k = 0).
1084     * Eps is an absolute number which vec values must be >= to before
1085     * being eligible for the tol test.
1086     *
1087     * Best case, highly nonlinear feature:
1088     * if on entry *maxi = len, we will do a simplified search for the
1089     * first k which satisfies tol and eps tests, based on the value
1090     * stored at vec[len-1].
1091     *
1092     * We could allocate and deallocate rvec/ivec since they are
1093     * temporaries, but it is more efficient for caller to manage that.
1094     * If tol>=1.0, rvec and ivec are ignored and may be NULL.
1095     * NOTE: we are going on value vec[i], not fabs(vec[i]).
1096     * Returns k.
1097     * Remember: GIGO.
1098     * Failure modes- if all numbers are <= 0.0, k and *maxi will be returned 0.
1099     * Failure modes- if all numbers are < eps, k and *maxi will be returned 0.
1100     *
1101     * find_pivot_index, should we implement, would search an int vector.
1102     *
1103     * Hint: if you know you have not increased any values in vec and have not
1104     * changed the value at vec[maxi] before a subsequent search, then call
1105     * with len = previous maxi+1 to reduce the search time.
1106     */
1107     static int32 find_pivot_number(const real64 *vec,
1108     const int32 len,
1109     const real64 tol,
1110     const real64 eps,
1111     int32 * const ivec,
1112     real64 * const rvec,
1113     int32 * const maxi)
1114     {
1115     int32 j,k;
1116     if (tol <= D_ZERO ) {
1117     *maxi = 0;
1118     return 0;
1119     }
1120     if (tol >= D_ONE) {
1121     register real64 biggest = MAX(eps,D_ZERO);
1122     /* get the biggest, period */
1123     k = 0;
1124     for (j=0; j < len; j++) {
1125     if (vec[j] > biggest) {
1126     biggest = vec[j];
1127     k = j;
1128     }
1129     }
1130     *maxi = k;
1131     } else {
1132     int32 bigi;
1133     bigi = 0;
1134     rvec[0] = eps;
1135     ivec[0] = 0;
1136     if (*maxi==len) {
1137     /* cheap search against eps and tol, no list. */
1138     register real64 thold;
1139     thold = tol * vec[len-1];
1140     if (thold >= eps) {
1141     for (k = 0; k < len && vec[k] < thold; k++);
1142     } else {
1143     for (k = 0; k < len && vec[k] < eps; k++);
1144     }
1145     /* adjust maxi to still point at last location, as indicated by len */
1146     (*maxi)--;
1147     } else {
1148     real64 rlast;
1149     rlast = eps;
1150     /* create short list */
1151     for (j=0; j < len; j++) {
1152     while (j < len && vec[j] <= rlast) j++; /* skip to next max */
1153     if (j < len) {
1154     ivec[bigi] = j;
1155     rvec[bigi] = vec[j];
1156     bigi++; /* bigi ends up being len of rvec or, if vec all<= eps, 0 */
1157     rlast = rvec[bigi-1];
1158     }
1159     }
1160     if (bigi == 0) {
1161     *maxi = k = 0;
1162     } else {
1163     register real64 thold;
1164     /* Note: if bigi is enormous, we should do a binary search,
1165     not linear. */
1166     *maxi = ivec[bigi-1];
1167     thold = tol * rvec[bigi-1];
1168     /* get pivot from shortlist, searching backward */
1169     if (thold >= eps) {
1170     for (k = bigi-1; k >= 0 && rvec[k] >= thold; k--);
1171     } else {
1172     for (k = bigi-1; k >= 0 && rvec[k] >= eps; k--);
1173     }
1174     /* translate pivot to vec index */
1175     k = ivec[k+1];
1176     }
1177     }
1178     }
1179     return k;
1180     }
1181    
1182     static void calc_dependent_rows_ranki1(linsolqr_system_t sys)
1183     {
1184     mtx_coord_t nz;
1185     real64 value;
1186     mtx_range_t colrange;
1187     mtx_range_t rowrange;
1188     real64 *lc;
1189     mtx_matrix_t mtx;
1190    
1191     sys->rowdeps = TRUE;
1192     if( ( (sys->reg.row.low == sys->rng.low) &&
1193     ( sys->reg.row.high == sys->rng.low+sys->rank-1 )
1194     ) || sys->rank==0 )
1195     return;
1196    
1197     lc = sys->ludata->tmp;
1198     colrange.low = sys->rng.low;
1199     colrange.high = colrange.low + sys->rank - 1;
1200     rowrange.low = sys->rng.high;
1201     rowrange.high = sys->rng.low+sys->rank;
1202     mtx=sys->factors;
1203    
1204     nz.row = sys->reg.row.low;
1205     for( ; nz.row <= sys->reg.row.high; nz.row++ ) {
1206     if( nz.row == sys->rng.low ) {
1207     nz.row = rowrange.high-1;
1208     continue;
1209     }
1210     mtx_zero_real64(lc,(sys->capacity));
1211     mtx_org_row_vec(mtx,nz.row,lc,&colrange);
1212     if( nz.row < rowrange.high || nz.row > rowrange.low )
1213     backward_substitute(sys,lc,TRUE);
1214     forward_substitute(sys,lc,TRUE);
1215     mtx_clear_row(mtx,nz.row,&colrange);
1216     for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) {
1217     value = lc[mtx_col_to_org(mtx,nz.col)];
1218     if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
1219     }
1220     }
1221     }
1222    
1223    
1224     static void calc_dependent_cols_ranki1(linsolqr_system_t sys)
1225     {
1226     mtx_coord_t nz;
1227     real64 value;
1228     mtx_range_t rowrange;
1229     mtx_range_t colrange;
1230     real64 *lc;
1231     mtx_matrix_t mtx;
1232    
1233     sys->coldeps = TRUE;
1234     if( ( (sys->reg.col.low == sys->rng.low) &&
1235     ( sys->reg.col.high == sys->rng.low+sys->rank-1 )
1236     ) || sys->rank==0 )
1237     return;
1238    
1239     lc = sys->ludata->tmp;
1240     rowrange.low = sys->rng.low;
1241     rowrange.high = rowrange.low + sys->rank - 1;
1242     colrange.high = sys->rng.low+sys->rank;
1243     colrange.low = sys->rng.high;
1244     mtx=sys->factors;
1245    
1246     nz.col = sys->reg.col.low;
1247     for( ; nz.col <= sys->reg.col.high; nz.col++ ) {
1248     if( nz.col == sys->rng.low ) {
1249     nz.col = colrange.high-1;
1250     continue;
1251     }
1252     mtx_zero_real64(lc,sys->capacity);
1253     mtx_org_col_vec(mtx,nz.col,lc,&rowrange);
1254     if( nz.col < colrange.high || nz.col > colrange.low )
1255     backward_substitute(sys,lc,FALSE);
1256     forward_substitute(sys,lc,FALSE);
1257     mtx_clear_col(mtx,nz.col,&rowrange);
1258     for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) {
1259     value = lc[mtx_row_to_org(mtx,nz.row)];
1260     if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
1261     }
1262     }
1263     }
1264    
1265     /**
1266     *** Given a matrix and a diagonal range, sets the diagonal elements to 0
1267     *** but does not delete them in the range low to high inclusive.
1268     *** worst cost= k*(nonzeros in rows of range treated).
1269     *** likely cost= k*(nonzeros in column pivoted rows of range treated)
1270     *** since you can smartly put in the diagonal last and find
1271     *** it first on many occasions.
1272     *** This is a good bit cheaper than deleting the elements unless one
1273     *** expects thousands of solves.
1274     **/
1275     static void zero_diagonal_elements(mtx_matrix_t mtx,
1276     int32 low, int32 high)
1277     {
1278     mtx_coord_t nz;
1279     for (nz.row = low; nz.row <= high; nz.row++) {
1280     nz.col = nz.row;
1281     mtx_set_value(mtx,&nz,0.0);
1282     }
1283     }
1284    
1285     static void zero_unpivoted_vars(linsolqr_system_t sys,
1286     real64 *varvalues,
1287     boolean transpose)
1288     /**
1289     *** Sets the values of unpivoted variables to zero.
1290     **/
1291     {
1292     int32 ndx,order;
1293     mtx_matrix_t mtx;
1294    
1295     mtx = sys->factors;
1296     order = mtx_order(mtx);
1297     for( ndx = 0 ; ndx < sys->rng.low ; ++ndx )
1298     if (transpose)
1299     varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO;
1300     else
1301     varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO;
1302    
1303     for( ndx = sys->rng.low + sys->rank ; ndx < order ; ++ndx )
1304     if (transpose)
1305     varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO;
1306     else
1307     varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO;
1308     }
1309    
1310     #if LINSOL_DEBUG
1311     static void debug_out_factors(FILE *fp,linsolqr_system_t sys)
1312     /**
1313     *** Outputs permutation and values of the nonzero elements in the
1314     *** factor matrix square region given by sys->rng.
1315     **/
1316     {
1317     mtx_region_t reg;
1318     reg.row.low=reg.col.low=sys->rng.low;
1319     reg.row.high=reg.col.high=sys->rng.high;
1320     mtx_write_region(fp,sys->factors,&reg);
1321     }
1322     #endif /* LINSOL_DEBUG */
1323    
1324     /***************************************************************************\
1325     Reordering functions for SPK1, and possibly for other schemes to be
1326     implemented later.
1327     The stuff here is almost, but not quite, black magic. Don't tinker with it.
1328     \***************************************************************************/
1329    
1330     /*********************************
1331     begin of spk1 stuff
1332     *********************************/
1333     struct reorder_list { /* List of rows/columns and their counts. */
1334     int32 ndx;
1335     int32 count;
1336     struct reorder_list *next;
1337     };
1338    
1339     struct reorder_vars {
1340     mtx_matrix_t mtx;
1341     mtx_region_t reg; /* Current active region */
1342     int32 colhigh; /* Original value of reg.col.high */
1343     struct reorder_list *tlist; /* Array of (enough) list elements */
1344     int32 *rowcount; /* rowcount[reg.row.low .. reg.row.high] */
1345     };
1346    
1347     static void adjust_row_count(struct reorder_vars *vars,int32 removed_col)
1348     /**
1349     *** Adjusts the row counts to account for the (to be) removed column.
1350     **/
1351     {
1352     mtx_coord_t nz;
1353     real64 value;
1354     nz.col = removed_col;
1355     nz.row = mtx_FIRST;
1356     while( value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row)),
1357     nz.row != mtx_LAST )
1358     --(vars->rowcount[nz.row]);
1359     }
1360    
1361     static void assign_row_and_col(struct reorder_vars *vars,
1362     int32 row,
1363     int32 col)
1364     /**
1365     *** Assigns the given row to the given column, moving the row and column
1366     *** to the beginning of the active region and removing them (readjusting
1367     *** the region). The row counts are NOT adjusted. If col == mtx_NONE,
1368     *** then no column is assigned and the row is moved to the end of the
1369     *** active block instead. Otherwise, it is assumed that the column
1370     *** is active.
1371     **/
1372     {
1373     if( col == mtx_NONE ) {
1374     mtx_swap_rows(vars->mtx,row,vars->reg.row.high);
1375     vars->rowcount[row] = vars->rowcount[vars->reg.row.high];
1376     --(vars->reg.row.high);
1377     } else {
1378     mtx_swap_rows(vars->mtx,row,vars->reg.row.low);
1379     vars->rowcount[row] = vars->rowcount[vars->reg.row.low];
1380     mtx_swap_cols(vars->mtx,col,vars->reg.col.low);
1381     ++(vars->reg.row.low);
1382     ++(vars->reg.col.low);
1383     }
1384     }
1385    
1386     static void push_column_on_stack(struct reorder_vars *vars,int32 col)
1387     /**
1388     *** Pushes the given column onto the stack. It is assumed that the
1389     *** column is active. Row counts are adjusted.
1390     **/
1391     {
1392     adjust_row_count(vars,col);
1393     mtx_swap_cols(vars->mtx,col,vars->reg.col.high);
1394     --(vars->reg.col.high);
1395     }
1396    
1397     static int32 pop_column_from_stack(struct reorder_vars *vars)
1398     /**
1399     *** Pops the column on the "top" of the stack off of the stack and
1400     *** returns the column index, where it now lies in the active region.
1401     *** If the stack is empty, mtx_NONE is returned. Row counts are NOT
1402     *** adjusted (this together with a subsequent assignment of this column
1403     *** ==> no row count adjustment necessary).
1404     **/
1405     {
1406     if( vars->reg.col.high < vars->colhigh )
1407     return(++(vars->reg.col.high));
1408     else
1409     return( mtx_NONE );
1410     }
1411    
1412     static void assign_null_rows(struct reorder_vars *vars)
1413     /**
1414     *** Assigns empty rows, moving them to the assigned region. It is
1415     *** assumed that row counts are correct. Columns are assigned off the
1416     *** stack.
1417     **/
1418     {
1419     int32 row;
1420    
1421     for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1422     if( vars->rowcount[row] == 0 )
1423     assign_row_and_col(vars , row , pop_column_from_stack(vars));
1424     }
1425    
1426     static void forward_triangularize(struct reorder_vars *vars)
1427     /**
1428     *** Forward triangularizes the region, assigning singleton rows with their
1429     *** one and only incident column until there are no more. The row counts
1430     *** must be correct, and they are updated.
1431     **/
1432     {
1433     boolean change;
1434     mtx_coord_t nz;
1435     real64 value;
1436    
1437     do {
1438     change = FALSE;
1439     for( nz.row = vars->reg.row.low ;
1440     nz.row <= vars->reg.row.high && vars->rowcount[nz.row] != 1;
1441     ++nz.row ) ;
1442     if( nz.row <= vars->reg.row.high ) {
1443     /* found singleton row */
1444     nz.col = mtx_FIRST; /* this is somehow coming back with nz.col -1 */
1445     value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col));
1446     adjust_row_count(vars,nz.col);
1447     assign_row_and_col(vars,nz.row,nz.col);
1448     change = TRUE;
1449     }
1450     } while( change );
1451     }
1452    
1453     static int32 select_row(struct reorder_vars *vars)
1454     /**
1455     *** Selects a row and returns its index. It is assumed that there is a
1456     *** row. Row counts must be correct. vars->tlist will be used.
1457     **/
1458     {
1459     int32 min_row_count;
1460     int32 max_col_count;
1461     int32 row;
1462     int32 i, nties=-2; /* # elements currently defined in vars->tlist */
1463     int32 sum;
1464     mtx_coord_t nz;
1465     real64 value;
1466     mtx_matrix_t mtx;
1467     mtx_range_t *colrng, *rowrng;
1468    
1469     /* Set to something > any possible value */
1470     min_row_count = vars->reg.col.high-vars->reg.col.low+2;
1471     for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1472     if( vars->rowcount[row] <= min_row_count ) {
1473     if( vars->rowcount[row] < min_row_count ) {
1474     min_row_count = vars->rowcount[row];
1475     nties = 0;
1476     }
1477     vars->tlist[nties++].ndx = row;
1478     }
1479     /**
1480     *** vars->tlist[0..nties-1] is a list of row numbers which tie for
1481     *** minimum row count.
1482     **/
1483    
1484     max_col_count = -1; /* < any possible value */
1485     i = 0;
1486     mtx = vars->mtx;
1487     colrng=&(vars->reg.col);
1488     rowrng=&(vars->reg.row);
1489     while( i < nties ) {
1490    
1491     sum = 0;
1492     nz.row = vars->tlist[i].ndx;
1493     nz.col = mtx_FIRST;
1494     while( value = mtx_next_in_row(mtx,&nz,colrng),
1495     nz.col != mtx_LAST )
1496     sum += mtx_nonzeros_in_col(mtx,nz.col,rowrng);
1497     if( sum > max_col_count ) {
1498     max_col_count = sum;
1499     row = nz.row;
1500     }
1501     i++;
1502     }
1503     /**
1504     *** Now row contains the row with the minimum row count, which has the
1505     *** greatest total column count of incident columns among all rows with
1506     *** the same (minimum) row count. Select it.
1507     **/
1508     return(row);
1509     }
1510    
1511     static void spk1_reorder(struct reorder_vars *vars)
1512     /**
1513     *** Reorders the assigned matrix vars->mtx within the specified bounding
1514     *** block region vars->reg. The region is split into 6 subregions during
1515     *** reordering: the rows are divided in two, and the columns divided in
1516     *** three. Initially everything is in the active subregion. Ultimately,
1517     *** everything will be assigned.
1518     ***
1519     *** <-- assigned -->|<-- active-->|<-- on stack -->|
1520     *** ----+----------------+-------------+----------------+
1521     *** a | | | |
1522     *** s | | | |
1523     *** s | | | |
1524     *** i | (SQUARE) | | |
1525     *** g | | | |
1526     *** n | | | |
1527     *** e | | | |
1528     *** d | | | |
1529     *** ----+----------------+-------------+----------------+
1530     *** a | | | |
1531     *** c | | ACTIVE | |
1532     *** t | | REGION | |
1533     *** i | | | |
1534     *** v | | | |
1535     *** e | | | |
1536     *** ----+----------------+-------------+----------------+
1537     ***
1538     *** The algorithm is roughly as follows:
1539     *** (1) select a row (by some criterion).
1540     *** (2) push columns incident on that row onto the stack in decreasing
1541     *** order of their length.
1542     *** (3) pop first column off the stack and assign it to the selected
1543     *** row.
1544     *** (4) forward-triangularize (assign singleton rows with their one
1545     *** and only incident column, until no longer possible).
1546     ***
1547     *** (1)-(4) should be repeated until the active subregion becomes empty.
1548     ***
1549     *** Everything above was written as though the entire matrix is
1550     *** involved. In reality, only the relevant square region is involved.
1551     **/
1552     {
1553     int32 row, size;
1554     int32 *rowcount_array_origin;
1555     mtx_matrix_t mtx;
1556    
1557     size = MAX(vars->reg.row.high,vars->reg.col.high) + 1;
1558     vars->tlist = size > 0 ? (struct reorder_list *)
1559     ascmalloc( size*sizeof(struct reorder_list) ) : NULL;
1560     vars->rowcount = rowcount_array_origin = size > 0 ? (int32 *)
1561     ascmalloc( size*sizeof(int32) ) : NULL;
1562     mtx = vars->mtx;
1563    
1564     vars->colhigh = vars->reg.col.high;
1565     /* Establish row counts */
1566     for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1567     vars->rowcount[row] =
1568     mtx_nonzeros_in_row(mtx,row,&(vars->reg.col));
1569    
1570     while(TRUE) {
1571     struct reorder_list *head;
1572     int32 nelts; /* # elements "allocated" from vars->tlist */
1573     mtx_coord_t nz;
1574     real64 value;
1575    
1576     forward_triangularize(vars);
1577     assign_null_rows(vars);
1578     if( vars->reg.row.low>vars->reg.row.high ||
1579     vars->reg.col.low>vars->reg.col.high ) {
1580     /* Active region is now empty, done */
1581     if( NOTNULL(vars->tlist) )
1582     ascfree( vars->tlist );
1583     if( NOTNULL(rowcount_array_origin) )
1584     ascfree( rowcount_array_origin );
1585     return;
1586     }
1587    
1588     head = NULL;
1589     nelts = 0;
1590     nz.row = select_row(vars);
1591     nz.col = mtx_FIRST;
1592     while( value = mtx_next_in_row(mtx,&nz,&(vars->reg.col)),
1593     nz.col != mtx_LAST ) {
1594     struct reorder_list **q,*p;
1595    
1596     p = &(vars->tlist[nelts++]);
1597     p->ndx = mtx_col_to_org(mtx,nz.col);
1598     p->count = mtx_nonzeros_in_col(mtx,nz.col,&(vars->reg.row));
1599     for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) );
1600     p->next = *q;
1601     *q = p;
1602     }
1603     /**
1604     *** We now have a list of columns which intersect the selected row.
1605     *** The list is in order of decreasing column count.
1606     **/
1607    
1608     /* Push incident columns on stack */
1609     for( ; NOTNULL(head) ; head = head->next )
1610     push_column_on_stack(vars,mtx_org_to_col(mtx,head->ndx));
1611    
1612     /* Pop column off stack and assign to selected row */
1613     assign_row_and_col(vars , nz.row , pop_column_from_stack(vars));
1614     }
1615     /* Not reached. */
1616     }
1617    
1618     /*********************************
1619     end of spk1 stuff
1620     *********************************/
1621     /*********************************
1622     begin of tspk1 stuff
1623     *********************************/
1624     struct creorder_list { /* List of columns/rows and their counts. */
1625     int32 ndx;
1626     int32 count;
1627     struct creorder_list *next;
1628     };
1629    
1630     struct creorder_vars {
1631     mtx_matrix_t mtx;
1632     mtx_region_t reg; /* Current active region */
1633     int32 rowhigh; /* Original value of reg.row.high */
1634     struct creorder_list *tlist; /* Array of (enough) list elements */
1635     int32 *colcount; /* colcount[reg.col.low .. reg.col.high] */
1636     };
1637    
1638     static void adjust_col_count(struct creorder_vars *vars,int32 removed_row)
1639     /**
1640     *** Adjusts the column counts to account for the (to be) removed row.
1641     **/
1642     {
1643     mtx_coord_t nz;
1644     real64 value;
1645     nz.row = removed_row;
1646     nz.col = mtx_FIRST;
1647     while( value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col)),
1648     nz.col != mtx_LAST )
1649     --(vars->colcount[nz.col]);
1650     }
1651    
1652     static void assign_col_and_row(struct creorder_vars *vars,
1653     int32 col,
1654     int32 row)
1655     /**
1656     *** Assigns the given row to the given column, moving the row and column
1657     *** to the beginning of the active region and removing them (readjusting
1658     *** the region). The col counts are NOT adjusted. If col == mtx_NONE,
1659     *** then no column is assigned and the col is moved to the end of the
1660     *** active block instead. Otherwise, it is assumed that the row
1661     *** is active.
1662     **/
1663     {
1664     if( row == mtx_NONE ) {
1665     mtx_swap_cols(vars->mtx,col,vars->reg.col.high);
1666     vars->colcount[col] = vars->colcount[vars->reg.col.high];
1667     --(vars->reg.col.high);
1668     } else {
1669     mtx_swap_cols(vars->mtx,col,vars->reg.col.low);
1670     vars->colcount[col] = vars->colcount[vars->reg.col.low];
1671     mtx_swap_rows(vars->mtx,row,vars->reg.row.low);
1672     ++(vars->reg.col.low);
1673     ++(vars->reg.row.low);
1674     }
1675     }
1676    
1677     static void push_row_on_stack(struct creorder_vars *vars,int32 row)
1678     /**
1679     *** Pushes the given row onto the stack. It is assumed that the
1680     *** row is active. Col counts are adjusted.
1681     **/
1682     {
1683     adjust_col_count(vars,row);
1684     mtx_swap_rows(vars->mtx,row,vars->reg.row.high);
1685     --(vars->reg.row.high);
1686     }
1687    
1688     static int32 pop_row_from_stack(struct creorder_vars *vars)
1689     /**
1690     *** Pops the row on the "top" of the stack off of the stack and
1691     *** returns the row index, where it now lies in the active region.
1692     *** If the stack is empty, mtx_NONE is returned. Col counts are NOT
1693     *** adjusted (this together with a subsequent assignment of this row
1694     *** ==> no col count adjustment necessary).
1695     **/
1696     {
1697     if( vars->reg.row.high < vars->rowhigh )
1698     return(++(vars->reg.row.high));
1699     else
1700     return( mtx_NONE );
1701     }
1702    
1703     static void assign_null_cols(struct creorder_vars *vars)
1704     /**
1705     *** Assigns empty cols, moving them to the assigned region. It is
1706     *** assumed that col counts are correct. Rows are assigned off the
1707     *** stack.
1708     **/
1709     {
1710     int32 col;
1711    
1712     for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1713     if( vars->colcount[col] == 0 )
1714     assign_col_and_row(vars , col , pop_row_from_stack(vars));
1715     }
1716    
1717     static void cforward_triangularize(struct creorder_vars *vars)
1718     /**
1719     *** Forward triangularizes the region, assigning singleton columns with their
1720     *** one and only incident row until there are no more. The column counts
1721     *** must be correct, and they are updated.
1722     **/
1723     {
1724     boolean change;
1725    
1726     do {
1727     mtx_coord_t nz;
1728     real64 value;
1729     change = FALSE;
1730     for( nz.col = vars->reg.col.low ;
1731     nz.col <= vars->reg.col.high && vars->colcount[nz.col] != 1;
1732     ++nz.col ) ;
1733     if( nz.col <= vars->reg.col.high ) {
1734     /* found singleton col */
1735     nz.row = mtx_FIRST;
1736     value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row));
1737     adjust_col_count(vars,nz.row);
1738     assign_col_and_row(vars,nz.col,nz.row);
1739     change = TRUE;
1740     }
1741     } while( change );
1742     }
1743    
1744     static int32 select_col(struct creorder_vars *vars)
1745     /**
1746     *** Selects a col and returns its index. It is assumed that there is a
1747     *** col. Col counts must be correct. vars->tlist will be used.
1748     **/
1749     {
1750     int32 min_col_count;
1751     int32 max_row_count;
1752     int32 col;
1753     int32 i, nties=-2; /* # elements currently defined in vars->tlist */
1754     int32 sum;
1755     mtx_coord_t nz;
1756     real64 value;
1757     mtx_matrix_t mtx;
1758     mtx_range_t *colrng, *rowrng;
1759    
1760     /* Set to something > any possible value */
1761     min_col_count = vars->reg.row.high-vars->reg.row.low+2;
1762     for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1763     if( vars->colcount[col] <= min_col_count ) {
1764     if( vars->colcount[col] < min_col_count ) {
1765     min_col_count = vars->colcount[col];
1766     nties = 0;
1767     }
1768     vars->tlist[nties++].ndx = col;
1769     }
1770     /**
1771     *** vars->tlist[0..nties-1] is a list of row numbers which tie for
1772     *** minimum col count.
1773     **/
1774    
1775     max_row_count = -1; /* < any possible value */
1776     i = 0;
1777     mtx = vars->mtx;
1778     rowrng=&(vars->reg.row);
1779     colrng=&(vars->reg.col);
1780     while( i < nties ) {
1781    
1782     sum = 0;
1783     nz.row = mtx_FIRST;
1784     nz.col = vars->tlist[i].ndx;
1785     while( value = mtx_next_in_col(mtx,&nz,rowrng),
1786     nz.row != mtx_LAST )
1787     sum += mtx_nonzeros_in_row(mtx,nz.row,colrng);
1788     if( sum > max_row_count ) {
1789     max_row_count = sum;
1790     col = nz.col;
1791     }
1792     i++;
1793     }
1794     /**
1795     *** Now col contains the col with the minimum col count, which has the
1796     *** greatest total row count of incident rows among all cols with
1797     *** the same (minimum) col count. Select it.
1798     **/
1799     return(col);
1800     }
1801    
1802     static void tspk1_reorder(struct creorder_vars *vars)
1803     /**
1804     *** Transpose the picture and explanation that follows:
1805     *** Reorders the assigned matrix vars->mtx within the specified bounding
1806     *** block region vars->reg. The region is split into 6 subregions during
1807     *** reordering: the rows are divided in two, and the columns divided in
1808     *** three. Initially everything is in the active subregion. Ultimately,
1809     *** everything will be assigned.
1810     ***
1811     *** <-- assigned -->|<-- active-->|<-- on stack -->|
1812     *** ----+----------------+-------------+----------------+
1813     *** a | | | |
1814     *** s | | | |
1815     *** s | | | |
1816     *** i | (SQUARE) | | |
1817     *** g | | | |
1818     *** n | | | |
1819     *** e | | | |
1820     *** d | | | |
1821     *** ----+----------------+-------------+----------------+
1822     *** a | | | |
1823     *** c | | ACTIVE | |
1824     *** t | | REGION | |
1825     *** i | | | |
1826     *** v | | | |
1827     *** e | | | |
1828     *** ----+----------------+-------------+----------------+
1829     ***
1830     *** The algorithm is roughly as follows:
1831     *** (1) select a row (by some criterion).
1832     *** (2) push columns incident on that row onto the stack in decreasing
1833     *** order of their length.
1834     *** (3) pop first column off the stack and assign it to the selected
1835     *** row.
1836     *** (4) forward-triangularize (assign singleton rows with their one
1837     *** and only incident column, until no longer possible).
1838     ***
1839     *** (1)-(4) should be repeated until the active subregion becomes empty.
1840     ***
1841     *** Everything above was written as though the entire matrix is
1842     *** involved. In reality, only the relevant square region is involved.
1843     **/
1844     {
1845     int32 col, size;
1846     int32 *colcount_array_origin;
1847     mtx_matrix_t mtx;
1848    
1849     size = vars->reg.col.high - vars->reg.col.low + 1;
1850     size = MAX(size,vars->reg.row.high - vars->reg.row.low + 1);
1851     vars->tlist = size > 0 ? (struct creorder_list *)
1852     ascmalloc( size*sizeof(struct creorder_list) ) : NULL;
1853     colcount_array_origin = size > 0 ? (int32 *)
1854     ascmalloc( size*sizeof(int32) ) : NULL;
1855     vars->colcount =
1856     NOTNULL(colcount_array_origin) ?
1857     colcount_array_origin - vars->reg.col.low : NULL;
1858     mtx = vars->mtx;
1859    
1860     vars->rowhigh = vars->reg.row.high;
1861     /* Establish col counts */
1862     for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1863     vars->colcount[col] =
1864     mtx_nonzeros_in_col(mtx,col,&(vars->reg.row));
1865    
1866     while(TRUE) {
1867     struct creorder_list *head;
1868     int32 nelts; /* # elements "allocated" from vars->tlist */
1869     mtx_coord_t nz;
1870     real64 value;
1871    
1872     cforward_triangularize(vars);
1873     assign_null_cols(vars);
1874     if( vars->reg.col.low > vars->reg.col.high ||
1875     vars->reg.row.low > vars->reg.row.high ) {
1876     /* Active region is now empty, done */
1877     if( NOTNULL(vars->tlist) )
1878     ascfree( vars->tlist );
1879     if( NOTNULL(colcount_array_origin) )
1880     ascfree( colcount_array_origin );
1881     return;
1882     }
1883    
1884     head = NULL;
1885     nelts = 0;
1886     nz.row = mtx_FIRST;
1887     nz.col = select_col(vars);
1888     while( value = mtx_next_in_col(mtx,&nz,&(vars->reg.row)),
1889     nz.row != mtx_LAST ) {
1890     struct creorder_list **q,*p;
1891    
1892     p = &(vars->tlist[nelts++]);
1893     p->ndx = mtx_row_to_org(mtx,nz.row);
1894     p->count = mtx_nonzeros_in_row(mtx,nz.row,&(vars->reg.col));
1895     for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) );
1896     p->next = *q;
1897     *q = p;
1898     }
1899     /**
1900     *** We now have a list of columns which intersect the selected row.
1901     *** The list is in order of decreasing column count.
1902     **/
1903    
1904     /* Push incident rows on stack */
1905     for( ; NOTNULL(head) ; head = head->next )
1906     push_row_on_stack(vars,mtx_org_to_row(mtx,head->ndx));
1907    
1908     /* Pop row off stack and assign to selected col */
1909     assign_col_and_row(vars , nz.col , pop_row_from_stack(vars));
1910     }
1911     /* Not reached. */
1912     }
1913    
1914     /*********************************
1915     end of tspk1 stuff
1916     *********************************/
1917    
1918     static boolean nonempty_row(mtx_matrix_t mtx,int32 row)
1919     /**
1920     *** ? row not empty in mtx.
1921     **/
1922     {
1923     mtx_coord_t nz;
1924     real64 value;
1925     value = mtx_next_in_row(mtx, mtx_coord(&nz,row,mtx_FIRST), mtx_ALL_COLS);
1926     return( nz.col != mtx_LAST );
1927     }
1928    
1929     static boolean nonempty_col(mtx_matrix_t mtx,int32 col)
1930     /**
1931     *** ? column not empty in mtx.
1932     **/
1933     {
1934     mtx_coord_t nz;
1935     real64 value;
1936     value = mtx_next_in_col(mtx, mtx_coord(&nz,mtx_FIRST,col), mtx_ALL_ROWS);
1937     return( nz.row != mtx_LAST );
1938     }
1939    
1940     static void determine_pivot_range(linsolqr_system_t sys)
1941     /**
1942     *** Calculates sys->rng from sys->coef.
1943     **/
1944     {
1945     sys->reg.row.low = sys->reg.row.high = -1;
1946     sys->reg.col.low = sys->reg.col.high = -1;
1947    
1948     for( sys->rng.high=mtx_order(sys->coef) ; --(sys->rng.high) >= 0 ; ) {
1949     if( nonempty_row(sys->coef,sys->rng.high) &&
1950     sys->reg.row.high < 0 )
1951     sys->reg.row.high = sys->rng.high;
1952     if( nonempty_col(sys->coef,sys->rng.high) &&
1953     sys->reg.col.high < 0 )
1954     sys->reg.col.high = sys->rng.high;
1955     if( nonempty_row(sys->coef,sys->rng.high) &&
1956     nonempty_col(sys->coef,sys->rng.high) )
1957     break;
1958     }
1959    
1960     for( sys->rng.low=0 ; sys->rng.low <= sys->rng.high ; ++(sys->rng.low) ) {
1961     if( nonempty_row(sys->coef,sys->rng.low) &&
1962     sys->reg.row.low < 0 )
1963     sys->reg.row.low = sys->rng.low;
1964     if( nonempty_col(sys->coef,sys->rng.low) &&
1965     sys->reg.col.low < 0 )
1966     sys->reg.col.low = sys->rng.low;
1967     if( nonempty_row(sys->coef,sys->rng.low) &&
1968     nonempty_col(sys->coef,sys->rng.low) )
1969     break;
1970     }
1971     }
1972    
1973     static void square_region(linsolqr_system_t sys,mtx_region_t *region)
1974     /**
1975     *** Get the largest square confined to the diagonal within the region given
1976     *** and set sys->rng accordingly.
1977     **/
1978     {
1979     sys->reg = *region;
1980     sys->rng.low = MAX(region->row.low,region->col.low);
1981     sys->rng.high = MIN(region->row.high,region->col.high);
1982     }
1983    
1984     static int ranki_reorder(linsolqr_system_t sys,mtx_region_t *region)
1985     /**
1986     *** The region to reorder is first isolated by truncating the region
1987     *** provided to the largest square region confined to the matrix diagonal.
1988     *** It is presumed it will contain no empty rows or columns and will
1989     *** provide the basis of candidate pivots when factoring.
1990     **/
1991     {
1992     struct reorder_vars vars;
1993     CHECK_SYSTEM(sys);
1994     if (sys->fclass != ranki) {
1995 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"called on system with inappropriate factor class");
1996 aw0a 1 return 1;
1997     }
1998     if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
1999     else square_region(sys,region);
2000    
2001     vars.mtx = sys->coef;
2002     vars.reg.row.low = vars.reg.col.low = sys->rng.low;
2003     vars.reg.row.high = vars.reg.col.high = sys->rng.high;
2004     spk1_reorder(&vars);
2005     return 0;
2006     }
2007    
2008     static int tranki_reorder(linsolqr_system_t sys,mtx_region_t *region)
2009     /**
2010     *** The region to reorder is first isolated by truncating the region
2011     *** provided to the largest square region confined to the matrix diagonal.
2012     *** It is presumed it will contain no empty rows or columns and will
2013     *** provide the basis of candidate pivots when factoring.
2014     **/
2015     {
2016     struct creorder_vars vars;
2017     CHECK_SYSTEM(sys);
2018     if ( !(sys->fclass==ranki || sys->fclass==s_qr) ) {
2019 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_ERR,"reorder called on system with inappropriate factor method");
2020 aw0a 1 return 1;
2021     }
2022     if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2023     else square_region(sys,region);
2024    
2025     vars.mtx = sys->coef;
2026     vars.reg.row.low = vars.reg.col.low = sys->rng.low;
2027     vars.reg.row.high = vars.reg.col.high = sys->rng.high;
2028     tspk1_reorder(&vars);
2029     return 0;
2030     }
2031    
2032     /***************************************************************************\
2033     End of reordering functions for SPK1.
2034     \***************************************************************************/
2035    
2036     /***************************************************************************\
2037     RANKI implementation functions.
2038     \***************************************************************************/
2039    
2040     static void eliminate_row(mtx_matrix_t mtx,
2041     mtx_range_t *rng,
2042     int32 row, /* row to eliminate */
2043     real64 *tmp, /* temporary array */
2044     real64 *pivots) /* prior pivots array */
2045     /**
2046     *** Eliminates the given row to the left of the diagonal element, assuming
2047     *** valid pivots for all of the diagonal elements above it (the elements
2048     *** above those diagonal elements, if any exist, are assumed to be U
2049     *** elements and therefore ignored). The temporary array is used by this
2050     *** function to do its job. tmp[k], where rng->low <= k <= rng->high must
2051     *** be defined (allocated) but need not be initialized.
2052     *** pivots[k] where rng->low <= k <=rng->high must be allocated and
2053     *** pivots[rng->low] to pivots[row-1] must contain the previous pivots.
2054     **/
2055     {
2056     mtx_coord_t nz;
2057     mtx_range_t left_to_eliminate,high_cols,tmp_cols;
2058     boolean do_series=FALSE;
2059    
2060     high_cols.low = row;
2061     high_cols.high = rng->high;
2062     left_to_eliminate.low = rng->low;
2063     left_to_eliminate.high = row - 1;
2064    
2065     /* Move non-zeros left of pivot from matrix to full array */
2066     mtx_zero_real64(tmp+rng->low,(row-rng->low));
2067     nz.row = row;
2068     mtx_cur_row_vec(mtx,row,tmp,&left_to_eliminate);
2069     mtx_clear_row(mtx,row,&left_to_eliminate);
2070    
2071     /* eliminates nzs from pivot, one by one, filling tmp with multipliers */
2072     do_series= ( (row-1) >= (rng->low) );
2073     if (do_series) {
2074     mtx_add_row_series_init(mtx,row,FALSE);
2075     /* do nothing between here and the END call to _init which removes
2076     elements from row */
2077     }
2078     for( nz.row = row-1 ; nz.row >= rng->low ; --(nz.row) ) {
2079     if( tmp[nz.row] == D_ZERO )
2080     continue; /* Nothing to do for this row */
2081    
2082     nz.col = nz.row;
2083     tmp[nz.row] /= pivots[nz.row];
2084     /* tmp[nz.row] now equals multiplier */
2085    
2086     /* Perform "mtx_add_row" for full array part of the row */
2087     left_to_eliminate.high = nz.row - 1;
2088     mtx_cur_vec_add_row(mtx,tmp,nz.row,-tmp[nz.row],
2089     &left_to_eliminate,FALSE);
2090    
2091     /* Perform "mtx_add_row" on remaining part of row */
2092     mtx_add_row_series(nz.row,-tmp[nz.row],&high_cols);
2093     }
2094     if (do_series) {
2095     mtx_add_row_series_init(mtx,mtx_NONE,TRUE);
2096     }
2097    
2098     mtx_fill_cur_row_vec(mtx,row,tmp,mtx_range(&tmp_cols,rng->low,row-1));
2099    
2100     }
2101    
2102    
2103     /* not in use it seems */
2104     #if BUILD_DEAD_CODE /* yes, baa */
2105     static int32 top_of_spike(linsolqr_system_t sys,int32 col)
2106     /**
2107     *** Determines the top row (row of lowest index) in a possible spike
2108     *** above the diagonal element in the given column. If there is no spike,
2109     *** then (row = ) col is returned.
2110     **/
2111     {
2112     mtx_range_t above_diagonal;
2113     mtx_coord_t nz;
2114     real64 value;
2115     int32 top_row;
2116    
2117     above_diagonal.low = sys->rng.low;
2118     above_diagonal.high = col-1;
2119     top_row = nz.col = col;
2120     nz.row = mtx_FIRST;
2121     while( value = mtx_next_in_col(sys->factors,&nz,&above_diagonal),
2122     nz.row != mtx_LAST )
2123     if( nz.row < top_row )
2124     top_row = nz.row;
2125     return( top_row );
2126     }
2127     #endif /* BUILD_DEAD_CODE */
2128    
2129     static boolean col_is_a_spike(linsolqr_system_t sys,int32 col)
2130     /**
2131     *** Determines if the col is a spike, characterized by having any
2132     *** nonzeros above the diagonal. By construction there shouldn't
2133     *** be any numeric 0 nonzeros above the diagonal, so we don't prune
2134     *** them out here.
2135     **/
2136     {
2137     mtx_range_t above_diagonal;
2138     mtx_coord_t nz;
2139    
2140     above_diagonal.low = sys->rng.low;
2141     above_diagonal.high = col-1;
2142     nz.col = col;
2143     nz.row = mtx_FIRST;
2144     /* this loop is cheaper than it looks */
2145     while( (void)mtx_next_in_col(sys->factors,&nz,&above_diagonal),
2146     nz.row != mtx_LAST ) {
2147     if( nz.row < col ) return TRUE;
2148     }
2149     return( FALSE );
2150     }
2151    
2152     /* see implementation notes before modifying this function! */
2153     static void number_drag(real64 *vec, int32 rfrom, int32 rto)
2154     {
2155     int32 i;
2156     real64 tmp;
2157     if (rto <rfrom) {
2158     tmp=vec[rfrom];
2159     for (i=rfrom; i> rto; i--) {
2160     vec[i]=vec[i-1]; /* rotate right */
2161     }
2162     vec[rto]=tmp;
2163     return;
2164     }
2165     if (rto > rfrom) {
2166     tmp=vec[rfrom];
2167     for (i=rfrom; i< rto; i++) {
2168     vec[i]=vec[i+1]; /* rotate left */
2169     }
2170     vec[rto]=tmp;
2171     }
2172     }
2173    
2174     static void rankikw_factor(linsolqr_system_t sys)
2175     /**
2176     *** (the following description also applies to rankijz_factor which is
2177     *** different only in pivot selection strategy.)
2178     *** This is the heart of the linear equation solver. This function
2179     *** factorizes the matrix into a lower (L) and upper (U) triangular
2180     *** matrix. sys->smallest_pivot and sys->rank are calculated. The
2181     *** RANKI method is utilized. At the end of elimination, the matrix A
2182     *** is factored into A = U L, where U and L are stored as follows:
2183     ***
2184     *** <----- r ----> <- n-r ->
2185     *** +--------------+---------+
2186     *** | | |
2187     *** | U | |
2188     *** | | |
2189     *** | L | | r
2190     *** | | |
2191     *** +--------------+---------+
2192     *** | | |
2193     *** | | 0 | n-r
2194     *** | | |
2195     *** +--------------+---------+
2196     ***
2197     *** The rows and columns have been permuted so that all of the pivoted
2198     *** original rows and columns are found in the first r rows and columns
2199     *** of the region. The last n-r rows and columns are unpivoted. U has
2200     *** 1's on its diagonal, whereas L's diagonal is stored in the matrix.
2201     ***
2202     *** Everything above was written as though the entire matrix is
2203     *** involved. In reality, only the relevant square region is involved.
2204     **/
2205     {
2206     mtx_coord_t nz;
2207     int32 last_row;
2208     mtx_range_t pivot_candidates;
2209     real64 *tmp;
2210     real64 pivot, *pivots;
2211     int32 length;
2212     mtx_matrix_t mtx;
2213    
2214     length = sys->rng.high - sys->rng.low + 1;
2215     tmp = sys->ludata->tmp;
2216     /* eliminate row takes care of zeroing the relevant region and won't
2217     change values outside of it. */
2218     pivots=sys->ludata->pivlist;
2219     mtx=sys->factors;
2220    
2221     sys->smallest_pivot = MAXDOUBLE;
2222     last_row = pivot_candidates.high = sys->rng.high;
2223     for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
2224    
2225     pivot_candidates.low = nz.col = nz.row;
2226     pivots[nz.row]=pivot = mtx_value(mtx,&nz);
2227     pivot = fabs(pivot);
2228     if( pivot > sys->pivot_zero &&
2229     pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) &&
2230     !col_is_a_spike(sys,nz.row) ) {
2231     /* Good pivot and not a spike: continue with next row */
2232     if( pivot < sys->smallest_pivot )
2233     sys->smallest_pivot = pivot;
2234     ++(nz.row);
2235     continue;
2236     }
2237     /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2238     /**
2239     *** Row is a spike row or will
2240     *** be when a necessary column
2241     *** exchange occurs.
2242     **/
2243     eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots);
2244     /* pivot will be largest of those available. get size and value */
2245     pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row]));
2246     if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */
2247     /* Dependent row, drag to the end */
2248     mtx_drag(mtx,nz.row,last_row);
2249     number_drag(pivots,nz.row,last_row);
2250     --last_row;
2251     #undef KAA_DEBUG
2252     #ifdef KAA_DEBUG
2253 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_WARNING,"Warning: Row %d is dependent with pivot %20.8g",nz.row,pivot);
2254 aw0a 1 #endif /* KAA_DEBUG */
2255     } else {
2256     /* Independent row: nz contains best pivot */
2257     /* Move pivot to diagonal */
2258     mtx_swap_cols(mtx,nz.row,nz.col);
2259     mtx_drag( mtx , nz.row , sys->rng.low );
2260     number_drag(pivots,nz.row,sys->rng.low);
2261     if( pivot < sys->smallest_pivot )
2262     sys->smallest_pivot = pivot;
2263     ++(nz.row);
2264     }
2265     }
2266    
2267     sys->rank = last_row - sys->rng.low + 1;
2268     }
2269    
2270    
2271     static void rankijz_factor(linsolqr_system_t sys)
2272     /**
2273     *** This is an alternate pivoting strategy introduced by Joe Zaher.
2274     *** it looks down and across for good pivots rather than just across.
2275     ***
2276     **/
2277     {
2278     real64 biggest;
2279     mtx_coord_t nz, best;
2280     mtx_region_t candidates;
2281     real64 *tmp;
2282     real64 pivot, *pivots;
2283     int32 length;
2284     mtx_matrix_t mtx;
2285    
2286     length = sys->rng.high - sys->rng.low + 1;
2287     tmp = sys->ludata->tmp;
2288     /* eliminate row takes care of zeroing the relevant region and won't
2289     change values outside of it. */
2290     pivots=sys->ludata->pivlist;
2291     mtx=sys->factors;
2292    
2293     sys->smallest_pivot = MAXDOUBLE;
2294     candidates.row.high = sys->rng.high;
2295     candidates.col.high = sys->rng.high;
2296     for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) {
2297     nz.col = nz.row;
2298     pivots[nz.row] = pivot = mtx_value(mtx,&nz);
2299     pivot = fabs(pivot);
2300     candidates.row.low = nz.row;
2301     candidates.col.low = nz.row;
2302     if( !col_is_a_spike(sys,nz.row) ) {
2303     best.col = nz.row;
2304     mtx_col_max(mtx,&best,&(candidates.row),&biggest);
2305     if( fabs(biggest) >= sys->pivot_zero ) {
2306     if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) {
2307     mtx_swap_rows(mtx,nz.row,best.row);
2308     pivots[nz.row] = biggest;
2309     pivot = fabs(biggest);
2310     }
2311     if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot;
2312     ++(nz.row);
2313     continue;
2314     }
2315     }
2316     /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2317     /**
2318     *** Row is a spike row or will
2319     *** be when a necessary column
2320     *** exchange occurs.
2321     **/
2322     eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots);
2323     /* pivot will be largest of those available. get size and value */
2324     pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row]));
2325     /* Move pivot to diagonal */
2326     if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2327     /* Dependent row, drag nz to lower right */
2328     mtx_drag(mtx,nz.row,candidates.row.high);
2329     number_drag(pivots,nz.row,candidates.row.high);
2330     --(candidates.row.high);
2331     } else {
2332     /* Independent row, drag nz to upper left */
2333     mtx_swap_cols(mtx,nz.row,nz.col);
2334     mtx_drag( mtx , nz.row , sys->rng.low );
2335     number_drag(pivots,nz.row,sys->rng.low);
2336     if( pivot < sys->smallest_pivot )
2337     sys->smallest_pivot = pivot;
2338     ++(nz.row);
2339     }
2340     }
2341    
2342     sys->rank = candidates.row.high - sys->rng.low + 1;
2343     }
2344    
2345    
2346     #define KAA_DEBUG 0
2347     #if KAA_DEBUG
2348     int ranki_entry(linsolqr_system_t sys,mtx_region_t *region)
2349     #else
2350     static int ranki_entry(linsolqr_system_t sys,mtx_region_t *region)
2351     #endif
2352     /**
2353     *** The region to factor is first isolated by truncating the region
2354     *** provided to the largest square region confined to the matrix diagonal.
2355     *** It is presumed it will contain no empty rows or columns and that it has
2356     *** been previously reordered using linsolqr_reorder(sys,region,spk1)
2357     *** or a sane variant of spk1.
2358     *** This is the entry point for all ranki based strategies, regardless of
2359     *** pivot selection subtleties.
2360     **/
2361     {
2362     struct rhs_list *rl;
2363 johnpye 908 double comptime;
2364 aw0a 1
2365     CHECK_SYSTEM(sys);
2366     if( sys->factored )
2367     return 0;
2368     switch(sys->fmethod) {
2369     case ranki_kw:
2370     case ranki_jz:
2371     case ranki_ka: /* add new methods here */
2372     break;
2373     default:
2374     return 1;
2375     }
2376    
2377     if(ISNULL(sys->ludata))
2378     return 1;
2379    
2380     if( NOTNULL(sys->inverse) )
2381     mtx_destroy(sys->inverse);
2382     sys->inverse=NULL;
2383     if( NOTNULL(sys->factors) )
2384     mtx_destroy(sys->factors);
2385     if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2386     else square_region(sys,region);
2387    
2388     sys->factors = mtx_copy_region(sys->coef,region);
2389     sys->rank = -1;
2390     sys->smallest_pivot = MAXDOUBLE;
2391     for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
2392     rl->solved = FALSE;
2393 johnpye 1309 ensure_capacity(sys);
2394     ensure_lu_capacity(sys);
2395 aw0a 1
2396 johnpye 908 comptime = tm_cpu_time();
2397 aw0a 1 switch(sys->fmethod) {
2398     case ranki_ka:
2399     case ranki_kw:
2400     rankikw_factor(sys);
2401     break;
2402     case ranki_jz:
2403     default:
2404     rankijz_factor(sys);
2405     }
2406     /* no longer done automatically as noone usually cares.
2407     calc_dependent_rows_ranki1(sys);
2408     calc_dependent_cols_ranki1(sys);
2409     */
2410     sys->factored = TRUE;
2411    
2412     #undef KAA_DEBUG
2413     #if KAA_DEBUG
2414 johnpye 908 comptime = tm_cpu_time() - comptime;
2415 johnpye 1309 ERROR_REPORTER_HERE(ASC_PROG_NOTE,"Time for Inversion = %f",comptime);
2416     ERROR_REPORTER_HERE(ASC_PROG_ERR,"Non-zeros in Inverse = %d",mtx_nonzeros_in_region(sys->factors,region));
2417 aw0a 1 #endif
2418     return 0;
2419     }
2420    
2421     static int ranki_solve(linsolqr_system_t sys, struct rhs_list *rl)
2422     {
2423     backward_substitute(sys,rl->varvalue,rl->transpose);
2424     forward_substitute(sys,rl->varvalue,rl->transpose);
2425     zero_unpivoted_vars(sys,rl->varvalue,rl->transpose);
2426     return 0;
2427     }
2428    
2429    
2430     /*
2431     *********************************************************************
2432     * Start of the 2 bodied factorization schemes.
2433     *
2434     * ranki2_entry will be used to access these routines.
2435     * there is now code called:
2436     * rankikw2_factor
2437     * rankijz2_factor
2438     * eliminate_row2
2439     * ranki2_entry
2440     * forward_substitute2
2441     * backward_substitute2
2442     * ranki2_solve
2443     *
2444     * We have fixed the calc_col_dependent and calc_row_dependent
2445     * routines.
2446     *********************************************************************
2447     */
2448    
2449     extern int32 mtx_number_ops;
2450    
2451     /* this function does no permutation of any sort */
2452     static
2453     void eliminate_row2(mtx_matrix_t mtx,
2454     mtx_matrix_t upper_mtx,
2455     mtx_range_t *rng,
2456     int32 row, /* row to eliminate */
2457     real64 *tmp, /* temporary array */
2458     real64 *pivots, /* prior pivots array */
2459     real64 dtol /* drop tolerance */
2460     )
2461     {
2462     mtx_coord_t nz;
2463     int j,low,high;
2464     double tmpval;
2465    
2466     /*
2467     * Move all non-zeros from current row to full array.
2468     * The full array would have been initialized before,
2469     * we must put it back in the clean state when we leave.
2470     * All operations are done over mtx_ALL_COLS.
2471     *
2472     * Note: because of an addrow over mtx_ALL_COLS, entries
2473     * of tmp outside rng may have nonzeros put in them if
2474     * sys->factors has nonzeros in the outlying columns.
2475     * If enough of these pile up out there during elimination
2476     * of a block and the compiler treats double overflow as
2477     * a signalable error, we will have a floating point
2478     * exception.
2479     * Currently sys->factors is a copy of a region from
2480     * the sys->coef matrix, so we should not have anything
2481     * out there to pile up. If we make a variant which
2482     * does not copy the coef matrix but uses it directly,
2483     * this code needs to be revisited.
2484     */
2485     mtx_steal_cur_row_vec(mtx,row,tmp,mtx_ALL_COLS);
2486    
2487     /*
2488     * Eliminates nzs from pivot, one by one, filling tmp with multipliers
2489     * We now operate on the entire row, since we are not writing the
2490     * multipliers back to the matrix.
2491     */
2492    
2493     low = rng->low;
2494     high = rng->high;
2495     for (j = row-1 ; j >= low ; --(j) ) {
2496     if (tmp[j] == D_ZERO)
2497     continue; /* Nothing to do for this row */
2498     tmpval = tmp[j]/pivots[j]; /* Compute multiplier */
2499    
2500     /*
2501     * tmpval is now the multiplier. We use it to eliminate the row,
2502     * but backpatch it to tmp, *after* we do the elimination, as
2503     * mtx_cur_vec_add_row over all columns will stomp on tmp[j]
2504     */
2505     mtx_cur_vec_add_row(mtx,tmp,j,-tmpval,mtx_ALL_COLS,FALSE);
2506     tmp[j] = tmpval; /* patch the diagonal */
2507     }
2508    
2509     /*
2510     * Fill up the upper triangular matrix.
2511     * refill the range [low .. row-1]. Remember that we have
2512     * to zero all nnz's in tmp.
2513     */
2514     nz.row = row;
2515     for (j=low;j<row;j++) {
2516     if (tmp[j] != D_ZERO) {
2517     nz.col = j;
2518     mtx_fill_value(upper_mtx,&nz,tmp[j]);
2519     #if RBADEBUG
2520     FPRINTF(gscr,"fillingU: or %d oc %d (cc %d) %24.18g\n",
2521     mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), nz.col, tmp[j]);
2522     #endif
2523     tmp[j] = D_ZERO;
2524     }
2525     }
2526     /*
2527     * refill the range [row .. high]. We *wont* keep
2528     * the diagonal as this is sorted out by the pivot
2529     * array, but here we must keep it, sigh.
2530     * At this stage we don't know that the diagonal is the
2531     * pivot, as that is determined after elimination done.
2532     * Outer loop must delete the ultimate pivot element.
2533     * Odds are good, however, that it is the current diagonal,
2534     * so put in the diagonal last.
2535     */
2536     low = row;
2537     for (j=high;j>=low;j--) {
2538     if (tmp[j] != D_ZERO) {
2539     if (fabs(tmp[j]) > dtol) {
2540     nz.col = j;
2541     mtx_fill_value(mtx,&nz,tmp[j]);
2542     #if RBADEBUG
2543     FPRINTF(gscr,"fillingL: or %d oc %d %24.18g\n",
2544     mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), tmp[j]);
2545     #endif
2546     }
2547     tmp[j] = D_ZERO; /* zero element regardless */
2548     }
2549     }
2550     tmp[row] = D_ZERO;
2551     }
2552    
2553     /*
2554     * This function is the same as rankikw_factor except
2555     * that it uses 2 matrices; one to store L and one for U.
2556     * As such it uses eliminate_row2 rather than eliminate_row.
2557     * Note there has been a change in the way ptol is used,
2558     * now selecting nearest passing, rather than taking max.