/[ascend]/trunk/base/generic/linear/linsolqr.c
ViewVC logotype

Contents of /trunk/base/generic/linear/linsolqr.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1321 - (show annotations) (download) (as text)
Tue Mar 6 12:23:24 2007 UTC (13 years, 6 months ago) by jpye
File MIME type: text/x-csrc
File size: 164645 byte(s)
As promised, also deleted comment about how good linsol made us look
1 /*
2 * linsol II: Ascend Sparse Linear Solvers
3 * by Benjamin Allan
4 * Created: 2/6/90
5 * Version: $Revision: 1.26 $
6 * Version control file: $RCSfile: linsolqr.c,v $
7 * Date last modified: $Date: 2000/01/25 02:26:58 $
8 * Last modified by: $Author: ballan $
9 *
10 * This file is part of the SLV solver.
11 *
12 * Copyright (C) 1994 Benjamin Andrew Allan
13 *
14 * Based on linsol:
15 * Copyright (C) 1990 Karl Michael Westerberg
16 * Copyright (C) 1993 Joseph Zaher
17 * Copyright (C) 1994 Joseph Zaher, Benjamin Andrew Allan
18 * and on sqr.pas v1.5:
19 * Copyright (C) 1994 Boyd Safrit
20 *
21 * The SLV solver is free software; you can redistribute
22 * it and/or modify it under the terms of the GNU General Public License as
23 * published by the Free Software Foundation; either version 2 of the
24 * License, or (at your option) any later version.
25 *
26 * The SLV solver is distributed in hope that it will be
27 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 * General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with the program; if not, write to the Free Software Foundation,
33 * Inc., 675 Mass Ave, Cambridge, MA 02139 USA. Check the file named
34 * COPYING. COPYING is found in ../compiler.
35 */
36
37 /*
38 * Implementation Notes:
39 * 9/95: there are many places in linsolqr.c where lower level matrix
40 * operations have been replaced by higher level ones. These used to
41 * be flagged as such so that people who wanted to construct newer
42 * operations from the lowest level mtx operators could see how to
43 * use the lowlevel operators.
44
45 Later note (JP, 2007): The earlier version of linsolqr, named linsol,
46 has been removed completely. There was some comment about it here but
47 it's not here any more.
48 *
49 * 10/95: (BAA) I attempted to optimize number_drag through use of
50 * pointer arithmetic. Testing the resulting code without optimization
51 * turned on gave about 20% improvement over the original on an alpha
52 * using the native compiler. However, with optimization on the array
53 * subscripted version which is left here won by ~40%.
54 * The results for gcc were similar: pointer 10% better unoptimized,
55 * subscripted 30% better when -O2.
56 * For both compilers -O3 was worse! gcc was ~1.8x slower than the
57 * native cc both optimized and not.
58 * I have not tested a loop unrolled drag, but I expect unrolling is
59 * exactly what the optimizer is doing, so why bother?
60 */
61
62 #include <math.h>
63 #include <stdarg.h>
64 #include <utilities/ascConfig.h>
65 #include <utilities/error.h>
66 #include <utilities/ascMalloc.h>
67 #include <utilities/ascPanic.h>
68 #include <utilities/mem.h>
69 #include <utilities/set.h>
70 #include <general/tm_time.h>
71 #include "mtx.h"
72 #include "linsolqr.h"
73 #include <general/mathmacros.h>
74
75 #ifdef __WIN32__
76 #define copysign _copysign
77 #endif /* __WIN32__ */
78
79 #define LINSOLQR_DEBUG FALSE
80 /* qr code debugging flag */
81 #define LINSOL_DEBUG FALSE
82 /* linsol debugging flag */
83
84 #define D_ZERO (real64)0.0
85 #define ZERO (int32)0
86 #define D_ONE (real64)1.0
87
88 /* housekeeping flags */
89 #define BUILD_DEAD_CODE 0
90 #undef BUILD_KIRK_CODE
91
92
93 #define RBADEBUG 0
94 /* turns on spew that will generate output files
95 * comparable to and in the same location as for rankiba2
96 * user specific hardcoded pathnames are involved.
97 * don't try this at home.
98 */
99 #if RBADEBUG
100 extern FILE *gscr;
101 #endif
102
103 /*
104 * timing messages disable flag.
105 */
106 int g_linsolqr_timing = 1;
107
108 /***************************************************************************\
109 Data structures for linsol
110 Any time you modify these, please verify linsolqr_size is still correct.
111 \***************************************************************************/
112 struct rhs_list {
113 real64 *rhs; /* Vector of rhs values */
114 real64 *varvalue; /* Solution of the linear system */
115 boolean solved; /* ? has the rhs been solved */
116 boolean transpose; /* ? transpose the coef matrix */
117 struct rhs_list *next;
118 };
119
120 struct qr_fill_t {
121 int32 cnt; /* number of real nonzeros in a column */
122 int32 col; /* column number prior to being sorted */
123 int32 fill; /* fill if this (or if col?) were the pivot column */
124 int32 overlap; /* scratch for fillable calculation. */
125 };
126 /* fill counting structure for sparse qr */
127
128 struct lu_auxdata { /* all indexed by cur row or cur col number */
129 real64 *pivlist; /* vector of pivots (diagonal of L for ranki) */
130 real64 *tmp; /* row elimination, dependency buffer */
131 int32 cap; /* current capacity of the vectors if not null */
132 };
133 /* structure for lu algorithms */
134
135 struct qr_auxdata { /* all indexed by cur row or cur col number */
136 mtx_matrix_t hhvects; /* slave matrix of sys->factors for holding u's */
137 mtx_sparse_t sp; /* sparse data space */
138 real64 *alpha; /* column norms for the Q\R factor matrix */
139 real64 *sigma; /* column norms for the S (R inverse) matrix */
140 real64 *tau; /* column tau of the Householder transforms */
141 real64 *hhcol; /* Householder transform scratch vector */
142 real64 *hhrow; /* Householder transform scratch vector */
143 real64 nu; /* current condition number of inverse */
144 real64 anu; /* condition number of coef */
145 real64 asubnu; /* condition number of active region in coef */
146 struct qr_fill_t *fill; /* fill counting structure */
147 mtx_region_t facreg; /* region over which qr is done if nonsquare */
148 int32 cap; /* current capacity of these vectors if not null */
149 };
150 /* structure for CondQR algorithms */
151
152 struct linsolqr_header {
153 int integrity;
154 enum factor_class fclass; /* Type of factoring expected */
155 enum factor_method fmethod; /* Method factoring expected */
156 enum reorder_method rmethod; /* Type of most recent reordering */
157 int32 capacity; /* Capacity of arrays */
158 mtx_matrix_t coef; /* Coefficient matrix */
159 struct rhs_list *rl; /* List of rhs vectors */
160 int rlength; /* Length of rhs list */
161 real64 pivot_zero; /* Smallest acceptable pivot */
162 real64 ptol; /* Pivot selection tolerance */
163 real64 ctol; /* Condition selection tolerance */
164 real64 dtol; /* Matrix entry drop tolerance */
165 mtx_matrix_t factors; /* Matrix with UL and dependence info (ranki),
166 or L and dependence info (ranki2),
167 or R and dependence info (qr methods) */
168 mtx_matrix_t inverse; /* Matrix with U multipliers (ranki2) or R^-1
169 (condqr), in both cases slave of factors */
170 boolean factored; /* ? has the matrix been factored */
171 boolean rowdeps; /* ? have row dependencies been calc'ed */
172 boolean coldeps; /* ? have col dependencies been calc'ed */
173 mtx_range_t rng; /* Pivot range */
174 mtx_region_t reg; /* Bounding region given */
175 int32 rank; /* Rank of the matrix */
176 real64 smallest_pivot; /* Smallest pivot accepted */
177 struct qr_auxdata *qrdata; /* Data vectors for qr methods */
178 struct lu_auxdata *ludata; /* Data vectors for lu methods */
179 };
180 /* linsol main structure */
181
182 /***************************************************************************\
183 string functions for linsol io
184 \***************************************************************************/
185
186 char *linsolqr_rmethods() {
187 static char names[]="Natural, SPK1, TSPK1";
188 return names;
189 }
190
191 char *linsolqr_fmethods() {
192 static char names[] =
193 "SPK1/RANKI,SPK1/RANKI+ROW,Fast-SPK1/RANKI,Fast-SPK1/RANKI+ROW,Fastest-SPK1/MR-RANKI,CondQR,CPQR";
194 return names;
195 }
196
197 /** **/
198 enum reorder_method linsolqr_rmethod_to_enum(char *name) {
199 if (strcmp(name,"SPK1")==0) return spk1;
200 if (strcmp(name,"TSPK1")==0) return tspk1;
201 if (strcmp(name,"Natural")==0) return natural;
202 return unknown_r;
203 }
204
205 enum factor_method linsolqr_fmethod_to_enum(char *name) {
206 if (strcmp(name,"KIRK-STUFF")==0) return ranki_ka;
207 if (strcmp(name,"SPK1/RANKI")==0) return ranki_kw;
208 if (strcmp(name,"SPK1/RANKI+ROW")==0) return ranki_jz;
209 if (strcmp(name,"Fast-SPK1/RANKI")==0) return ranki_kw2;
210 if (strcmp(name,"Fast-SPK1/RANKI+ROW")==0) return ranki_jz2;
211 if (strcmp(name,"Fastest-SPK1/MR-RANKI")==0) return ranki_ba2;
212 if (strcmp(name,"CondQR")==0) return cond_qr;
213 if (strcmp(name,"CPQR")==0) return plain_qr;
214 return unknown_f;
215 }
216
217 enum factor_class linsolqr_fmethod_to_fclass(enum factor_method fm)
218 {
219 switch (fm) {
220 /* ranki-like things */
221 case ranki_kw:
222 case ranki_jz:
223 case ranki_ba2:
224 case ranki_kw2:
225 case ranki_jz2:
226 case ranki_ka:
227 return ranki;
228 /* implemented qr things */
229 case plain_qr:
230 return s_qr;
231 /* other stuff unimplemented. all fall through */
232 case cond_qr:
233 case opt_qr:
234 case ls_qr:
235 case gauss_ba2:
236 case symmetric_lu:
237 case unknown_f:
238 default:
239 return unknown_c;
240 }
241 }
242
243 /** **/
244 char *linsolqr_enum_to_rmethod(enum reorder_method meth) {
245 switch (meth) {
246 case spk1: return "SPK1";
247 case tspk1: return "TSPK1";
248 case natural: return "Natural";
249 default: return "Unknown reordering method.";
250 }
251 }
252
253 char *linsolqr_enum_to_fmethod(enum factor_method meth) {
254 switch (meth) {
255 case ranki_kw: return "SPK1/RANKI";
256 case ranki_jz: return "SPK1/RANKI+ROW";
257 case ranki_ba2: return "Fastest-SPK1/MR-RANKI";
258 case ranki_kw2: return "Fast-SPK1/RANKI";
259 case ranki_jz2: return "Fast-SPK1/RANKI+ROW";
260 case ranki_ka: return "KIRK-STUFF";
261 case cond_qr: return "CondQR";
262 case plain_qr: return "CPQR";
263 default: return "Unknown factorization method.";
264 }
265 }
266
267 /** **/
268 char *linsolqr_rmethod_description(enum reorder_method meth) {
269 static char sspk1[]="SPK1 reordering ala Stadtherr.";
270 static char stspk1[]="SPK1 reordering column wise ala Stadtherr.";
271 static char nat[]="Ordering as received from user";
272 switch (meth) {
273 case spk1: return sspk1;
274 case tspk1: return stspk1;
275 case natural: return nat;
276 default: return "Unknown reordering method.";
277 }
278 }
279
280 char *linsolqr_fmethod_description(enum factor_method meth) {
281 static char ex_ranki[] =
282 "SPK1 reordering with RANKI LU factoring ala Stadtherr.";
283 static char ex_rankib[] =
284 "SPK1 reordering with RANKI LU factoring ala Stadtherr, but fast.";
285 static char ex_rankij[]="SPK1/RANKI LU with pseudo-complete pivoting";
286 static char ex_rankik[]="KIRK-STUFF/RANKI LU with pseudo-complete pivoting";
287 static char ex_condqr[]="Sparse QR with condition controlled pivoting.";
288 static char ex_plainqr[]="Sparse QR with column pivoting.";
289 switch (meth) {
290 case ranki_kw: return ex_ranki;
291 case ranki_kw2: return ex_ranki;
292 case ranki_ba2: return ex_rankib;
293 case ranki_jz: return ex_rankij;
294 case ranki_jz2: return ex_rankij;
295 case ranki_ka: return ex_rankik;
296 case cond_qr: return ex_condqr;
297 case plain_qr: return ex_plainqr;
298 default: return "Unknown factorization method.";
299 }
300 }
301
302 /***************************************************************************\
303 internal check routines
304 \***************************************************************************/
305 #define OK ((int)439828676)
306 #define DESTROYED ((int)839276847)
307 #define CHECK_SYSTEM(a) check_system((a),__FILE__,__LINE__)
308 static int check_system(linsolqr_system_t sys, char *file, int line)
309 /**
310 *** Checks the system handle. Return 0 if ok, 1 otherwise.
311 **/
312 {
313 UNUSED_PARAMETER(file);
314
315 if( ISNULL(sys) ) {
316 ERROR_REPORTER_HERE(ASC_PROG_ERR,"NULL system handle.");
317 return 1;
318 }
319
320 switch( sys->integrity ) {
321 case OK:
322 return 0;
323 case DESTROYED:
324 error_reporter(ASC_PROG_ERR,file,line,__FUNCTION__,"System was recently destroyed.");
325 break;
326 default:
327 error_reporter(ASC_PROG_ERR,file,line,__FUNCTION__,"System was reused or never created.");
328 break;
329 }
330 return 1;
331 }
332
333 /***************************************************************************\
334 external calls (and some of their internals)
335 \***************************************************************************/
336 static void destroy_rhs_list(struct rhs_list *rl)
337 /**
338 *** Destroys rhs list.
339 **/
340 {
341 while( NOTNULL(rl) ) {
342 struct rhs_list *p;
343 p = rl;
344 rl = rl->next;
345 if( NOTNULL(p->varvalue) )
346 ascfree( (POINTER)(p->varvalue) );
347 ascfree( (POINTER)p );
348 }
349 }
350
351 static struct rhs_list *find_rhs(struct rhs_list *rl,real64 *rhs)
352 /**
353 *** Searches for rhs in rhs list, returning it or NULL if not found.
354 **/
355 {
356 for( ; NOTNULL(rl) ; rl = rl->next )
357 if( rl->rhs == rhs )
358 break;
359 return(rl);
360 }
361
362 static struct lu_auxdata *create_ludata()
363 /**
364 *** Creates an empty zeroed ludata struct.
365 *** Filled up by ensure_lu_capacity.
366 **/
367 {
368 struct lu_auxdata *d;
369 d=ASC_NEW_CLEAR(struct lu_auxdata);
370 return d;
371 }
372
373 static struct qr_auxdata *create_qrdata()
374 /**
375 *** Creates an empty zeroed qrdata struct.
376 *** Filled up by ensure_qr_capacity.
377 **/
378 {
379 struct qr_auxdata *d;
380 d=ASC_NEW_CLEAR(struct qr_auxdata);
381 return d;
382 }
383
384 static void destroy_ludata(struct lu_auxdata *d) {
385 /**
386 *** Conditionally destroys a ludata struct and its parts.
387 **/
388 if (NOTNULL(d)) {
389 if (NOTNULL(d->pivlist)) ascfree(d->pivlist);
390 d->pivlist=NULL;
391 if (NOTNULL(d->tmp)) ascfree(d->tmp);
392 d->tmp=NULL;
393 ascfree(d);
394 }
395 }
396
397 static void destroy_qrdata(struct qr_auxdata *d) {
398 /**
399 *** Conditionally destroys a qrdata struct and its parts.
400 **/
401 if (NOTNULL(d)) {
402 if (NOTNULL(d->alpha)) ascfree(d->alpha);
403 if (NOTNULL(d->sigma)) ascfree(d->sigma);
404 if (NOTNULL(d->tau)) ascfree(d->tau);
405 if (NOTNULL(d->hhcol)) ascfree(d->hhcol);
406 if (NOTNULL(d->hhrow)) ascfree(d->hhrow);
407 if (NOTNULL(d->fill)) ascfree(d->fill);
408
409 ascfree(d);
410 }
411 }
412
413 linsolqr_system_t linsolqr_create()
414 {
415 linsolqr_system_t sys;
416
417 sys = (linsolqr_system_t)ascmalloc( sizeof(struct linsolqr_header) );
418 sys->fclass = unknown_c;
419 sys->fmethod = unknown_f;
420 sys->rmethod = unknown_r;
421 sys->integrity = OK;
422 sys->capacity = 0;
423 sys->coef = NULL;
424 sys->rl = NULL;
425 sys->rlength = 0;
426 sys->pivot_zero = 1e-12; /* default value */
427 sys->ptol = 0.1; /* default value */
428 sys->ctol = 0.1; /* default value */
429 sys->dtol = LINQR_DROP_TOLERANCE; /* default value */
430 sys->factors = NULL;
431 sys->inverse = NULL;
432 sys->factored = FALSE;
433 sys->rowdeps = FALSE;
434 sys->coldeps = FALSE;
435 sys->rng.low = sys->rng.high = -1;
436 sys->reg.row.low = sys->reg.row.high = -1;
437 sys->reg.col.low = sys->reg.col.high = -1;
438 sys->rank = -1;
439 sys->smallest_pivot = MAXDOUBLE;
440 sys->qrdata = NULL;
441 sys->ludata = NULL;
442 return(sys);
443 }
444
445 linsolqr_system_t linsolqr_create_default(){
446 linsolqr_system_t L;
447 L = linsolqr_create();
448 L->fmethod = ranki_ba2;
449 L->rmethod = spk1;
450 return L;
451 }
452
453 void linsolqr_destroy(linsolqr_system_t sys)
454 {
455 if(CHECK_SYSTEM(sys)) {
456 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. Not destroyed.");
457 return;
458 }
459 if( NOTNULL(sys->coef) ) {
460 ERROR_REPORTER_HERE(ASC_PROG_NOTE,"linsolqr contains coef mtx which will NOT be destroyed\n");
461 }
462 if( NOTNULL(sys->inverse) )
463 mtx_destroy(sys->inverse);
464 if( NOTNULL(sys->factors) )
465 mtx_destroy(sys->factors);
466 destroy_rhs_list(sys->rl);
467 destroy_qrdata(sys->qrdata);
468 destroy_ludata(sys->ludata);
469 sys->integrity = DESTROYED;
470 ascfree( (POINTER)sys );
471 }
472
473 void linsolqr_set_matrix(linsolqr_system_t sys,mtx_matrix_t mtx)
474 {
475 if(CHECK_SYSTEM(sys)) {
476 ERROR_REPORTER_HERE(ASC_PROG_ERROR,"Bad linsolqr_system_t found. coef mtx not set.");
477 return;
478 }
479 sys->coef = mtx;
480 }
481
482
483 void linsolqr_set_region(linsolqr_system_t sys,mtx_region_t region)
484 {
485 if(CHECK_SYSTEM(sys)) {
486 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. coef mtx not set.");
487 return;
488 }
489 CONSOLE_DEBUG("Region rows=[%d,%d], cols=[%d,%d]",region.row.low,region.row.high,region.col.low,region.col.high);
490 sys->reg = region;
491 }
492
493
494 mtx_matrix_t linsolqr_get_matrix(linsolqr_system_t sys)
495 {
496 CHECK_SYSTEM(sys);
497 return( sys->coef );
498 }
499
500 mtx_region_t *linsolqr_get_region(linsolqr_system_t sys)
501 {
502 return &(sys->reg);
503 }
504
505 void linsolqr_add_rhs(linsolqr_system_t sys,
506 real64 *rhs,
507 boolean transpose)
508 {
509 struct rhs_list *rl;
510
511 if(CHECK_SYSTEM(sys)) {
512 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. rhs not added.");
513 return;
514 }
515 rl = find_rhs(sys->rl,rhs);
516 if( NOTNULL(rl) ) {
517 return;
518 } else {
519 if( NOTNULL(rhs) ) {
520 rl = (struct rhs_list *)ascmalloc( sizeof(struct rhs_list) );
521 rl->rhs = rhs;
522 rl->varvalue = NULL;
523 rl->solved = FALSE;
524 rl->transpose = transpose;
525 rl->next = sys->rl;
526 sys->rl = rl;
527 ++(sys->rlength);
528 if (sys->capacity>0) {
529 rl->varvalue =
530 ASC_NEW_ARRAY(real64,sys->capacity);
531 }
532 }
533 }
534 }
535
536 void linsolqr_remove_rhs(linsolqr_system_t sys,real64 *rhs)
537 {
538 struct rhs_list **q;
539
540 if(CHECK_SYSTEM(sys)) {
541 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. RHS not removed.");
542 return;
543 }
544 for( q = &(sys->rl) ; NOTNULL(*q) ; q = &((*q)->next) )
545 if( (*q)->rhs == rhs )
546 break;
547 if( NOTNULL(*q) ) {
548 struct rhs_list *p;
549 p = *q;
550 *q = p->next;
551 if( NOTNULL(p->varvalue) )
552 ascfree( (POINTER)(p->varvalue) );
553 ascfree( (POINTER)p );
554 --(sys->rlength);
555 } else if( NOTNULL(rhs) ) {
556 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Rhs does not exist.");
557 }
558 }
559
560 int32 linsolqr_number_of_rhs(linsolqr_system_t sys)
561 {
562 CHECK_SYSTEM(sys);
563 return( sys->rlength );
564 }
565
566 real64 *linsolqr_get_rhs(linsolqr_system_t sys,int n)
567 {
568 struct rhs_list *rl;
569 int count;
570
571 CHECK_SYSTEM(sys);
572
573 count = sys->rlength - 1 - n;
574 if( count < 0 ) return(NULL);
575 for( rl = sys->rl ; count > 0 && NOTNULL(rl) ; rl = rl->next )
576 --count;
577 return( ISNULL(rl) ? NULL : rl->rhs );
578 }
579
580 void linsolqr_matrix_was_changed(linsolqr_system_t sys)
581 {
582 if(CHECK_SYSTEM(sys)) {
583 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. matrix change message ignored.");
584 return;
585 }
586 sys->rowdeps = sys->coldeps = sys->factored = FALSE;
587 }
588
589 void linsolqr_rhs_was_changed(linsolqr_system_t sys, real64 *rhs)
590 {
591 struct rhs_list *rl;
592
593 if(CHECK_SYSTEM(sys)) {
594 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. rhs change ignored.");
595 return;
596 }
597 rl = find_rhs(sys->rl,rhs);
598 if( NOTNULL(rl) ) {
599 rl->solved = FALSE;
600 } else if( NOTNULL(rhs) ) {
601 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Rhs does not exist.");
602 }
603 }
604
605 void linsolqr_set_pivot_zero(linsolqr_system_t sys,real64 pivot_zero)
606 {
607 if(CHECK_SYSTEM(sys)) {
608 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. set_pivot_zero ignored.");
609 return;
610 }
611 if( pivot_zero < D_ZERO ) {
612 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Invalid pivot zero of %g",pivot_zero);
613 } else {
614 sys->pivot_zero = pivot_zero;
615 linsolqr_matrix_was_changed(sys);
616 }
617 }
618
619 real64 linsolqr_pivot_zero(linsolqr_system_t sys)
620 {
621 CHECK_SYSTEM(sys);
622 return( sys->pivot_zero );
623 }
624
625 void linsolqr_set_pivot_tolerance(linsolqr_system_t sys, real64 ptol)
626 {
627 if(CHECK_SYSTEM(sys)) {
628 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. set_pivot_tol ignored.");
629 return;
630 }
631 if( ptol < D_ZERO || ptol > D_ONE ) {
632 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Invalid pivot tolerance of %g",ptol);
633 } else {
634 sys->ptol = ptol;
635 linsolqr_matrix_was_changed(sys);
636 }
637 }
638
639 void linsolqr_set_condition_tolerance(linsolqr_system_t sys, real64 ctol)
640 {
641 if(CHECK_SYSTEM(sys)) {
642 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. set_condition_tolerance ignored.");
643 return;
644 }
645 if( ctol < D_ZERO || ctol > D_ONE ) {
646 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Invalid condition tolerance of %g.",ctol);
647 } else {
648 sys->ctol = ctol;
649 linsolqr_matrix_was_changed(sys);
650 }
651 }
652
653 void linsolqr_set_drop_tolerance(linsolqr_system_t sys, real64 dtol)
654 {
655 if(CHECK_SYSTEM(sys)) {
656 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Bad linsolqr_system_t found. set_drop_tolerance ignored.");
657 return;
658 }
659 if( dtol < D_ZERO || dtol > D_ONE ) {
660 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Invalid drop tolerance of %g.",dtol);
661 } else {
662 sys->dtol = dtol;
663 linsolqr_matrix_was_changed(sys);
664 }
665 }
666
667 real64 linsolqr_pivot_tolerance(linsolqr_system_t sys)
668 {
669 CHECK_SYSTEM(sys);
670 return( sys->ptol );
671 }
672
673 real64 linsolqr_condition_tolerance(linsolqr_system_t sys)
674 {
675 CHECK_SYSTEM(sys);
676 return( sys->ctol );
677 }
678
679 real64 linsolqr_drop_tolerance(linsolqr_system_t sys)
680 {
681 CHECK_SYSTEM(sys);
682 return( sys->dtol );
683 }
684
685 extern enum factor_class linsolqr_fclass(linsolqr_system_t sys)
686 {
687 CHECK_SYSTEM(sys);
688 return( sys->fclass );
689 }
690 extern enum factor_method linsolqr_fmethod(linsolqr_system_t sys)
691 {
692 CHECK_SYSTEM(sys);
693 return( sys->fmethod );
694 }
695 extern enum reorder_method linsolqr_rmethod(linsolqr_system_t sys)
696 {
697 CHECK_SYSTEM(sys);
698 return( sys->rmethod );
699 }
700
701 int32 linsolqr_rank(linsolqr_system_t sys)
702 {
703 CHECK_SYSTEM(sys);
704 if( !sys->factored ) {
705 ERROR_REPORTER_HERE(ASC_PROG_ERR,"System not factored yet.");
706 }
707 return(sys->rank);
708 }
709
710 real64 linsolqr_smallest_pivot(linsolqr_system_t sys)
711 {
712 CHECK_SYSTEM(sys);
713 #if LINSOL_DEBUG
714 if( !sys->factored ) {
715 ERROR_REPORTER_HERE(ASC_PROG_ERR,"System not factored yet.");
716 }
717 #endif
718 return(sys->smallest_pivot);
719 }
720
721 /***************************************************************************\
722 Commonly used internal functions for sparse linear solvers based on mtx.
723 void ensure_capacity(sys)
724 void ensure_lu_capacity(sys)
725 void ensure_qr_capacity(sys)
726 void forward_substitute(sys,rvec,transpose)
727 void backward_substitute(sys,rvec,transpose)
728 macro SQR(x)
729 int find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi)
730 \***************************************************************************/
731
732 static real64 *raise_capacity(real64 *vec,
733 int32 oldcap,
734 int32 newcap)
735 /**
736 *** Raises the capacity of the array and returns a new array.
737 *** It is assumed that oldcap < newcap. vec is destroyed or
738 *** returned as appropriate.
739 *** If !NDEBUG, the vector expanded is also set to 0.
740 **/
741 {
742 real64 *newvec=NULL;
743 #ifndef NDEBUG
744 int i;
745 #endif
746 if (newcap < oldcap) {
747 #ifndef NDEBUG
748 for (i = 0; i < newcap; i++) {
749 newvec[i] = 0.0;
750 }
751 #endif
752 return vec;
753 }
754 if (NOTNULL(vec)) {
755 /* don't call realloc on null with newcap 0 or it frees */
756 newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64)));
757 } else {
758 newvec=(newcap > 0 ?
759 ASC_NEW_ARRAY(real64,newcap ) : NULL);
760 }
761 #ifndef NDEBUG
762 for (i = 0; i < newcap; i++) {
763 newvec[i] = 0.0;
764 }
765 #endif
766 return newvec;
767 }
768
769 static int32 *raise_qr_int_capacity(int32 *vec,
770 int32 oldcap,
771 int32 newcap)
772 /**
773 *** Raises the capacity of the index array and returns a new array.
774 *** It is assumed that oldcap < newcap. vec is destroyed or
775 *** returned as appropriate. vec returned is zeroed.
776 *** calling with newcap=0 does not force deallocation.
777 **/
778 {
779 int32 *newvec=NULL;
780 if (newcap < oldcap)
781 return vec;
782 if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
783 newvec=(int32 *)ascrealloc(vec,(newcap * sizeof(int32)));
784 mtx_zero_int32(vec,newcap);
785 } else {
786 newvec= (newcap > 0 ? ASC_NEW_ARRAY_CLEAR(int32,newcap) : NULL);
787 }
788 return newvec;
789 }
790
791 static real64 *raise_qr_real_capacity(real64 *vec,
792 int32 oldcap,
793 int32 newcap)
794 /**
795 *** Raises the capacity of the real array and returns a new array.
796 *** It is assumed that oldcap < newcap. vec is destroyed or
797 *** returned as appropriate. vec returned is zeroed.
798 *** calling with newcap=0 does not force deallocation.
799 **/
800 {
801 real64 *newvec=NULL;
802 if (newcap < oldcap)
803 return vec;
804 if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
805 newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64)));
806 mtx_zero_real64(vec,newcap);
807 } else {
808 newvec= (newcap > 0 ? ASC_NEW_ARRAY_CLEAR(real64,newcap) : NULL);
809 }
810 return newvec;
811 }
812
813 static struct qr_fill_t *raise_qr_fill_capacity(struct qr_fill_t *vec,
814 int32 oldcap,
815 int32 newcap)
816 /**
817 *** Raises the capacity of the fill array and returns a new array.
818 *** It is assumed that oldcap < newcap. vec is destroyed or
819 *** returned as appropriate. vec returned is zeroed.
820 *** calling with newcap=0 does not force deallocation.
821 **/
822 {
823 struct qr_fill_t *newvec=NULL;
824 if (newcap < oldcap)
825 return vec;
826 if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
827 newvec=(struct qr_fill_t *)
828 ascrealloc(vec,(newcap * sizeof(struct qr_fill_t)));
829 mtx_zero_char((char *)vec,newcap*sizeof(struct qr_fill_t));
830 }else{
831 newvec= (newcap > 0 ? ASC_NEW_ARRAY_CLEAR(struct qr_fill_t, newcap) : NULL);
832 }
833 return newvec;
834 }
835
836 static void ensure_capacity(linsolqr_system_t sys)
837 /**
838 *** ensures that the capacity of all of the solution vectors
839 *** for each rhs is large enough.
840 *** The above implies a malloc if varvalue is null.
841 *** Assumes varvalue are at sys->capacity already.
842 **/
843 {
844 int32 req_cap;
845 req_cap = mtx_capacity(sys->coef);
846
847 if( req_cap > sys->capacity ) {
848 struct rhs_list *rl;
849
850 for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
851 rl->varvalue = raise_capacity(rl->varvalue,sys->capacity,req_cap);
852 sys->capacity = req_cap;
853 }
854 }
855
856 static void ensure_lu_capacity(linsolqr_system_t sys)
857 /**
858 *** ensures that the capacity of all of the ludata vectors.
859 *** The above implies a malloc if vector is null.
860 *** If not null, implies an extension if needed.
861 **/
862 {
863 int32 req_cap;
864 if (!sys || !(sys->coef) || !(sys->ludata)) {
865 ERROR_REPORTER_HERE(ASC_PROG_ERR,"called with NULL pointer");
866 return;
867 }
868 req_cap = mtx_capacity(sys->coef);
869
870 if( req_cap > sys->ludata->cap ) {
871 sys->ludata->pivlist =
872 raise_capacity(sys->ludata->pivlist,sys->ludata->cap,req_cap);
873 sys->ludata->tmp =
874 raise_capacity(sys->ludata->tmp,sys->ludata->cap,req_cap);
875 sys->ludata->cap = req_cap;
876 }
877 }
878
879 static void ensure_qr_capacity(linsolqr_system_t sys)
880 /**
881 *** ensures that the capacity of all of the qrdata vectors
882 *** is large enough.
883 *** The above implies a calloc if vector is null.
884 *** If not null, implies an extension and zeroing of the vector.
885 *** Also zeroes the simple elements of the qrdata.
886 **/
887 {
888 int32 req_cap;
889 if (!sys || !(sys->coef) || !(sys->qrdata)) {
890 ERROR_REPORTER_HERE(ASC_PROG_ERR,"called with NULL pointer");
891 return;
892 }
893 req_cap = mtx_capacity(sys->coef);
894
895 if( req_cap > sys->qrdata->cap ) {
896 sys->qrdata->alpha =
897 raise_qr_real_capacity(sys->qrdata->alpha,sys->qrdata->cap,req_cap);
898 sys->qrdata->sigma =
899 raise_qr_real_capacity(sys->qrdata->sigma,sys->qrdata->cap,req_cap);
900 sys->qrdata->tau =
901 raise_qr_real_capacity(sys->qrdata->tau,sys->qrdata->cap,req_cap);
902 sys->qrdata->hhcol =
903 raise_qr_real_capacity(sys->qrdata->hhcol,sys->qrdata->cap,req_cap);
904 sys->qrdata->hhrow =
905 raise_qr_real_capacity(sys->qrdata->hhrow,sys->qrdata->cap,req_cap);
906
907 sys->qrdata->sp.data =
908 raise_qr_real_capacity(sys->qrdata->sp.data,sys->qrdata->cap,req_cap);
909 sys->qrdata->sp.idata =
910 raise_qr_int_capacity(sys->qrdata->sp.idata,sys->qrdata->cap,req_cap);
911
912 sys->qrdata->fill =
913 raise_qr_fill_capacity(sys->qrdata->fill,sys->qrdata->cap,req_cap);
914
915 sys->qrdata->cap = req_cap;
916 sys->qrdata->sp.cap = req_cap;
917 sys->qrdata->sp.len = 0;
918 }
919 sys->qrdata->nu=D_ZERO;
920 sys->qrdata->anu=D_ZERO;
921 sys->qrdata->asubnu=D_ZERO;
922 }
923
924 static void forward_substitute(linsolqr_system_t sys,
925 real64 *arr,
926 boolean transpose)
927 /**
928 *** Forward substitute. It is assumed that the L (or U) part of
929 *** sys->factors is computed. This function converts c to x in place. The
930 *** values are stored in arr indexed by original row number (or original
931 *** column number).
932 ***
933 *** transpose = FALSE: transpose = TRUE:
934 *** T
935 *** L x = c U x = c
936 ***
937 *** The following formula holds:
938 *** 0<=k<r ==> x(k) = [c(k) - L(k,(0..k-1)) dot x(0..k-1)] / L(k,k)
939 *** or
940 *** 0<=k<r ==> x(k) = [c(k) - U((0..k-1),k) dot x(0..k-1)] / U(k,k)
941 ***
942 *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k]
943 *** and in matrix.
944 **/
945 {
946 mtx_range_t dot_rng;
947 mtx_coord_t nz;
948 real64 sum, *pivlist;
949 mtx_matrix_t mtx;
950 int32 dotlim;
951 boolean nonzero_found=FALSE;
952
953 mtx=sys->factors;
954 pivlist=sys->ludata->pivlist;
955 dot_rng.low = sys->rng.low;
956 dotlim=dot_rng.low+sys->rank;
957 if (transpose) { /* arr is indexed by original column number */
958 for( nz.col=dot_rng.low; nz.col < dotlim; ++(nz.col) ) {
959 register int32 org_col;
960
961 dot_rng.high = nz.col - 1;
962 org_col = mtx_col_to_org(mtx,nz.col);
963 if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
964 if (nonzero_found) {
965 sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE);
966 /* arr[org_col] = (arr[org_col] - sum) / D_ONE */;
967 arr[org_col] -=sum;
968 }
969 }
970
971 } else { /* arr is indexed by original row number */
972 for( nz.row=dot_rng.low; nz.row < dotlim; ++(nz.row) ) {
973 register int32 org_row;
974
975 dot_rng.high = nz.row - 1;
976 org_row = mtx_row_to_org(mtx,nz.row);
977 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
978 if (nonzero_found) {
979 sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE);
980 /*
981 nz.col = nz.row;
982 arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz);
983 */
984 arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row];
985 }
986 }
987 }
988 }
989
990 static void backward_substitute(linsolqr_system_t sys,
991 real64 *arr,
992 boolean transpose)
993 /**
994 *** Backward substitute. It is assumed that the U (or L) part of
995 *** sys->factors is computed. This function converts rhs to c in place. The
996 *** values are stored in arr indexed by original row number (or original
997 *** column number).
998 ***
999 *** transpose = FALSE: transpose = TRUE:
1000 *** T
1001 *** U c = rhs L c = rhs
1002 ***
1003 *** The following formula holds:
1004 *** transpose=FALSE: (the usual for J.dx=-f where rhs is -f)
1005 *** 0<=k<r ==> c(k) = [rhs(k) - U(k,(k+1..r-1)) dot c(k+1..r-1)] / U(k,k)
1006 *** working up from the bottom.
1007 *** or
1008 *** transpose=TRUE:
1009 *** 0<=k<r ==> c(k) = [rhs(k) - L((k+1..r-1),k) dot c(k+1..r-1)] / L(k,k)
1010 *** working right to left.
1011 ***
1012 *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k]
1013 *** and in matrix.
1014 **/
1015 {
1016 mtx_range_t dot_rng;
1017 mtx_coord_t nz;
1018 real64 sum, *pivlist;
1019 mtx_matrix_t mtx;
1020 int32 dotlim;
1021 boolean nonzero_found=FALSE; /* once TRUE, substitution must be done
1022 over remaining rows/cols */
1023
1024 dot_rng.high = sys->rng.low + sys->rank - 1;
1025 dotlim=sys->rng.low;
1026 mtx=sys->factors;
1027 pivlist=sys->ludata->pivlist;
1028 if (transpose) { /* arr is indexed by original column number */
1029 for( nz.col = dot_rng.high ; nz.col >= dotlim ; --(nz.col) ) {
1030 register int32 org_col;
1031
1032 dot_rng.low = nz.col + 1;
1033
1034 org_col = mtx_col_to_org(mtx,nz.col);
1035 if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
1036 if (nonzero_found) {
1037 sum= mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE);
1038 /*
1039 reminders:
1040 nz.row = nz.col;
1041 arr[org_col] = (arr[org_col] - sum) / mtx_value(mtx,&nz);
1042 */
1043 arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col];
1044 }
1045 }
1046 } else { /* arr is indexed by original row number */
1047 /* we are working from the bottom up */
1048 for( nz.row = dot_rng.high ; nz.row >= dotlim ; --(nz.row) ) {
1049 register int32 org_row;
1050
1051 dot_rng.low = nz.row + 1;
1052 org_row = mtx_row_to_org(mtx,nz.row);
1053
1054 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
1055 if (nonzero_found) {
1056 sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE);
1057 /*
1058 reminders:
1059 nz.row = nz.col;
1060 arr[org_row] = (arr[org_row] - sum) / D_ONE;
1061 */
1062 arr[org_row] -= sum;
1063 }
1064 }
1065 }
1066 }
1067
1068 #ifdef SQR
1069 #undef SQR
1070 #endif
1071 #define SQR(x) ((x)*(x))
1072 /*
1073 * int32 find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi);
1074 * real64 *vec,*rvec,tol;
1075 * int32 len, *ivec, *maxi;
1076 *
1077 * Search array vec of positive numbers for the first entry, k, which passes
1078 * (vec[k] >= tol*vecmax) where vecmax is the largest value in vec.
1079 * vec is an array len long. rvec, ivec are (worst case) len long.
1080 * *maxi will be set to the index of the first occurence of the largest
1081 * number in vec which is >= eps.
1082 * 0.0 <= tol <= 1.
1083 * If tol <=0, no search is done (*maxi = 0, k = 0).
1084 * Eps is an absolute number which vec values must be >= to before
1085 * being eligible for the tol test.
1086 *
1087 * Best case, highly nonlinear feature:
1088 * if on entry *maxi = len, we will do a simplified search for the
1089 * first k which satisfies tol and eps tests, based on the value
1090 * stored at vec[len-1].
1091 *
1092 * We could allocate and deallocate rvec/ivec since they are
1093 * temporaries, but it is more efficient for caller to manage that.
1094 * If tol>=1.0, rvec and ivec are ignored and may be NULL.
1095 * NOTE: we are going on value vec[i], not fabs(vec[i]).
1096 * Returns k.
1097 * Remember: GIGO.
1098 * Failure modes- if all numbers are <= 0.0, k and *maxi will be returned 0.
1099 * Failure modes- if all numbers are < eps, k and *maxi will be returned 0.
1100 *
1101 * find_pivot_index, should we implement, would search an int vector.
1102 *
1103 * Hint: if you know you have not increased any values in vec and have not
1104 * changed the value at vec[maxi] before a subsequent search, then call
1105 * with len = previous maxi+1 to reduce the search time.
1106 */
1107 static int32 find_pivot_number(const real64 *vec,
1108 const int32 len,
1109 const real64 tol,
1110 const real64 eps,
1111 int32 * const ivec,
1112 real64 * const rvec,
1113 int32 * const maxi)
1114 {
1115 int32 j,k;
1116 if (tol <= D_ZERO ) {
1117 *maxi = 0;
1118 return 0;
1119 }
1120 if (tol >= D_ONE) {
1121 register real64 biggest = MAX(eps,D_ZERO);
1122 /* get the biggest, period */
1123 k = 0;
1124 for (j=0; j < len; j++) {
1125 if (vec[j] > biggest) {
1126 biggest = vec[j];
1127 k = j;
1128 }
1129 }
1130 *maxi = k;
1131 } else {
1132 int32 bigi;
1133 bigi = 0;
1134 rvec[0] = eps;
1135 ivec[0] = 0;
1136 if (*maxi==len) {
1137 /* cheap search against eps and tol, no list. */
1138 register real64 thold;
1139 thold = tol * vec[len-1];
1140 if (thold >= eps) {
1141 for (k = 0; k < len && vec[k] < thold; k++);
1142 } else {
1143 for (k = 0; k < len && vec[k] < eps; k++);
1144 }
1145 /* adjust maxi to still point at last location, as indicated by len */
1146 (*maxi)--;
1147 } else {
1148 real64 rlast;
1149 rlast = eps;
1150 /* create short list */
1151 for (j=0; j < len; j++) {
1152 while (j < len && vec[j] <= rlast) j++; /* skip to next max */
1153 if (j < len) {
1154 ivec[bigi] = j;
1155 rvec[bigi] = vec[j];
1156 bigi++; /* bigi ends up being len of rvec or, if vec all<= eps, 0 */
1157 rlast = rvec[bigi-1];
1158 }
1159 }
1160 if (bigi == 0) {
1161 *maxi = k = 0;
1162 } else {
1163 register real64 thold;
1164 /* Note: if bigi is enormous, we should do a binary search,
1165 not linear. */
1166 *maxi = ivec[bigi-1];
1167 thold = tol * rvec[bigi-1];
1168 /* get pivot from shortlist, searching backward */
1169 if (thold >= eps) {
1170 for (k = bigi-1; k >= 0 && rvec[k] >= thold; k--);
1171 } else {
1172 for (k = bigi-1; k >= 0 && rvec[k] >= eps; k--);
1173 }
1174 /* translate pivot to vec index */
1175 k = ivec[k+1];
1176 }
1177 }
1178 }
1179 return k;
1180 }
1181
1182 static void calc_dependent_rows_ranki1(linsolqr_system_t sys)
1183 {
1184 mtx_coord_t nz;
1185 real64 value;
1186 mtx_range_t colrange;
1187 mtx_range_t rowrange;
1188 real64 *lc;
1189 mtx_matrix_t mtx;
1190
1191 sys->rowdeps = TRUE;
1192 if( ( (sys->reg.row.low == sys->rng.low) &&
1193 ( sys->reg.row.high == sys->rng.low+sys->rank-1 )
1194 ) || sys->rank==0 )
1195 return;
1196
1197 lc = sys->ludata->tmp;
1198 colrange.low = sys->rng.low;
1199 colrange.high = colrange.low + sys->rank - 1;
1200 rowrange.low = sys->rng.high;
1201 rowrange.high = sys->rng.low+sys->rank;
1202 mtx=sys->factors;
1203
1204 nz.row = sys->reg.row.low;
1205 for( ; nz.row <= sys->reg.row.high; nz.row++ ) {
1206 if( nz.row == sys->rng.low ) {
1207 nz.row = rowrange.high-1;
1208 continue;
1209 }
1210 mtx_zero_real64(lc,(sys->capacity));
1211 mtx_org_row_vec(mtx,nz.row,lc,&colrange);
1212 if( nz.row < rowrange.high || nz.row > rowrange.low )
1213 backward_substitute(sys,lc,TRUE);
1214 forward_substitute(sys,lc,TRUE);
1215 mtx_clear_row(mtx,nz.row,&colrange);
1216 for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) {
1217 value = lc[mtx_col_to_org(mtx,nz.col)];
1218 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
1219 }
1220 }
1221 }
1222
1223
1224 static void calc_dependent_cols_ranki1(linsolqr_system_t sys)
1225 {
1226 mtx_coord_t nz;
1227 real64 value;
1228 mtx_range_t rowrange;
1229 mtx_range_t colrange;
1230 real64 *lc;
1231 mtx_matrix_t mtx;
1232
1233 sys->coldeps = TRUE;
1234 if( ( (sys->reg.col.low == sys->rng.low) &&
1235 ( sys->reg.col.high == sys->rng.low+sys->rank-1 )
1236 ) || sys->rank==0 )
1237 return;
1238
1239 lc = sys->ludata->tmp;
1240 rowrange.low = sys->rng.low;
1241 rowrange.high = rowrange.low + sys->rank - 1;
1242 colrange.high = sys->rng.low+sys->rank;
1243 colrange.low = sys->rng.high;
1244 mtx=sys->factors;
1245
1246 nz.col = sys->reg.col.low;
1247 for( ; nz.col <= sys->reg.col.high; nz.col++ ) {
1248 if( nz.col == sys->rng.low ) {
1249 nz.col = colrange.high-1;
1250 continue;
1251 }
1252 mtx_zero_real64(lc,sys->capacity);
1253 mtx_org_col_vec(mtx,nz.col,lc,&rowrange);
1254 if( nz.col < colrange.high || nz.col > colrange.low )
1255 backward_substitute(sys,lc,FALSE);
1256 forward_substitute(sys,lc,FALSE);
1257 mtx_clear_col(mtx,nz.col,&rowrange);
1258 for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) {
1259 value = lc[mtx_row_to_org(mtx,nz.row)];
1260 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
1261 }
1262 }
1263 }
1264
1265 /**
1266 *** Given a matrix and a diagonal range, sets the diagonal elements to 0
1267 *** but does not delete them in the range low to high inclusive.
1268 *** worst cost= k*(nonzeros in rows of range treated).
1269 *** likely cost= k*(nonzeros in column pivoted rows of range treated)
1270 *** since you can smartly put in the diagonal last and find
1271 *** it first on many occasions.
1272 *** This is a good bit cheaper than deleting the elements unless one
1273 *** expects thousands of solves.
1274 **/
1275 static void zero_diagonal_elements(mtx_matrix_t mtx,
1276 int32 low, int32 high)
1277 {
1278 mtx_coord_t nz;
1279 for (nz.row = low; nz.row <= high; nz.row++) {
1280 nz.col = nz.row;
1281 mtx_set_value(mtx,&nz,0.0);
1282 }
1283 }
1284
1285 static void zero_unpivoted_vars(linsolqr_system_t sys,
1286 real64 *varvalues,
1287 boolean transpose)
1288 /**
1289 *** Sets the values of unpivoted variables to zero.
1290 **/
1291 {
1292 int32 ndx,order;
1293 mtx_matrix_t mtx;
1294
1295 mtx = sys->factors;
1296 order = mtx_order(mtx);
1297 for( ndx = 0 ; ndx < sys->rng.low ; ++ndx )
1298 if (transpose)
1299 varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO;
1300 else
1301 varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO;
1302
1303 for( ndx = sys->rng.low + sys->rank ; ndx < order ; ++ndx )
1304 if (transpose)
1305 varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO;
1306 else
1307 varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO;
1308 }
1309
1310 #if LINSOL_DEBUG
1311 static void debug_out_factors(FILE *fp,linsolqr_system_t sys)
1312 /**
1313 *** Outputs permutation and values of the nonzero elements in the
1314 *** factor matrix square region given by sys->rng.
1315 **/
1316 {
1317 mtx_region_t reg;
1318 reg.row.low=reg.col.low=sys->rng.low;
1319 reg.row.high=reg.col.high=sys->rng.high;
1320 mtx_write_region(fp,sys->factors,&reg);
1321 }
1322 #endif /* LINSOL_DEBUG */
1323
1324 /***************************************************************************\
1325 Reordering functions for SPK1, and possibly for other schemes to be
1326 implemented later.
1327 The stuff here is almost, but not quite, black magic. Don't tinker with it.
1328 \***************************************************************************/
1329
1330 /*********************************
1331 begin of spk1 stuff
1332 *********************************/
1333 struct reorder_list { /* List of rows/columns and their counts. */
1334 int32 ndx;
1335 int32 count;
1336 struct reorder_list *next;
1337 };
1338
1339 struct reorder_vars {
1340 mtx_matrix_t mtx;
1341 mtx_region_t reg; /* Current active region */
1342 int32 colhigh; /* Original value of reg.col.high */
1343 struct reorder_list *tlist; /* Array of (enough) list elements */
1344 int32 *rowcount; /* rowcount[reg.row.low .. reg.row.high] */
1345 };
1346
1347 static void adjust_row_count(struct reorder_vars *vars,int32 removed_col)
1348 /**
1349 *** Adjusts the row counts to account for the (to be) removed column.
1350 **/
1351 {
1352 mtx_coord_t nz;
1353 real64 value;
1354 nz.col = removed_col;
1355 nz.row = mtx_FIRST;
1356 while( value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row)),
1357 nz.row != mtx_LAST )
1358 --(vars->rowcount[nz.row]);
1359 }
1360
1361 static void assign_row_and_col(struct reorder_vars *vars,
1362 int32 row,
1363 int32 col)
1364 /**
1365 *** Assigns the given row to the given column, moving the row and column
1366 *** to the beginning of the active region and removing them (readjusting
1367 *** the region). The row counts are NOT adjusted. If col == mtx_NONE,
1368 *** then no column is assigned and the row is moved to the end of the
1369 *** active block instead. Otherwise, it is assumed that the column
1370 *** is active.
1371 **/
1372 {
1373 if( col == mtx_NONE ) {
1374 mtx_swap_rows(vars->mtx,row,vars->reg.row.high);
1375 vars->rowcount[row] = vars->rowcount[vars->reg.row.high];
1376 --(vars->reg.row.high);
1377 } else {
1378 mtx_swap_rows(vars->mtx,row,vars->reg.row.low);
1379 vars->rowcount[row] = vars->rowcount[vars->reg.row.low];
1380 mtx_swap_cols(vars->mtx,col,vars->reg.col.low);
1381 ++(vars->reg.row.low);
1382 ++(vars->reg.col.low);
1383 }
1384 }
1385
1386 static void push_column_on_stack(struct reorder_vars *vars,int32 col)
1387 /**
1388 *** Pushes the given column onto the stack. It is assumed that the
1389 *** column is active. Row counts are adjusted.
1390 **/
1391 {
1392 adjust_row_count(vars,col);
1393 mtx_swap_cols(vars->mtx,col,vars->reg.col.high);
1394 --(vars->reg.col.high);
1395 }
1396
1397 static int32 pop_column_from_stack(struct reorder_vars *vars)
1398 /**
1399 *** Pops the column on the "top" of the stack off of the stack and
1400 *** returns the column index, where it now lies in the active region.
1401 *** If the stack is empty, mtx_NONE is returned. Row counts are NOT
1402 *** adjusted (this together with a subsequent assignment of this column
1403 *** ==> no row count adjustment necessary).
1404 **/
1405 {
1406 if( vars->reg.col.high < vars->colhigh )
1407 return(++(vars->reg.col.high));
1408 else
1409 return( mtx_NONE );
1410 }
1411
1412 static void assign_null_rows(struct reorder_vars *vars)
1413 /**
1414 *** Assigns empty rows, moving them to the assigned region. It is
1415 *** assumed that row counts are correct. Columns are assigned off the
1416 *** stack.
1417 **/
1418 {
1419 int32 row;
1420
1421 for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1422 if( vars->rowcount[row] == 0 )
1423 assign_row_and_col(vars , row , pop_column_from_stack(vars));
1424 }
1425
1426 static void forward_triangularize(struct reorder_vars *vars)
1427 /**
1428 *** Forward triangularizes the region, assigning singleton rows with their
1429 *** one and only incident column until there are no more. The row counts
1430 *** must be correct, and they are updated.
1431 **/
1432 {
1433 boolean change;
1434 mtx_coord_t nz;
1435 real64 value;
1436
1437 do {
1438 change = FALSE;
1439 for( nz.row = vars->reg.row.low ;
1440 nz.row <= vars->reg.row.high && vars->rowcount[nz.row] != 1;
1441 ++nz.row ) ;
1442 if( nz.row <= vars->reg.row.high ) {
1443 /* found singleton row */
1444 nz.col = mtx_FIRST; /* this is somehow coming back with nz.col -1 */
1445 value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col));
1446 adjust_row_count(vars,nz.col);
1447 assign_row_and_col(vars,nz.row,nz.col);
1448 change = TRUE;
1449 }
1450 } while( change );
1451 }
1452
1453 static int32 select_row(struct reorder_vars *vars)
1454 /**
1455 *** Selects a row and returns its index. It is assumed that there is a
1456 *** row. Row counts must be correct. vars->tlist will be used.
1457 **/
1458 {
1459 int32 min_row_count;
1460 int32 max_col_count;
1461 int32 row;
1462 int32 i, nties=-2; /* # elements currently defined in vars->tlist */
1463 int32 sum;
1464 mtx_coord_t nz;
1465 real64 value;
1466 mtx_matrix_t mtx;
1467 mtx_range_t *colrng, *rowrng;
1468
1469 /* Set to something > any possible value */
1470 min_row_count = vars->reg.col.high-vars->reg.col.low+2;
1471 for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1472 if( vars->rowcount[row] <= min_row_count ) {
1473 if( vars->rowcount[row] < min_row_count ) {
1474 min_row_count = vars->rowcount[row];
1475 nties = 0;
1476 }
1477 vars->tlist[nties++].ndx = row;
1478 }
1479 /**
1480 *** vars->tlist[0..nties-1] is a list of row numbers which tie for
1481 *** minimum row count.
1482 **/
1483
1484 max_col_count = -1; /* < any possible value */
1485 i = 0;
1486 mtx = vars->mtx;
1487 colrng=&(vars->reg.col);
1488 rowrng=&(vars->reg.row);
1489 while( i < nties ) {
1490
1491 sum = 0;
1492 nz.row = vars->tlist[i].ndx;
1493 nz.col = mtx_FIRST;
1494 while( value = mtx_next_in_row(mtx,&nz,colrng),
1495 nz.col != mtx_LAST )
1496 sum += mtx_nonzeros_in_col(mtx,nz.col,rowrng);
1497 if( sum > max_col_count ) {
1498 max_col_count = sum;
1499 row = nz.row;
1500 }
1501 i++;
1502 }
1503 /**
1504 *** Now row contains the row with the minimum row count, which has the
1505 *** greatest total column count of incident columns among all rows with
1506 *** the same (minimum) row count. Select it.
1507 **/
1508 return(row);
1509 }
1510
1511 static void spk1_reorder(struct reorder_vars *vars)
1512 /**
1513 *** Reorders the assigned matrix vars->mtx within the specified bounding
1514 *** block region vars->reg. The region is split into 6 subregions during
1515 *** reordering: the rows are divided in two, and the columns divided in
1516 *** three. Initially everything is in the active subregion. Ultimately,
1517 *** everything will be assigned.
1518 ***
1519 *** <-- assigned -->|<-- active-->|<-- on stack -->|
1520 *** ----+----------------+-------------+----------------+
1521 *** a | | | |
1522 *** s | | | |
1523 *** s | | | |
1524 *** i | (SQUARE) | | |
1525 *** g | | | |
1526 *** n | | | |
1527 *** e | | | |
1528 *** d | | | |
1529 *** ----+----------------+-------------+----------------+
1530 *** a | | | |
1531 *** c | | ACTIVE | |
1532 *** t | | REGION | |
1533 *** i | | | |
1534 *** v | | | |
1535 *** e | | | |
1536 *** ----+----------------+-------------+----------------+
1537 ***
1538 *** The algorithm is roughly as follows:
1539 *** (1) select a row (by some criterion).
1540 *** (2) push columns incident on that row onto the stack in decreasing
1541 *** order of their length.
1542 *** (3) pop first column off the stack and assign it to the selected
1543 *** row.
1544 *** (4) forward-triangularize (assign singleton rows with their one
1545 *** and only incident column, until no longer possible).
1546 ***
1547 *** (1)-(4) should be repeated until the active subregion becomes empty.
1548 ***
1549 *** Everything above was written as though the entire matrix is
1550 *** involved. In reality, only the relevant square region is involved.
1551 **/
1552 {
1553 int32 row, size;
1554 int32 *rowcount_array_origin;
1555 mtx_matrix_t mtx;
1556
1557 size = MAX(vars->reg.row.high,vars->reg.col.high) + 1;
1558 vars->tlist = size > 0 ? (struct reorder_list *)
1559 ascmalloc( size*sizeof(struct reorder_list) ) : NULL;
1560 vars->rowcount = rowcount_array_origin = size > 0 ? (int32 *)
1561 ascmalloc( size*sizeof(int32) ) : NULL;
1562 mtx = vars->mtx;
1563
1564 vars->colhigh = vars->reg.col.high;
1565 /* Establish row counts */
1566 for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1567 vars->rowcount[row] =
1568 mtx_nonzeros_in_row(mtx,row,&(vars->reg.col));
1569
1570 while(TRUE) {
1571 struct reorder_list *head;
1572 int32 nelts; /* # elements "allocated" from vars->tlist */
1573 mtx_coord_t nz;
1574 real64 value;
1575
1576 forward_triangularize(vars);
1577 assign_null_rows(vars);
1578 if( vars->reg.row.low>vars->reg.row.high ||
1579 vars->reg.col.low>vars->reg.col.high ) {
1580 /* Active region is now empty, done */
1581 if( NOTNULL(vars->tlist) )
1582 ascfree( vars->tlist );
1583 if( NOTNULL(rowcount_array_origin) )
1584 ascfree( rowcount_array_origin );
1585 return;
1586 }
1587
1588 head = NULL;
1589 nelts = 0;
1590 nz.row = select_row(vars);
1591 nz.col = mtx_FIRST;
1592 while( value = mtx_next_in_row(mtx,&nz,&(vars->reg.col)),
1593 nz.col != mtx_LAST ) {
1594 struct reorder_list **q,*p;
1595
1596 p = &(vars->tlist[nelts++]);
1597 p->ndx = mtx_col_to_org(mtx,nz.col);
1598 p->count = mtx_nonzeros_in_col(mtx,nz.col,&(vars->reg.row));
1599 for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) );
1600 p->next = *q;
1601 *q = p;
1602 }
1603 /**
1604 *** We now have a list of columns which intersect the selected row.
1605 *** The list is in order of decreasing column count.
1606 **/
1607
1608 /* Push incident columns on stack */
1609 for( ; NOTNULL(head) ; head = head->next )
1610 push_column_on_stack(vars,mtx_org_to_col(mtx,head->ndx));
1611
1612 /* Pop column off stack and assign to selected row */
1613 assign_row_and_col(vars , nz.row , pop_column_from_stack(vars));
1614 }
1615 /* Not reached. */
1616 }
1617
1618 /*********************************
1619 end of spk1 stuff
1620 *********************************/
1621 /*********************************
1622 begin of tspk1 stuff
1623 *********************************/
1624 struct creorder_list { /* List of columns/rows and their counts. */
1625 int32 ndx;
1626 int32 count;
1627 struct creorder_list *next;
1628 };
1629
1630 struct creorder_vars {
1631 mtx_matrix_t mtx;
1632 mtx_region_t reg; /* Current active region */
1633 int32 rowhigh; /* Original value of reg.row.high */
1634 struct creorder_list *tlist; /* Array of (enough) list elements */
1635 int32 *colcount; /* colcount[reg.col.low .. reg.col.high] */
1636 };
1637
1638 static void adjust_col_count(struct creorder_vars *vars,int32 removed_row)
1639 /**
1640 *** Adjusts the column counts to account for the (to be) removed row.
1641 **/
1642 {
1643 mtx_coord_t nz;
1644 real64 value;
1645 nz.row = removed_row;
1646 nz.col = mtx_FIRST;
1647 while( value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col)),
1648 nz.col != mtx_LAST )
1649 --(vars->colcount[nz.col]);
1650 }
1651
1652 static void assign_col_and_row(struct creorder_vars *vars,
1653 int32 col,
1654 int32 row)
1655 /**
1656 *** Assigns the given row to the given column, moving the row and column
1657 *** to the beginning of the active region and removing them (readjusting
1658 *** the region). The col counts are NOT adjusted. If col == mtx_NONE,
1659 *** then no column is assigned and the col is moved to the end of the
1660 *** active block instead. Otherwise, it is assumed that the row
1661 *** is active.
1662 **/
1663 {
1664 if( row == mtx_NONE ) {
1665 mtx_swap_cols(vars->mtx,col,vars->reg.col.high);
1666 vars->colcount[col] = vars->colcount[vars->reg.col.high];
1667 --(vars->reg.col.high);
1668 } else {
1669 mtx_swap_cols(vars->mtx,col,vars->reg.col.low);
1670 vars->colcount[col] = vars->colcount[vars->reg.col.low];
1671 mtx_swap_rows(vars->mtx,row,vars->reg.row.low);
1672 ++(vars->reg.col.low);
1673 ++(vars->reg.row.low);
1674 }
1675 }
1676
1677 static void push_row_on_stack(struct creorder_vars *vars,int32 row)
1678 /**
1679 *** Pushes the given row onto the stack. It is assumed that the
1680 *** row is active. Col counts are adjusted.
1681 **/
1682 {
1683 adjust_col_count(vars,row);
1684 mtx_swap_rows(vars->mtx,row,vars->reg.row.high);
1685 --(vars->reg.row.high);
1686 }
1687
1688 static int32 pop_row_from_stack(struct creorder_vars *vars)
1689 /**
1690 *** Pops the row on the "top" of the stack off of the stack and
1691 *** returns the row index, where it now lies in the active region.
1692 *** If the stack is empty, mtx_NONE is returned. Col counts are NOT
1693 *** adjusted (this together with a subsequent assignment of this row
1694 *** ==> no col count adjustment necessary).
1695 **/
1696 {
1697 if( vars->reg.row.high < vars->rowhigh )
1698 return(++(vars->reg.row.high));
1699 else
1700 return( mtx_NONE );
1701 }
1702
1703 static void assign_null_cols(struct creorder_vars *vars)
1704 /**
1705 *** Assigns empty cols, moving them to the assigned region. It is
1706 *** assumed that col counts are correct. Rows are assigned off the
1707 *** stack.
1708 **/
1709 {
1710 int32 col;
1711
1712 for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1713 if( vars->colcount[col] == 0 )
1714 assign_col_and_row(vars , col , pop_row_from_stack(vars));
1715 }
1716
1717 static void cforward_triangularize(struct creorder_vars *vars)
1718 /**
1719 *** Forward triangularizes the region, assigning singleton columns with their
1720 *** one and only incident row until there are no more. The column counts
1721 *** must be correct, and they are updated.
1722 **/
1723 {
1724 boolean change;
1725
1726 do {
1727 mtx_coord_t nz;
1728 real64 value;
1729 change = FALSE;
1730 for( nz.col = vars->reg.col.low ;
1731 nz.col <= vars->reg.col.high && vars->colcount[nz.col] != 1;
1732 ++nz.col ) ;
1733 if( nz.col <= vars->reg.col.high ) {
1734 /* found singleton col */
1735 nz.row = mtx_FIRST;
1736 value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row));
1737 adjust_col_count(vars,nz.row);
1738 assign_col_and_row(vars,nz.col,nz.row);
1739 change = TRUE;
1740 }
1741 } while( change );
1742 }
1743
1744 static int32 select_col(struct creorder_vars *vars)
1745 /**
1746 *** Selects a col and returns its index. It is assumed that there is a
1747 *** col. Col counts must be correct. vars->tlist will be used.
1748 **/
1749 {
1750 int32 min_col_count;
1751 int32 max_row_count;
1752 int32 col;
1753 int32 i, nties=-2; /* # elements currently defined in vars->tlist */
1754 int32 sum;
1755 mtx_coord_t nz;
1756 real64 value;
1757 mtx_matrix_t mtx;
1758 mtx_range_t *colrng, *rowrng;
1759
1760 /* Set to something > any possible value */
1761 min_col_count = vars->reg.row.high-vars->reg.row.low+2;
1762 for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1763 if( vars->colcount[col] <= min_col_count ) {
1764 if( vars->colcount[col] < min_col_count ) {
1765 min_col_count = vars->colcount[col];
1766 nties = 0;
1767 }
1768 vars->tlist[nties++].ndx = col;
1769 }
1770 /**
1771 *** vars->tlist[0..nties-1] is a list of row numbers which tie for
1772 *** minimum col count.
1773 **/
1774
1775 max_row_count = -1; /* < any possible value */
1776 i = 0;
1777 mtx = vars->mtx;
1778 rowrng=&(vars->reg.row);
1779 colrng=&(vars->reg.col);
1780 while( i < nties ) {
1781
1782 sum = 0;
1783 nz.row = mtx_FIRST;
1784 nz.col = vars->tlist[i].ndx;
1785 while( value = mtx_next_in_col(mtx,&nz,rowrng),
1786 nz.row != mtx_LAST )
1787 sum += mtx_nonzeros_in_row(mtx,nz.row,colrng);
1788 if( sum > max_row_count ) {
1789 max_row_count = sum;
1790 col = nz.col;
1791 }
1792 i++;
1793 }
1794 /**
1795 *** Now col contains the col with the minimum col count, which has the
1796 *** greatest total row count of incident rows among all cols with
1797 *** the same (minimum) col count. Select it.
1798 **/
1799 return(col);
1800 }
1801
1802 static void tspk1_reorder(struct creorder_vars *vars)
1803 /**
1804 *** Transpose the picture and explanation that follows:
1805 *** Reorders the assigned matrix vars->mtx within the specified bounding
1806 *** block region vars->reg. The region is split into 6 subregions during
1807 *** reordering: the rows are divided in two, and the columns divided in
1808 *** three. Initially everything is in the active subregion. Ultimately,
1809 *** everything will be assigned.
1810 ***
1811 *** <-- assigned -->|<-- active-->|<-- on stack -->|
1812 *** ----+----------------+-------------+----------------+
1813 *** a | | | |
1814 *** s | | | |
1815 *** s | | | |
1816 *** i | (SQUARE) | | |
1817 *** g | | | |
1818 *** n | | | |
1819 *** e | | | |
1820 *** d | | | |
1821 *** ----+----------------+-------------+----------------+
1822 *** a | | | |
1823 *** c | | ACTIVE | |
1824 *** t | | REGION | |
1825 *** i | | | |
1826 *** v | | | |
1827 *** e | | | |
1828 *** ----+----------------+-------------+----------------+
1829 ***
1830 *** The algorithm is roughly as follows:
1831 *** (1) select a row (by some criterion).
1832 *** (2) push columns incident on that row onto the stack in decreasing
1833 *** order of their length.
1834 *** (3) pop first column off the stack and assign it to the selected
1835 *** row.
1836 *** (4) forward-triangularize (assign singleton rows with their one
1837 *** and only incident column, until no longer possible).
1838 ***
1839 *** (1)-(4) should be repeated until the active subregion becomes empty.
1840 ***
1841 *** Everything above was written as though the entire matrix is
1842 *** involved. In reality, only the relevant square region is involved.
1843 **/
1844 {
1845 int32 col, size;
1846 int32 *colcount_array_origin;
1847 mtx_matrix_t mtx;
1848
1849 size = vars->reg.col.high - vars->reg.col.low + 1;
1850 size = MAX(size,vars->reg.row.high - vars->reg.row.low + 1);
1851 vars->tlist = size > 0 ? (struct creorder_list *)
1852 ascmalloc( size*sizeof(struct creorder_list) ) : NULL;
1853 colcount_array_origin = size > 0 ? (int32 *)
1854 ascmalloc( size*sizeof(int32) ) : NULL;
1855 vars->colcount =
1856 NOTNULL(colcount_array_origin) ?
1857 colcount_array_origin - vars->reg.col.low : NULL;
1858 mtx = vars->mtx;
1859
1860 vars->rowhigh = vars->reg.row.high;
1861 /* Establish col counts */
1862 for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1863 vars->colcount[col] =
1864 mtx_nonzeros_in_col(mtx,col,&(vars->reg.row));
1865
1866 while(TRUE) {
1867 struct creorder_list *head;
1868 int32 nelts; /* # elements "allocated" from vars->tlist */
1869 mtx_coord_t nz;
1870 real64 value;
1871
1872 cforward_triangularize(vars);
1873 assign_null_cols(vars);
1874 if( vars->reg.col.low > vars->reg.col.high ||
1875 vars->reg.row.low > vars->reg.row.high ) {
1876 /* Active region is now empty, done */
1877 if( NOTNULL(vars->tlist) )
1878 ascfree( vars->tlist );
1879 if( NOTNULL(colcount_array_origin) )
1880 ascfree( colcount_array_origin );
1881 return;
1882 }
1883
1884 head = NULL;
1885 nelts = 0;
1886 nz.row = mtx_FIRST;
1887 nz.col = select_col(vars);
1888 while( value = mtx_next_in_col(mtx,&nz,&(vars->reg.row)),
1889 nz.row != mtx_LAST ) {
1890 struct creorder_list **q,*p;
1891
1892 p = &(vars->tlist[nelts++]);
1893 p->ndx = mtx_row_to_org(mtx,nz.row);
1894 p->count = mtx_nonzeros_in_row(mtx,nz.row,&(vars->reg.col));
1895 for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) );
1896 p->next = *q;
1897 *q = p;
1898 }
1899 /**
1900 *** We now have a list of columns which intersect the selected row.
1901 *** The list is in order of decreasing column count.
1902 **/
1903
1904 /* Push incident rows on stack */
1905 for( ; NOTNULL(head) ; head = head->next )
1906 push_row_on_stack(vars,mtx_org_to_row(mtx,head->ndx));
1907
1908 /* Pop row off stack and assign to selected col */
1909 assign_col_and_row(vars , nz.col , pop_row_from_stack(vars));
1910 }
1911 /* Not reached. */
1912 }
1913
1914 /*********************************
1915 end of tspk1 stuff
1916 *********************************/
1917
1918 static boolean nonempty_row(mtx_matrix_t mtx,int32 row)
1919 /**
1920 *** ? row not empty in mtx.
1921 **/
1922 {
1923 mtx_coord_t nz;
1924 real64 value;
1925 value = mtx_next_in_row(mtx, mtx_coord(&nz,row,mtx_FIRST), mtx_ALL_COLS);
1926 return( nz.col != mtx_LAST );
1927 }
1928
1929 static boolean nonempty_col(mtx_matrix_t mtx,int32 col)
1930 /**
1931 *** ? column not empty in mtx.
1932 **/
1933 {
1934 mtx_coord_t nz;
1935 real64 value;
1936 value = mtx_next_in_col(mtx, mtx_coord(&nz,mtx_FIRST,col), mtx_ALL_ROWS);
1937 return( nz.row != mtx_LAST );
1938 }
1939
1940 static void determine_pivot_range(linsolqr_system_t sys)
1941 /**
1942 *** Calculates sys->rng from sys->coef.
1943 **/
1944 {
1945 sys->reg.row.low = sys->reg.row.high = -1;
1946 sys->reg.col.low = sys->reg.col.high = -1;
1947
1948 for( sys->rng.high=mtx_order(sys->coef) ; --(sys->rng.high) >= 0 ; ) {
1949 if( nonempty_row(sys->coef,sys->rng.high) &&
1950 sys->reg.row.high < 0 )
1951 sys->reg.row.high = sys->rng.high;
1952 if( nonempty_col(sys->coef,sys->rng.high) &&
1953 sys->reg.col.high < 0 )
1954 sys->reg.col.high = sys->rng.high;
1955 if( nonempty_row(sys->coef,sys->rng.high) &&
1956 nonempty_col(sys->coef,sys->rng.high) )
1957 break;
1958 }
1959
1960 for( sys->rng.low=0 ; sys->rng.low <= sys->rng.high ; ++(sys->rng.low) ) {
1961 if( nonempty_row(sys->coef,sys->rng.low) &&
1962 sys->reg.row.low < 0 )
1963 sys->reg.row.low = sys->rng.low;
1964 if( nonempty_col(sys->coef,sys->rng.low) &&
1965 sys->reg.col.low < 0 )
1966 sys->reg.col.low = sys->rng.low;
1967 if( nonempty_row(sys->coef,sys->rng.low) &&
1968 nonempty_col(sys->coef,sys->rng.low) )
1969 break;
1970 }
1971 }
1972
1973 static void square_region(linsolqr_system_t sys,mtx_region_t *region)
1974 /**
1975 *** Get the largest square confined to the diagonal within the region given
1976 *** and set sys->rng accordingly.
1977 **/
1978 {
1979 sys->reg = *region;
1980 sys->rng.low = MAX(region->row.low,region->col.low);
1981 sys->rng.high = MIN(region->row.high,region->col.high);
1982 }
1983
1984 static int ranki_reorder(linsolqr_system_t sys,mtx_region_t *region)
1985 /**
1986 *** The region to reorder is first isolated by truncating the region
1987 *** provided to the largest square region confined to the matrix diagonal.
1988 *** It is presumed it will contain no empty rows or columns and will
1989 *** provide the basis of candidate pivots when factoring.
1990 **/
1991 {
1992 struct reorder_vars vars;
1993 CHECK_SYSTEM(sys);
1994 if (sys->fclass != ranki) {
1995 ERROR_REPORTER_HERE(ASC_PROG_ERR,"called on system with inappropriate factor class");
1996 return 1;
1997 }
1998 if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
1999 else square_region(sys,region);
2000
2001 vars.mtx = sys->coef;
2002 vars.reg.row.low = vars.reg.col.low = sys->rng.low;
2003 vars.reg.row.high = vars.reg.col.high = sys->rng.high;
2004 spk1_reorder(&vars);
2005 return 0;
2006 }
2007
2008 static int tranki_reorder(linsolqr_system_t sys,mtx_region_t *region)
2009 /**
2010 *** The region to reorder is first isolated by truncating the region
2011 *** provided to the largest square region confined to the matrix diagonal.
2012 *** It is presumed it will contain no empty rows or columns and will
2013 *** provide the basis of candidate pivots when factoring.
2014 **/
2015 {
2016 struct creorder_vars vars;
2017 CHECK_SYSTEM(sys);
2018 if ( !(sys->fclass==ranki || sys->fclass==s_qr) ) {
2019 ERROR_REPORTER_HERE(ASC_PROG_ERR,"reorder called on system with inappropriate factor method");
2020 return 1;
2021 }
2022 if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2023 else square_region(sys,region);
2024
2025 vars.mtx = sys->coef;
2026 vars.reg.row.low = vars.reg.col.low = sys->rng.low;
2027 vars.reg.row.high = vars.reg.col.high = sys->rng.high;
2028 tspk1_reorder(&vars);
2029 return 0;
2030 }
2031
2032 /***************************************************************************\
2033 End of reordering functions for SPK1.
2034 \***************************************************************************/
2035
2036 /***************************************************************************\
2037 RANKI implementation functions.
2038 \***************************************************************************/
2039
2040 static void eliminate_row(mtx_matrix_t mtx,
2041 mtx_range_t *rng,
2042 int32 row, /* row to eliminate */
2043 real64 *tmp, /* temporary array */
2044 real64 *pivots) /* prior pivots array */
2045 /**
2046 *** Eliminates the given row to the left of the diagonal element, assuming
2047 *** valid pivots for all of the diagonal elements above it (the elements
2048 *** above those diagonal elements, if any exist, are assumed to be U
2049 *** elements and therefore ignored). The temporary array is used by this
2050 *** function to do its job. tmp[k], where rng->low <= k <= rng->high must
2051 *** be defined (allocated) but need not be initialized.
2052 *** pivots[k] where rng->low <= k <=rng->high must be allocated and
2053 *** pivots[rng->low] to pivots[row-1] must contain the previous pivots.
2054 **/
2055 {
2056 mtx_coord_t nz;
2057 mtx_range_t left_to_eliminate,high_cols,tmp_cols;
2058 boolean do_series=FALSE;
2059
2060 high_cols.low = row;
2061 high_cols.high = rng->high;
2062 left_to_eliminate.low = rng->low;
2063 left_to_eliminate.high = row - 1;
2064
2065 /* Move non-zeros left of pivot from matrix to full array */
2066 mtx_zero_real64(tmp+rng->low,(row-rng->low));
2067 nz.row = row;
2068 mtx_cur_row_vec(mtx,row,tmp,&left_to_eliminate);
2069 mtx_clear_row(mtx,row,&left_to_eliminate);
2070
2071 /* eliminates nzs from pivot, one by one, filling tmp with multipliers */
2072 do_series= ( (row-1) >= (rng->low) );
2073 if (do_series) {
2074 mtx_add_row_series_init(mtx,row,FALSE);
2075 /* do nothing between here and the END call to _init which removes
2076 elements from row */
2077 }
2078 for( nz.row = row-1 ; nz.row >= rng->low ; --(nz.row) ) {
2079 if( tmp[nz.row] == D_ZERO )
2080 continue; /* Nothing to do for this row */
2081
2082 nz.col = nz.row;
2083 tmp[nz.row] /= pivots[nz.row];
2084 /* tmp[nz.row] now equals multiplier */
2085
2086 /* Perform "mtx_add_row" for full array part of the row */
2087 left_to_eliminate.high = nz.row - 1;
2088 mtx_cur_vec_add_row(mtx,tmp,nz.row,-tmp[nz.row],
2089 &left_to_eliminate,FALSE);
2090
2091 /* Perform "mtx_add_row" on remaining part of row */
2092 mtx_add_row_series(nz.row,-tmp[nz.row],&high_cols);
2093 }
2094 if (do_series) {
2095 mtx_add_row_series_init(mtx,mtx_NONE,TRUE);
2096 }
2097
2098 mtx_fill_cur_row_vec(mtx,row,tmp,mtx_range(&tmp_cols,rng->low,row-1));
2099
2100 }
2101
2102
2103 /* not in use it seems */
2104 #if BUILD_DEAD_CODE /* yes, baa */
2105 static int32 top_of_spike(linsolqr_system_t sys,int32 col)
2106 /**
2107 *** Determines the top row (row of lowest index) in a possible spike
2108 *** above the diagonal element in the given column. If there is no spike,
2109 *** then (row = ) col is returned.
2110 **/
2111 {
2112 mtx_range_t above_diagonal;
2113 mtx_coord_t nz;
2114 real64 value;
2115 int32 top_row;
2116
2117 above_diagonal.low = sys->rng.low;
2118 above_diagonal.high = col-1;
2119 top_row = nz.col = col;
2120 nz.row = mtx_FIRST;
2121 while( value = mtx_next_in_col(sys->factors,&nz,&above_diagonal),
2122 nz.row != mtx_LAST )
2123 if( nz.row < top_row )
2124 top_row = nz.row;
2125 return( top_row );
2126 }
2127 #endif /* BUILD_DEAD_CODE */
2128
2129 static boolean col_is_a_spike(linsolqr_system_t sys,int32 col)
2130 /**
2131 *** Determines if the col is a spike, characterized by having any
2132 *** nonzeros above the diagonal. By construction there shouldn't
2133 *** be any numeric 0 nonzeros above the diagonal, so we don't prune
2134 *** them out here.
2135 **/
2136 {
2137 mtx_range_t above_diagonal;
2138 mtx_coord_t nz;
2139
2140 above_diagonal.low = sys->rng.low;
2141 above_diagonal.high = col-1;
2142 nz.col = col;
2143 nz.row = mtx_FIRST;
2144 /* this loop is cheaper than it looks */
2145 while( (void)mtx_next_in_col(sys->factors,&nz,&above_diagonal),
2146 nz.row != mtx_LAST ) {
2147 if( nz.row < col ) return TRUE;
2148 }
2149 return( FALSE );
2150 }
2151
2152 /* see implementation notes before modifying this function! */
2153 static void number_drag(real64 *vec, int32 rfrom, int32 rto)
2154 {
2155 int32 i;
2156 real64 tmp;
2157 if (rto <rfrom) {
2158 tmp=vec[rfrom];
2159 for (i=rfrom; i> rto; i--) {
2160 vec[i]=vec[i-1]; /* rotate right */
2161 }
2162 vec[rto]=tmp;
2163 return;
2164 }
2165 if (rto > rfrom) {
2166 tmp=vec[rfrom];
2167 for (i=rfrom; i< rto; i++) {
2168 vec[i]=vec[i+1]; /* rotate left */
2169 }
2170 vec[rto]=tmp;
2171 }
2172 }
2173
2174 static void rankikw_factor(linsolqr_system_t sys)
2175 /**
2176 *** (the following description also applies to rankijz_factor which is
2177 *** different only in pivot selection strategy.)
2178 *** This is the heart of the linear equation solver. This function
2179 *** factorizes the matrix into a lower (L) and upper (U) triangular
2180 *** matrix. sys->smallest_pivot and sys->rank are calculated. The
2181 *** RANKI method is utilized. At the end of elimination, the matrix A
2182 *** is factored into A = U L, where U and L are stored as follows:
2183 ***
2184 *** <----- r ----> <- n-r ->
2185 *** +--------------+---------+
2186 *** | | |
2187 *** | U | |
2188 *** | | |
2189 *** | L | | r
2190 *** | | |
2191 *** +--------------+---------+
2192 *** | | |
2193 *** | | 0 | n-r
2194 *** | | |
2195 *** +--------------+---------+
2196 ***
2197 *** The rows and columns have been permuted so that all of the pivoted
2198 *** original rows and columns are found in the first r rows and columns
2199 *** of the region. The last n-r rows and columns are unpivoted. U has
2200 *** 1's on its diagonal, whereas L's diagonal is stored in the matrix.
2201 ***
2202 *** Everything above was written as though the entire matrix is
2203 *** involved. In reality, only the relevant square region is involved.
2204 **/
2205 {
2206 mtx_coord_t nz;
2207 int32 last_row;
2208 mtx_range_t pivot_candidates;
2209 real64 *tmp;
2210 real64 pivot, *pivots;
2211 int32 length;
2212 mtx_matrix_t mtx;
2213
2214 length = sys->rng.high - sys->rng.low + 1;
2215 tmp = sys->ludata->tmp;
2216 /* eliminate row takes care of zeroing the relevant region and won't
2217 change values outside of it. */
2218 pivots=sys->ludata->pivlist;
2219 mtx=sys->factors;
2220
2221 sys->smallest_pivot = MAXDOUBLE;
2222 last_row = pivot_candidates.high = sys->rng.high;
2223 for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
2224
2225 pivot_candidates.low = nz.col = nz.row;
2226 pivots[nz.row]=pivot = mtx_value(mtx,&nz);
2227 pivot = fabs(pivot);
2228 if( pivot > sys->pivot_zero &&
2229 pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) &&
2230 !col_is_a_spike(sys,nz.row) ) {
2231 /* Good pivot and not a spike: continue with next row */
2232 if( pivot < sys->smallest_pivot )
2233 sys->smallest_pivot = pivot;
2234 ++(nz.row);
2235 continue;
2236 }
2237 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2238 /**
2239 *** Row is a spike row or will
2240 *** be when a necessary column
2241 *** exchange occurs.
2242 **/
2243 eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots);
2244 /* pivot will be largest of those available. get size and value */
2245 pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row]));
2246 if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */
2247 /* Dependent row, drag to the end */
2248 mtx_drag(mtx,nz.row,last_row);
2249 number_drag(pivots,nz.row,last_row);
2250 --last_row;
2251 #undef KAA_DEBUG
2252 #ifdef KAA_DEBUG
2253 ERROR_REPORTER_HERE(ASC_PROG_WARNING,"Warning: Row %d is dependent with pivot %20.8g",nz.row,pivot);
2254 #endif /* KAA_DEBUG */
2255 } else {
2256 /* Independent row: nz contains best pivot */
2257 /* Move pivot to diagonal */
2258 mtx_swap_cols(mtx,nz.row,nz.col);
2259 mtx_drag( mtx , nz.row , sys->rng.low );
2260 number_drag(pivots,nz.row,sys->rng.low);
2261 if( pivot < sys->smallest_pivot )
2262 sys->smallest_pivot = pivot;
2263 ++(nz.row);
2264 }
2265 }
2266
2267 sys->rank = last_row - sys->rng.low + 1;
2268 }
2269
2270
2271 static void rankijz_factor(linsolqr_system_t sys)
2272 /**
2273 *** This is an alternate pivoting strategy introduced by Joe Zaher.
2274 *** it looks down and across for good pivots rather than just across.
2275 ***
2276 **/
2277 {
2278 real64 biggest;
2279 mtx_coord_t nz, best;
2280 mtx_region_t candidates;
2281 real64 *tmp;
2282 real64 pivot, *pivots;
2283 int32 length;
2284 mtx_matrix_t mtx;
2285
2286 length = sys->rng.high - sys->rng.low + 1;
2287 tmp = sys->ludata->tmp;
2288 /* eliminate row takes care of zeroing the relevant region and won't
2289 change values outside of it. */
2290 pivots=sys->ludata->pivlist;
2291 mtx=sys->factors;
2292
2293 sys->smallest_pivot = MAXDOUBLE;
2294 candidates.row.high = sys->rng.high;
2295 candidates.col.high = sys->rng.high;
2296 for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) {
2297 nz.col = nz.row;
2298 pivots[nz.row] = pivot = mtx_value(mtx,&nz);
2299 pivot = fabs(pivot);
2300 candidates.row.low = nz.row;
2301 candidates.col.low = nz.row;
2302 if( !col_is_a_spike(sys,nz.row) ) {
2303 best.col = nz.row;
2304 mtx_col_max(mtx,&best,&(candidates.row),&biggest);
2305 if( fabs(biggest) >= sys->pivot_zero ) {
2306 if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) {
2307 mtx_swap_rows(mtx,nz.row,best.row);
2308 pivots[nz.row] = biggest;
2309 pivot = fabs(biggest);
2310 }
2311 if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot;
2312 ++(nz.row);
2313 continue;
2314 }
2315 }
2316 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2317 /**
2318 *** Row is a spike row or will
2319 *** be when a necessary column
2320 *** exchange occurs.
2321 **/
2322 eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots);
2323 /* pivot will be largest of those available. get size and value */
2324 pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row]));
2325 /* Move pivot to diagonal */
2326 if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2327 /* Dependent row, drag nz to lower right */
2328 mtx_drag(mtx,nz.row,candidates.row.high);
2329 number_drag(pivots,nz.row,candidates.row.high);
2330 --(candidates.row.high);
2331 } else {
2332 /* Independent row, drag nz to upper left */
2333 mtx_swap_cols(mtx,nz.row,nz.col);
2334 mtx_drag( mtx , nz.row , sys->rng.low );
2335 number_drag(pivots,nz.row,sys->rng.low);
2336 if( pivot < sys->smallest_pivot )
2337 sys->smallest_pivot = pivot;
2338 ++(nz.row);
2339 }
2340 }
2341
2342 sys->rank = candidates.row.high - sys->rng.low + 1;
2343 }
2344
2345
2346 #define KAA_DEBUG 0
2347 #if KAA_DEBUG
2348 int ranki_entry(linsolqr_system_t sys,mtx_region_t *region)
2349 #else
2350 static int ranki_entry(linsolqr_system_t sys,mtx_region_t *region)
2351 #endif
2352 /**
2353 *** The region to factor is first isolated by truncating the region
2354 *** provided to the largest square region confined to the matrix diagonal.
2355 *** It is presumed it will contain no empty rows or columns and that it has
2356 *** been previously reordered using linsolqr_reorder(sys,region,spk1)
2357 *** or a sane variant of spk1.
2358 *** This is the entry point for all ranki based strategies, regardless of
2359 *** pivot selection subtleties.
2360 **/
2361 {
2362 struct rhs_list *rl;
2363 double comptime;
2364
2365 CHECK_SYSTEM(sys);
2366 if( sys->factored )
2367 return 0;
2368 switch(sys->fmethod) {
2369 case ranki_kw:
2370 case ranki_jz:
2371 case ranki_ka: /* add new methods here */
2372 break;
2373 default:
2374 return 1;
2375 }
2376
2377 if(ISNULL(sys->ludata))
2378 return 1;
2379
2380 if( NOTNULL(sys->inverse) )
2381 mtx_destroy(sys->inverse);
2382 sys->inverse=NULL;
2383 if( NOTNULL(sys->factors) )
2384 mtx_destroy(sys->factors);
2385 if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2386 else square_region(sys,region);
2387
2388 sys->factors = mtx_copy_region(sys->coef,region);
2389 sys->rank = -1;
2390 sys->smallest_pivot = MAXDOUBLE;
2391 for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
2392 rl->solved = FALSE;
2393 ensure_capacity(sys);
2394 ensure_lu_capacity(sys);
2395
2396 comptime = tm_cpu_time();
2397 switch(sys->fmethod) {
2398 case ranki_ka:
2399 case ranki_kw:
2400 rankikw_factor(sys);
2401 break;
2402 case ranki_jz:
2403 default:
2404 rankijz_factor(sys);
2405 }
2406 /* no longer done automatically as noone usually cares.
2407 calc_dependent_rows_ranki1(sys);
2408 calc_dependent_cols_ranki1(sys);
2409 */
2410 sys->factored = TRUE;
2411
2412 #undef KAA_DEBUG
2413 #if KAA_DEBUG
2414 comptime = tm_cpu_time() - comptime;
2415 ERROR_REPORTER_HERE(ASC_PROG_NOTE,"Time for Inversion = %f",comptime);
2416 ERROR_REPORTER_HERE(ASC_PROG_ERR,"Non-zeros in Inverse = %d",mtx_nonzeros_in_region(sys->factors,region));
2417 #endif
2418 return 0;
2419 }
2420
2421 static int ranki_solve(linsolqr_system_t sys, struct rhs_list *rl)
2422 {
2423 backward_substitute(sys,rl->varvalue,rl->transpose);
2424 forward_substitute(sys,rl->varvalue,rl->transpose);
2425 zero_unpivoted_vars(sys,rl->varvalue,rl->transpose);
2426 return 0;
2427 }
2428
2429
2430 /*
2431 *********************************************************************
2432 * Start of the 2 bodied factorization schemes.
2433 *
2434 * ranki2_entry will be used to access these routines.
2435 * there is now code called:
2436 * rankikw2_factor
2437 * rankijz2_factor
2438 * eliminate_row2
2439 * ranki2_entry
2440 * forward_substitute2
2441 * backward_substitute2
2442 * ranki2_solve
2443 *
2444 * We have fixed the calc_col_dependent and calc_row_dependent
2445 * routines.
2446 *********************************************************************
2447 */
2448
2449 extern int32 mtx_number_ops;
2450
2451 /* this function does no permutation of any sort */
2452 static
2453 void eliminate_row2(mtx_matrix_t mtx,
2454 mtx_matrix_t upper_mtx,
2455 mtx_range_t *rng,
2456 int32 row, /* row to eliminate */
2457 real64 *tmp, /* temporary array */
2458 real64 *pivots, /* prior pivots array */
2459 real64 dtol /* drop tolerance */
2460 )
2461 {
2462 mtx_coord_t nz;
2463 int j,low,high;
2464 double tmpval;
2465
2466 /*
2467 * Move all non-zeros from current row to full array.
2468 * The full array would have been initialized before,
2469 * we must put it back in the clean state when we leave.
2470 * All operations are done over mtx_ALL_COLS.
2471 *
2472 * Note: because of an addrow over mtx_ALL_COLS, entries
2473 * of tmp outside rng may have nonzeros put in them if
2474 * sys->factors has nonzeros in the outlying columns.
2475 * If enough of these pile up out there during elimination
2476 * of a block and the compiler treats double overflow as
2477 * a signalable error, we will have a floating point
2478 * exception.
2479 * Currently sys->factors is a copy of a region from
2480 * the sys->coef matrix, so we should not have anything
2481 * out there to pile up. If we make a variant which
2482 * does not copy the coef matrix but uses it directly,
2483 * this code needs to be revisited.
2484 */
2485 mtx_steal_cur_row_vec(mtx,row,tmp,mtx_ALL_COLS);
2486
2487 /*
2488 * Eliminates nzs from pivot, one by one, filling tmp with multipliers
2489 * We now operate on the entire row, since we are not writing the
2490 * multipliers back to the matrix.
2491 */
2492
2493 low = rng->low;
2494 high = rng->high;
2495 for (j = row-1 ; j >= low ; --(j) ) {
2496 if (tmp[j] == D_ZERO)
2497 continue; /* Nothing to do for this row */
2498 tmpval = tmp[j]/pivots[j]; /* Compute multiplier */
2499
2500 /*
2501 * tmpval is now the multiplier. We use it to eliminate the row,
2502 * but backpatch it to tmp, *after* we do the elimination, as
2503 * mtx_cur_vec_add_row over all columns will stomp on tmp[j]
2504 */
2505 mtx_cur_vec_add_row(mtx,tmp,j,-tmpval,mtx_ALL_COLS,FALSE);
2506 tmp[j] = tmpval; /* patch the diagonal */
2507 }
2508
2509 /*
2510 * Fill up the upper triangular matrix.
2511 * refill the range [low .. row-1]. Remember that we have
2512 * to zero all nnz's in tmp.
2513 */
2514 nz.row = row;
2515 for (j=low;j<row;j++) {
2516 if (tmp[j] != D_ZERO) {
2517 nz.col = j;
2518 mtx_fill_value(upper_mtx,&nz,tmp[j]);
2519 #if RBADEBUG
2520 FPRINTF(gscr,"fillingU: or %d oc %d (cc %d) %24.18g\n",
2521 mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), nz.col, tmp[j]);
2522 #endif
2523 tmp[j] = D_ZERO;
2524 }
2525 }
2526 /*
2527 * refill the range [row .. high]. We *wont* keep
2528 * the diagonal as this is sorted out by the pivot
2529 * array, but here we must keep it, sigh.
2530 * At this stage we don't know that the diagonal is the
2531 * pivot, as that is determined after elimination done.
2532 * Outer loop must delete the ultimate pivot element.
2533 * Odds are good, however, that it is the current diagonal,
2534 * so put in the diagonal last.
2535 */
2536 low = row;
2537 for (j=high;j>=low;j--) {
2538 if (tmp[j] != D_ZERO) {
2539 if (fabs(tmp[j]) > dtol) {
2540 nz.col = j;
2541 mtx_fill_value(mtx,&nz,tmp[j]);
2542 #if RBADEBUG
2543 FPRINTF(gscr,"fillingL: or %d oc %d %24.18g\n",
2544 mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), tmp[j]);
2545 #endif
2546 }
2547 tmp[j] = D_ZERO; /* zero element regardless */
2548 }
2549 }
2550 tmp[row] = D_ZERO;
2551 }
2552
2553 /*
2554 * This function is the same as rankikw_factor except
2555 * that it uses 2 matrices; one to store L and one for U.
2556 * As such it uses eliminate_row2 rather than eliminate_row.
2557 * Note there has been a change in the way ptol is used,
2558 * now selecting nearest passing, rather than taking max.
2559 * We assume there is NO incidence outside the region to be factored
2560 * in sys->factors.
2561 */
2562 static void rankikw2_factor(linsolqr_system_t sys)
2563 {
2564 mtx_coord_t nz;
2565 int32 last_row, defect;
2566 mtx_range_t pivot_candidates;
2567 real64 *tmp;
2568 real64 pivot, *pivots;
2569 mtx_matrix_t mtx, upper_mtx;
2570
2571
2572 /**
2573 ** tmp and pivots are cur indexed, tmp by cols, pivots by rows.
2574 ** rather than allocating tmp every time, which can get expensive,
2575 ** we allocate it once *elsewhere* and zero it all here.
2576 ** Because we don't know the state of it on entry (due to exceptions,
2577 ** etc, the rezeroing is unavoidable.
2578 ** Eliminate_row2 is responsible for returning the working region
2579 ** zeroed.
2580 **/
2581
2582 tmp = sys->ludata->tmp;
2583 mtx_zero_real64(tmp,sys->capacity);
2584 pivots = sys->ludata->pivlist;
2585 mtx = sys->factors;
2586 upper_mtx = sys->inverse;
2587 defect = 0;
2588 #if RBADEBUG
2589 gscr = fopen("/usr1/ballan/tmp/lu/rkw","w+");
2590 #endif
2591
2592 sys->smallest_pivot = MAXDOUBLE;
2593 last_row = pivot_candidates.high = sys->rng.high;
2594 for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
2595
2596 #if (RBADEBUG && 0)
2597 /* remove the && 0 and you better have a lot for free
2598 * disk space
2599 */
2600 mtx_write_region_human_orgrows(gscr,mtx,mtx_ENTIRE_MATRIX);
2601 #endif
2602
2603 pivot_candidates.low = nz.col = nz.row;
2604 pivots[nz.row]=pivot = mtx_value(mtx,&nz);
2605 pivot = fabs(pivot);
2606 if( pivot > sys->pivot_zero &&
2607 pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) &&
2608 !col_is_a_spike(sys,nz.row) ) {
2609 /* Good pivot and not a spike: continue with next row */
2610 if( pivot < sys->smallest_pivot ) {
2611 sys->smallest_pivot = pivot;
2612 }
2613 #if RBADEBUG
2614 FPRINTF(gscr,"Cheap pivot col %d (org %d)\n",
2615 nz.row, mtx_col_to_org(mtx,nz.row));
2616 if (nz.row ==179) {
2617 CONSOLE_DEBUG("ben, stop here");
2618 }
2619 #endif
2620 ++(nz.row);
2621 continue;
2622 }
2623 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2624 /**
2625 *** Row is a spike row or will
2626 *** be when a necessary column
2627 *** exchange occurs.
2628 **/
2629 eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol);
2630 /* pivot will be leftmost of those that pass ptol and eps, or 0.0. */
2631 if (defect) {
2632 /* unfortunately, can't use mtx_ALL_COLS in search now because we must
2633 ignore columns that were dragged out with previous singular rows. */
2634 pivot = mtx_get_pivot_col(mtx,&nz,&pivot_candidates,&(pivots[nz.row]),
2635 sys->ptol,sys->pivot_zero);
2636 } else {
2637 /* no singular cols, and eliminate moved all left of diag crap out, so */
2638 pivot = mtx_get_pivot_col(mtx,&nz,mtx_ALL_COLS,&(pivots[nz.row]),
2639 sys->ptol,sys->pivot_zero);
2640 }
2641 #if RBADEBUG
2642 FPRINTF(gscr,"Pivot column found %d (org %d)\n",
2643 nz.col, mtx_col_to_org(mtx,nz.col));
2644 #endif
2645 if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2646 /*
2647 * Dependent row, drag to the end. The upper_mtx is a slave
2648 * of mtx, and will be dragged automatically.
2649 */
2650 mtx_drag(mtx, nz.row, last_row);
2651 number_drag(pivots, nz.row, last_row);
2652 --last_row;
2653 defect = 1;
2654 } else {
2655 /* Independent row: nz contains selected pivot */
2656 mtx_swap_cols(mtx, nz.row, nz.col); /* this Fixes U as well */
2657 /* Move pivot to diagonal */
2658 mtx_drag(mtx, nz.row, sys->rng.low ); /* this Fix U as well */
2659 number_drag(pivots, nz.row, sys->rng.low);
2660 if( pivot < sys->smallest_pivot )
2661 sys->smallest_pivot = pivot;
2662 ++(nz.row);
2663 }
2664 }
2665 #if RBADEBUG
2666 /* get rid of this asap*/
2667 {
2668 FILE *fp;
2669 int32 cc;
2670
2671 fp = fopen("/usr1/ballan/tmp/lu/kw2p","w+");
2672 FPRINTF(fp,"kw2 final pivot sequence:\n");
2673 for (cc= sys->rng.low; cc <= last_row; cc++) {
2674 FPRINTF(fp,"orgrow,orgcol = %d,%d,%24.18g\n",
2675 mtx_row_to_org(mtx,cc),
2676 mtx_col_to_org(mtx,cc), pivots[cc]);
2677 }
2678 fclose(fp);
2679 }
2680
2681 #endif
2682
2683 zero_diagonal_elements(sys->factors,sys->rng.low,sys->rng.high);
2684 sys->rank = last_row - sys->rng.low + 1;
2685
2686 #if RBADEBUG
2687 {
2688 FILE *fp;
2689 fp = fopen("/usr1/ballan/tmp/lu/kw2m","w+");
2690 mtx_write_region_human_rows(fp,mtx,mtx_ENTIRE_MATRIX);
2691 fclose(fp);
2692 fp = fopen("/usr1/ballan/tmp/lu/kw2um","w+");
2693 mtx_write_region_human_rows(fp,upper_mtx,mtx_ENTIRE_MATRIX);
2694 fclose(fp);
2695 }
2696 #endif
2697 #if RBADEBUG
2698 fclose(gscr);
2699 #endif
2700 }
2701
2702 /*
2703 * This function is the same as rankijz_factor except
2704 * that it uses 2 matrices; one to store L and one for U.
2705 * As such it uses eliminate_row2 rather than eliminate_row.
2706 *
2707 * We are now using the slave matrix feature of the code, and
2708 * thus we all drag/swaps automatically affect the slave (upper_mtx)
2709 * as well.
2710 */
2711 static void rankijz2_factor(linsolqr_system_t sys)
2712 {
2713 real64 biggest;
2714 mtx_coord_t nz, best;
2715 mtx_region_t candidates;
2716 real64 *tmp;
2717 real64 pivot, *pivots;
2718 mtx_matrix_t mtx, upper_mtx;
2719 /**
2720 ** tmp and pivots are cur indexed, tmp by cols, pivots by rows.
2721 ** rather than allocating tmp every time, which can get expensive,
2722 ** we allocate it once elsewhere and zero it all here.
2723 ** Because we don't know the state of it on entry (due to exceptions,
2724 ** etc), the rezeroing is unavoidable.
2725 ** Eliminate_row2 is responsible for returning the working region
2726 ** zeroed.
2727 **/
2728 tmp = sys->ludata->tmp;
2729 mtx_zero_real64(tmp,sys->capacity);
2730 pivots=sys->ludata->pivlist;
2731 mtx = sys->factors;
2732 upper_mtx = sys->inverse;
2733
2734 sys->smallest_pivot = MAXDOUBLE;
2735 candidates.row.high = sys->rng.high;
2736 candidates.col.high = sys->rng.high;
2737 for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) {
2738 nz.col = nz.row;
2739 pivots[nz.row] = pivot = mtx_value(mtx,&nz);
2740 pivot = fabs(pivot);
2741 candidates.row.low = nz.row;
2742 candidates.col.low = nz.row;
2743 if( !col_is_a_spike(sys,nz.row) ) {
2744 best.col = nz.row;
2745 mtx_col_max(mtx,&best,&(candidates.row),&biggest);
2746 /* make sure we aren't missing someone really nice in the column */
2747 if( fabs(biggest) >= sys->pivot_zero ) {
2748 if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) {
2749 mtx_swap_rows(mtx,nz.row,best.row); /* Fixes U as well */
2750 pivots[nz.row] = biggest;
2751 pivot = fabs(biggest);
2752 }
2753 if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot;
2754 ++(nz.row);
2755 continue;
2756 }
2757 }
2758 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2759 /**
2760 *** Row is a spike row or will
2761 *** be when a necessary column
2762 *** exchange occurs.
2763 **/
2764 eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol);
2765 /* pivot will be largest of those available. get size and value */
2766 pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row]));
2767 /* Move pivot to diagonal */
2768 if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2769 /* Dependent row, drag nz to lower right */
2770 mtx_drag(mtx,nz.row,candidates.row.high);
2771 number_drag(pivots,nz.row,candidates.row.high);
2772 --(candidates.row.high);
2773 } else {
2774 /* Independent row, drag nz to upper left */
2775 mtx_swap_cols(mtx,nz.row,nz.col); /* this Fixes U as well */
2776 mtx_drag(mtx ,nz.row ,sys->rng.low);
2777
2778 number_drag(pivots,nz.row,sys->rng.low);
2779 if( pivot < sys->smallest_pivot )
2780 sys->smallest_pivot = pivot;
2781 ++(nz.row);
2782 }
2783 }
2784 zero_diagonal_elements(sys->factors,sys->rng.low,sys->rng.high);
2785 sys->rank = candidates.row.high - sys->rng.low + 1;
2786 }
2787
2788 /*
2789 * See the comments attached to forward_substitute.
2790 * This code otherwise is the same. It uses a 2 bodied
2791 * matrix as well as making use of mtx_ALL_COLS and
2792 * mtx_ALL_ROWS whereever possible.
2793 * We make the following additional assumptions here:
2794 * (if they do not hold, do NOT use this function)
2795 * - sys->inverse (U) has no incidence that is not multipliers
2796 * (and the diagonal of 1s is NOT in sys->inverse.)
2797 * As of 10/95, this is how ranki2 U is constructed.
2798 * - sys->factors (L) has no incidence on the upper triangle,
2799 * including the diagonal, or outside the factored region.
2800 * relaxation: incidence anywhere allowed if value = 0.0
2801 * since 0 doesn't contribute to a dot product
2802 * and the only thing we do with triangles is dot them.
2803 * - There may be singular rows and columns in the factorization,
2804 * but any additions coming from these rows/columns during
2805 * mtx_ALL_*O*S operations will not contribute to sums because the
2806 * user zeroed the arr entries corresponding to these before
2807 * calling this function.
2808 */
2809 static
2810 void forward_substitute2(linsolqr_system_t sys,
2811 real64 *arr,
2812 boolean transpose)
2813 {
2814 mtx_coord_t nz;
2815 real64 sum, *pivlist;
2816 mtx_matrix_t mtx;
2817 int32 dotlim;
2818 boolean nonzero_found=FALSE;
2819
2820 pivlist=sys->ludata->pivlist;
2821 dotlim = sys->rng.low+sys->rank;
2822 if (transpose) { /* arr is indexed by original column number */
2823 mtx=sys->inverse;
2824 for( nz.col=sys->rng.low; nz.col < dotlim; ++(nz.col) ) {
2825 register int32 org_col;
2826
2827 org_col = mtx_col_to_org(mtx,nz.col);
2828 if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
2829 if (nonzero_found) {
2830 sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,mtx_ALL_ROWS,TRUE);
2831 /* arr[org_col] = (arr[org_col] - sum) / D_ONE */;
2832 arr[org_col] -= sum;
2833 }
2834 }
2835 } else { /* arr is indexed by original row number */
2836 mtx=sys->factors;
2837 for( nz.row=sys->rng.low; nz.row < dotlim; ++(nz.row) ) {
2838 register int32 org_row;
2839
2840 org_row = mtx_row_to_org(mtx,nz.row);
2841 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
2842 if (nonzero_found) {
2843 sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,mtx_ALL_COLS,TRUE);
2844 /*
2845 nz.col = nz.row;
2846 arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz);
2847 */
2848 arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row];
2849 }
2850 }
2851 }
2852 }
2853
2854 /*
2855 * See the comments attached to backward_substitute and
2856 * forward_substitute2.
2857 * When solving for the transpose, then we are actually
2858 * running of the lower triangle, hence we use sys->factors.
2859 * Otherwise we use sys->inverse which stores U.
2860 */
2861 static
2862 void backward_substitute2(linsolqr_system_t sys,
2863 real64 *arr,
2864 boolean transpose)
2865 {
2866 mtx_coord_t nz;
2867 real64 sum, *pivlist;
2868 mtx_matrix_t mtx;
2869 int32 dotlim;
2870 boolean nonzero_found=FALSE; /* once TRUE, substitution must be done
2871 over remaining rows/cols */
2872
2873 dotlim=sys->rng.low;
2874 pivlist=sys->ludata->pivlist;
2875 if (transpose) { /* arr is indexed by original column number */
2876 mtx = sys->factors;
2877 for( nz.col = sys->rng.low+sys->rank-1; nz.col >= dotlim ; --(nz.col) ) {
2878 register int32 org_col;
2879
2880 org_col = mtx_col_to_org(mtx,nz.col);
2881 if (arr[org_col] != D_ZERO) nonzero_found=TRUE;
2882 if (nonzero_found) {
2883 sum = mtx_col_dot_full_org_vec(mtx,nz.col,arr,mtx_ALL_ROWS,TRUE);
2884 arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col];
2885 }
2886 }
2887 } else { /* arr is indexed by original row number */
2888 /* we are working from the bottom up */
2889 mtx = sys->inverse;
2890 for( nz.row = sys->rng.low+sys->rank-1; nz.row >= dotlim ; --(nz.row) ) {
2891 register int32 org_row;
2892
2893 org_row = mtx_row_to_org(mtx,nz.row);
2894 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
2895 if (nonzero_found) {
2896 sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,mtx_ALL_COLS,TRUE);
2897 arr[org_row] -= sum;
2898 }
2899 }
2900 }
2901 }
2902
2903 static void calc_dependent_rows_ranki2(linsolqr_system_t sys)
2904 {
2905 mtx_coord_t nz;
2906 real64 value;
2907 mtx_range_t colrange;
2908 mtx_range_t rowrange;
2909 real64 *lc;
2910 mtx_matrix_t mtx;
2911
2912 sys->rowdeps = TRUE;
2913 if( ( (sys->reg.row.low == sys->rng.low) &&
2914 ( sys->reg.row.high == sys->rng.low+sys->rank-1 )
2915 ) || sys->rank==0 )
2916 return;
2917
2918 lc = sys->ludata->tmp;
2919 colrange.low = sys->rng.low;
2920 colrange.high = colrange.low + sys->rank - 1;
2921 rowrange.low = sys->rng.high;
2922 rowrange.high = sys->rng.low+sys->rank;
2923 mtx=sys->factors;
2924
2925 nz.row = sys->reg.row.low;
2926 for( ; nz.row <= sys->reg.row.high; nz.row++ ) {
2927 if( nz.row == sys->rng.low ) {
2928 nz.row = rowrange.high-1;
2929 continue;
2930 }
2931 mtx_zero_real64(lc,(sys->capacity));
2932 /* must zero the whole thing to use the backward_substitute2 right */
2933 mtx_org_row_vec(mtx,nz.row,lc,&colrange);
2934 if( nz.row < rowrange.high || nz.row > rowrange.low )
2935 backward_substitute2(sys,lc,TRUE);
2936 forward_substitute2(sys,lc,TRUE);
2937 mtx_clear_row(mtx,nz.row,&colrange);
2938 for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) {
2939 value = lc[mtx_col_to_org(mtx,nz.col)];
2940 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
2941 }
2942 }
2943 }
2944
2945 static void calc_dependent_cols_ranki2(linsolqr_system_t sys)
2946 {
2947 mtx_coord_t nz;
2948 real64 value;
2949 mtx_range_t rowrange;
2950 mtx_range_t colrange;
2951 real64 *lc;
2952 mtx_matrix_t mtx;
2953
2954 sys->coldeps = TRUE;
2955 if( ( (sys->reg.col.low == sys->rng.low) &&
2956 ( sys->reg.col.high == sys->rng.low+sys->rank-1 )
2957 ) || sys->rank==0 )
2958 return;
2959
2960 lc = sys->ludata->tmp;
2961 rowrange.low = sys->rng.low;
2962 rowrange.high = rowrange.low + sys->rank - 1;
2963 colrange.high = sys->rng.low+sys->rank;
2964 colrange.low = sys->rng.high;
2965 mtx=sys->factors;
2966
2967 nz.col = sys->reg.col.low;
2968 for( ; nz.col <= sys->reg.col.high; nz.col++ ) {
2969 if( nz.col == sys->rng.low ) {
2970 nz.col = colrange.high-1;
2971 continue;
2972 }
2973 mtx_zero_real64(lc,sys->capacity);
2974 mtx_org_col_vec(mtx,nz.col,lc,&rowrange);
2975 if( nz.col < colrange.high || nz.col > colrange.low )
2976 backward_substitute2(sys,lc,FALSE);
2977 forward_substitute2(sys,lc,FALSE);
2978 mtx_clear_col(mtx,nz.col,&rowrange);
2979 for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) {
2980 value = lc[mtx_row_to_org(mtx,nz.row)];
2981 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
2982 }
2983 }
2984 }
2985
2986 static int ranki2_solve(linsolqr_system_t sys, struct rhs_list *rl)
2987 {
2988 /* zero any unsolved for vars first so they don't contaminate
2989 mtx_ALL_*O*S dot products.
2990 */
2991 zero_unpivoted_vars(sys,rl->varvalue,rl->transpose);
2992 backward_substitute2(sys,rl->varvalue,rl->transpose);
2993 forward_substitute2(sys,rl->varvalue,rl->transpose);
2994 return 0;
2995 }
2996
2997 /* include rankiba2 implementation. Note that this is VERY VERY bad style
2998 and needs to be fixed immediately by splitting up the linsolqr file.
2999 This is our fastest (far and away) ranki implementation -- drag free.
3000 */
3001 #if 1
3002 #include "rankiba2.c"
3003 #endif
3004
3005 #ifdef BUILD_KIRK_CODE
3006 /*
3007 * this forward declaration is necessary to keept the
3008 * system happy until we can organize this file
3009 */
3010
3011 static
3012 int kirk1_factor(linsolqr_system_t sys,
3013 mtx_region_t *A11,
3014 int kirk_method);
3015
3016 #endif /* BUILD_KIRK_CODE */
3017
3018 /*
3019 * This is the entry point for all the ranki2_* schemes
3020 * which make use of a 2 bodied matrix.
3021 * it does not respond to ranki_kw and ranki_jz as if they
3022 * were calls for ranki_kw2 and ranki_jz2. with the new
3023 * factorization classes , there's really no call to be
3024 * sneaking around the header functions.
3025 * ranki_ka goes here as well. check sys->fmethod to see whos
3026 * expected to be in use.
3027 */
3028 int ranki2_entry(linsolqr_system_t sys, mtx_region_t *region)
3029 {
3030 struct rhs_list *rl;
3031 double comptime;
3032
3033 CHECK_SYSTEM(sys);
3034 if (sys->factored) return 0;
3035 switch(sys->fmethod) {
3036 case ranki_ba2:
3037 case ranki_kw2:
3038 case ranki_jz2: /* add new methods here */
3039 #ifdef BUILD_KIRK_CODE
3040 case ranki_ka:
3041 #endif /* BUILD_KIRK_CODE */
3042 break;
3043 default:
3044 return 1;
3045 }
3046
3047 if (ISNULL(sys->ludata)) return 1;
3048 if (NOTNULL(sys->inverse)) mtx_destroy(sys->inverse);
3049 sys->inverse = NULL;
3050 if (NOTNULL(sys->factors)) mtx_destroy(sys->factors);
3051 if (region == mtx_ENTIRE_MATRIX) determine_pivot_range(sys);
3052 else square_region(sys,region);
3053
3054 sys->factors = mtx_copy_region(sys->coef,region);
3055 sys->inverse = mtx_create_slave(sys->factors);
3056 sys->rank = -1;
3057 sys->smallest_pivot = MAXDOUBLE;
3058 for (rl = sys->rl ; NOTNULL(rl) ; rl = rl->next)
3059 rl->solved = FALSE;
3060 ensure_capacity(sys);
3061 ensure_lu_capacity(sys);
3062
3063 comptime = tm_cpu_time();
3064 switch(sys->fmethod) {
3065 case ranki_ba2:
3066 rankiba2_factor(sys);
3067 break;
3068 case ranki_kw2:
3069 rankikw2_factor(sys);
3070 break;
3071 case ranki_jz2:
3072 rankijz2_factor(sys);
3073 break;
3074
3075 #ifdef BUILD_KIRK_CODE
3076 case ranki_ka:
3077 kirk1_factor(sys,region,2);
3078 break;
3079 #endif /* BUILD_KIRK_CODE */
3080
3081 default:
3082 return 1;
3083 }
3084 sys->factored = TRUE;
3085
3086 #define KAA_DEBUG 1
3087 #if KAA_DEBUG
3088 if (g_linsolqr_timing) {
3089 int anz;
3090 int fnz;
3091 anz = mtx_nonzeros_in_region(sys->coef,region);
3092 fnz = mtx_nonzeros_in_region(sys->factors,region) +
3093 mtx_nonzeros_in_region(sys->inverse,0);
3094 comptime = tm_cpu_time() - comptime;
3095 CONSOLE_DEBUG("A-NNZ: %d Factor time: %f Fill %g",
3096 anz,comptime,( anz>0 ? (double)fnz/(double)anz : 0));
3097 }
3098 #endif /* KAA_DEBUG */
3099 #undef KAA_DEBUG
3100 return 0;
3101 }
3102
3103 /***************************************************************************\
3104 End of RANKI implementation functions.
3105 \***************************************************************************/
3106
3107
3108 #ifdef BUILD_KIRK_CODE
3109 /*
3110 ***************************************************************************
3111 * Start of kirk_* routines
3112 *
3113 * kirk1_factor:
3114 * This routine is based on rankikw2_factor, except that it takes
3115 * an additional region, so as to do some *global* pivot restriction.
3116 * This is for the case of a single border for the entire matrix.
3117 *
3118 ***************************************************************************
3119 */
3120
3121 /*
3122 * This is just a dummy structure so as to get the size
3123 * correct. It is *exactly the same size as mtx_linklist,
3124 * until we can sort out the software engineering issues.
3125 */
3126 struct dlinklist {
3127 int index;
3128 struct dlinklist *prev;
3129 };
3130
3131 static
3132 int kirk1_factor(linsolqr_system_t sys,
3133 mtx_region_t *A11,
3134 int kirk_method)
3135 {
3136 mtx_coord_t nz;
3137 int32 last_row;
3138 mtx_range_t pivot_candidates;
3139 real64 *tmp,*tmp_array_origin;
3140 real64 pivot, *pivots;
3141 int32 length;
3142 mtx_matrix_t mtx, upper_mtx;
3143 real64 maxa;
3144
3145 int32 *inserted = NULL; /* stuff for the link list */
3146 mem_store_t eltbuffer = NULL;
3147
3148 #ifdef NOP_DEBUG
3149 mtx_number_ops = 0;
3150 #endif /* NOP_DEBUG */
3151
3152 length = sys->rng.high + 1;
3153 tmp_array_origin = length > 0 ? ASC_NEW_ARRAY_CLEAR(real64,length) : NULL;
3154 tmp = tmp_array_origin;
3155 pivots=sys->ludata->pivlist;
3156 mtx = sys->factors;
3157 upper_mtx = sys->inverse;
3158 inserted = ASC_NEW_ARRAY_CLEAR(int32,length);
3159 eltbuffer = mem_create_store(2,256,sizeof(struct dlinklist),
3160 2,256);
3161
3162 sys->smallest_pivot = MAXDOUBLE;
3163 last_row = pivot_candidates.high = sys->rng.high;
3164 for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
3165
3166 pivot_candidates.low = nz.col = nz.row;
3167 pivots[nz.row]=pivot = mtx_value(mtx,&nz);
3168 pivot = fabs(pivot);
3169 maxa = mtx_row_max(mtx,&nz,&pivot_candidates,NULL);
3170 if ((pivot > sys->pivot_zero) && (pivot >= sys->ptol*maxa) &&
3171 !col_is_a_spike(sys,nz.row)) {
3172 if (pivot < sys->smallest_pivot)
3173 sys->smallest_pivot = pivot;
3174 ++(nz.row);
3175 continue;
3176 }
3177 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
3178 /**
3179 *** Row is a spike row or will
3180 *** be when a necessary column
3181 *** exchange occurs.
3182 **/
3183 if (kirk_method==1)
3184 eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol);
3185 else{
3186 mtx_eliminate_row2(mtx,upper_mtx,&(sys->rng),
3187 nz.row,tmp,pivots,inserted,eltbuffer);
3188 mem_clear_store(eltbuffer);
3189 }
3190 /* pivot will be largest of those available. get size and value */
3191 pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row]));
3192 if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */
3193 /*
3194 * Dependent row, drag to the end. The upper_mtx is a slave
3195 * of mtx, and will be dragged automatically.
3196 */
3197 mtx_drag(mtx,nz.row,last_row);
3198 number_drag(pivots,nz.row,last_row);
3199 --last_row;
3200 #ifdef KAA_DEBUG
3201 ERROR_REPORTER_HERE(ASC_PROG_WARNING"Row %d is dependent with pivot %20.8g",
3202 nz.row,pivot);
3203 #endif /* KAA_DEBUG */
3204 } else {
3205 /* Independent row: nz contains best pivot */
3206 mtx_swap_cols(mtx,nz.row,nz.col); /* this Fixes U as well */
3207 /* Move pivot to diagonal */
3208 mtx_drag(mtx , nz.row , sys->rng.low ); /* this Fix U as well */
3209 number_drag(pivots,nz.row,sys->rng.low);
3210 if( pivot < sys->smallest_pivot )
3211 sys->smallest_pivot = pivot;
3212 ++(nz.row);
3213 }
3214 }
3215 if( NOTNULL(tmp_array_origin) )
3216 ascfree( (POINTER)tmp_array_origin );
3217 if (NOTNULL(inserted)) ascfree((char *)inserted);
3218 mem_destroy_store(eltbuffer);
3219
3220 #ifdef NOP_DEBUG
3221 CONSOLE_DEBUG("Number operations = %d",mtx_number_ops);
3222 #endif /* NOP_DEBUG */
3223
3224 sys->rank = last_row - sys->rng.low + 1;
3225 return 0; /* this return code needs to be useful */
3226 }
3227 #endif /* BUILD_KIRK_CODE */
3228
3229 #if 0
3230