/[ascend]/trunk/ascend4/solver/linsolqr.c
ViewVC logotype

Contents of /trunk/ascend4/solver/linsolqr.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations) (download) (as text)
Fri Oct 29 20:54:12 2004 UTC (18 years, 7 months ago) by aw0a
File MIME type: text/x-csrc
File size: 168133 byte(s)
Setting up web subdirectory in repository
1 /*
2 * linsol II: Ascend Sparse Linear Solvers
3 * by Benjamin Allan
4 * Created: 2/6/90
5 * Version: $Revision: 1.26 $
6 * Version control file: $RCSfile: linsolqr.c,v $
7 * Date last modified: $Date: 2000/01/25 02:26:58 $
8 * Last modified by: $Author: ballan $
9 *
10 * This file is part of the SLV solver.
11 *
12 * Copyright (C) 1994 Benjamin Andrew Allan
13 *
14 * Based on linsol:
15 * Copyright (C) 1990 Karl Michael Westerberg
16 * Copyright (C) 1993 Joseph Zaher
17 * Copyright (C) 1994 Joseph Zaher, Benjamin Andrew Allan
18 * and on sqr.pas v1.5:
19 * Copyright (C) 1994 Boyd Safrit
20 *
21 * The SLV solver is free software; you can redistribute
22 * it and/or modify it under the terms of the GNU General Public License as
23 * published by the Free Software Foundation; either version 2 of the
24 * License, or (at your option) any later version.
25 *
26 * The SLV solver is distributed in hope that it will be
27 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 * General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with the program; if not, write to the Free Software Foundation,
33 * Inc., 675 Mass Ave, Cambridge, MA 02139 USA. Check the file named
34 * COPYING. COPYING is found in ../compiler.
35 */
36
37 /*
38 * Implementation Notes:
39 * 9/95: there are many places in linsolqr.c where lower level matrix
40 * operations have been replaced by higher level ones. These used to
41 * be flagged as such so that people who wanted to construct newer
42 * operations from the lowest level mtx operators could see how to
43 * use the lowlevel operators. Since it has been decided that the old
44 * linsol will NOT be discarded completely (we like having easy
45 * benchmarks to look better than), the user wishing to code new
46 * operations is directed back to linsol to see how lowlevel operations
47 * can be expensively done.
48 *
49 * 10/95: (BAA) I attempted to optimize number_drag through use of
50 * pointer arithmetic. Testing the resulting code without optimization
51 * turned on gave about 20% improvement over the original on an alpha
52 * using the native compiler. However, with optimization on the array
53 * subscripted version which is left here won by ~40%.
54 * The results for gcc were similar: pointer 10% better unoptimized,
55 * subscripted 30% better when -O2.
56 * For both compilers -O3 was worse! gcc was ~1.8x slower than the
57 * native cc both optimized and not.
58 * I have not tested a loop unrolled drag, but I expect unrolling is
59 * exactly what the optimizer is doing, so why bother?
60 */
61
62 #include <math.h>
63 #include <stdarg.h>
64 #include "utilities/ascConfig.h"
65 #include "utilities/ascMalloc.h"
66 #include "utilities/ascPanic.h"
67 #include "utilities/mem.h"
68 #include "utilities/set.h"
69 #include "general/tm_time.h"
70 #include "solver/mtx.h"
71 #include "solver/linsolqr.h"
72
73 #ifdef __WIN32__
74 #define copysign _copysign
75 #endif /* __WIN32__ */
76
77 #define LINSOLQR_DEBUG FALSE
78 /* qr code debugging flag */
79 #define LINSOL_DEBUG FALSE
80 /* linsol debugging flag */
81
82 #define D_ZERO (real64)0.0
83 #define ZERO (int32)0
84 #define D_ONE (real64)1.0
85
86 /* housekeeping flags */
87 #define BUILD_DEAD_CODE 0
88 #undef BUILD_KIRK_CODE
89
90
91 #define RBADEBUG 0
92 /* turns on spew that will generate output files
93 * comparable to and in the same location as for rankiba2
94 * user specific hardcoded pathnames are involved.
95 * don't try this at home.
96 */
97 #if RBADEBUG
98 extern FILE *gscr;
99 #endif
100
101 /*
102 * timing messages disable flag.
103 */
104 int g_linsolqr_timing = 1;
105
106 /***************************************************************************\
107 Data structures for linsol
108 Any time you modify these, please verify linsolqr_size is still correct.
109 \***************************************************************************/
110 struct rhs_list {
111 real64 *rhs; /* Vector of rhs values */
112 real64 *varvalue; /* Solution of the linear system */
113 boolean solved; /* ? has the rhs been solved */
114 boolean transpose; /* ? transpose the coef matrix */
115 struct rhs_list *next;
116 };
117
118 struct qr_fill_t {
119 int32 cnt; /* number of real nonzeros in a column */
120 int32 col; /* column number prior to being sorted */
121 int32 fill; /* fill if this (or if col?) were the pivot column */
122 int32 overlap; /* scratch for fillable calculation. */
123 };
124 /* fill counting structure for sparse qr */
125
126 struct lu_auxdata { /* all indexed by cur row or cur col number */
127 real64 *pivlist; /* vector of pivots (diagonal of L for ranki) */
128 real64 *tmp; /* row elimination, dependency buffer */
129 int32 cap; /* current capacity of the vectors if not null */
130 };
131 /* structure for lu algorithms */
132
133 struct qr_auxdata { /* all indexed by cur row or cur col number */
134 mtx_matrix_t hhvects; /* slave matrix of sys->factors for holding u's */
135 mtx_sparse_t sp; /* sparse data space */
136 real64 *alpha; /* column norms for the Q\R factor matrix */
137 real64 *sigma; /* column norms for the S (R inverse) matrix */
138 real64 *tau; /* column tau of the Householder transforms */
139 real64 *hhcol; /* Householder transform scratch vector */
140 real64 *hhrow; /* Householder transform scratch vector */
141 real64 nu; /* current condition number of inverse */
142 real64 anu; /* condition number of coef */
143 real64 asubnu; /* condition number of active region in coef */
144 struct qr_fill_t *fill; /* fill counting structure */
145 mtx_region_t facreg; /* region over which qr is done if nonsquare */
146 int32 cap; /* current capacity of these vectors if not null */
147 };
148 /* structure for CondQR algorithms */
149
150 struct linsolqr_header {
151 int integrity;
152 enum factor_class fclass; /* Type of factoring expected */
153 enum factor_method fmethod; /* Method factoring expected */
154 enum reorder_method rmethod; /* Type of most recent reordering */
155 int32 capacity; /* Capacity of arrays */
156 mtx_matrix_t coef; /* Coefficient matrix */
157 struct rhs_list *rl; /* List of rhs vectors */
158 int rlength; /* Length of rhs list */
159 real64 pivot_zero; /* Smallest acceptable pivot */
160 real64 ptol; /* Pivot selection tolerance */
161 real64 ctol; /* Condition selection tolerance */
162 real64 dtol; /* Matrix entry drop tolerance */
163 mtx_matrix_t factors; /* Matrix with UL and dependence info (ranki),
164 or L and dependence info (ranki2),
165 or R and dependence info (qr methods) */
166 mtx_matrix_t inverse; /* Matrix with U multipliers (ranki2) or R^-1
167 (condqr), in both cases slave of factors */
168 boolean factored; /* ? has the matrix been factored */
169 boolean rowdeps; /* ? have row dependencies been calc'ed */
170 boolean coldeps; /* ? have col dependencies been calc'ed */
171 mtx_range_t rng; /* Pivot range */
172 mtx_region_t reg; /* Bounding region given */
173 int32 rank; /* Rank of the matrix */
174 real64 smallest_pivot; /* Smallest pivot accepted */
175 struct qr_auxdata *qrdata; /* Data vectors for qr methods */
176 struct lu_auxdata *ludata; /* Data vectors for lu methods */
177 };
178 /* linsol main structure */
179
180 /***************************************************************************\
181 string functions for linsol io
182 \***************************************************************************/
183
184 char *linsolqr_rmethods() {
185 static char names[]="Natural, SPK1, TSPK1";
186 return names;
187 }
188
189 char *linsolqr_fmethods() {
190 static char names[] =
191 "SPK1/RANKI,SPK1/RANKI+ROW,Fast-SPK1/RANKI,Fast-SPK1/RANKI+ROW,Fastest-SPK1/MR-RANKI,CondQR,CPQR";
192 return names;
193 }
194
195 /** **/
196 enum reorder_method linsolqr_rmethod_to_enum(char *name) {
197 if (strcmp(name,"SPK1")==0) return spk1;
198 if (strcmp(name,"TSPK1")==0) return tspk1;
199 if (strcmp(name,"Natural")==0) return natural;
200 return unknown_r;
201 }
202
203 enum factor_method linsolqr_fmethod_to_enum(char *name) {
204 if (strcmp(name,"KIRK-STUFF")==0) return ranki_ka;
205 if (strcmp(name,"SPK1/RANKI")==0) return ranki_kw;
206 if (strcmp(name,"SPK1/RANKI+ROW")==0) return ranki_jz;
207 if (strcmp(name,"Fast-SPK1/RANKI")==0) return ranki_kw2;
208 if (strcmp(name,"Fast-SPK1/RANKI+ROW")==0) return ranki_jz2;
209 if (strcmp(name,"Fastest-SPK1/MR-RANKI")==0) return ranki_ba2;
210 if (strcmp(name,"CondQR")==0) return cond_qr;
211 if (strcmp(name,"CPQR")==0) return plain_qr;
212 return unknown_f;
213 }
214
215 enum factor_class linsolqr_fmethod_to_fclass(enum factor_method fm)
216 {
217 switch (fm) {
218 /* ranki-like things */
219 case ranki_kw:
220 case ranki_jz:
221 case ranki_ba2:
222 case ranki_kw2:
223 case ranki_jz2:
224 case ranki_ka:
225 return ranki;
226 /* implemented qr things */
227 case plain_qr:
228 return s_qr;
229 /* other stuff unimplemented. all fall through */
230 case cond_qr:
231 case opt_qr:
232 case ls_qr:
233 case gauss_ba2:
234 case symmetric_lu:
235 case unknown_f:
236 default:
237 return unknown_c;
238 }
239 }
240
241 /** **/
242 char *linsolqr_enum_to_rmethod(enum reorder_method meth) {
243 switch (meth) {
244 case spk1: return "SPK1";
245 case tspk1: return "TSPK1";
246 case natural: return "Natural";
247 default: return "Unknown reordering method.";
248 }
249 }
250
251 char *linsolqr_enum_to_fmethod(enum factor_method meth) {
252 switch (meth) {
253 case ranki_kw: return "SPK1/RANKI";
254 case ranki_jz: return "SPK1/RANKI+ROW";
255 case ranki_ba2: return "Fastest-SPK1/MR-RANKI";
256 case ranki_kw2: return "Fast-SPK1/RANKI";
257 case ranki_jz2: return "Fast-SPK1/RANKI+ROW";
258 case ranki_ka: return "KIRK-STUFF";
259 case cond_qr: return "CondQR";
260 case plain_qr: return "CPQR";
261 default: return "Unknown factorization method.";
262 }
263 }
264
265 /** **/
266 char *linsolqr_rmethod_description(enum reorder_method meth) {
267 static char sspk1[]="SPK1 reordering ala Stadtherr.";
268 static char stspk1[]="SPK1 reordering column wise ala Stadtherr.";
269 static char nat[]="Ordering as received from user";
270 switch (meth) {
271 case spk1: return sspk1;
272 case tspk1: return stspk1;
273 case natural: return nat;
274 default: return "Unknown reordering method.";
275 }
276 }
277
278 char *linsolqr_fmethod_description(enum factor_method meth) {
279 static char ex_ranki[] =
280 "SPK1 reordering with RANKI LU factoring ala Stadtherr.";
281 static char ex_rankib[] =
282 "SPK1 reordering with RANKI LU factoring ala Stadtherr, but fast.";
283 static char ex_rankij[]="SPK1/RANKI LU with pseudo-complete pivoting";
284 static char ex_rankik[]="KIRK-STUFF/RANKI LU with pseudo-complete pivoting";
285 static char ex_condqr[]="Sparse QR with condition controlled pivoting.";
286 static char ex_plainqr[]="Sparse QR with column pivoting.";
287 switch (meth) {
288 case ranki_kw: return ex_ranki;
289 case ranki_kw2: return ex_ranki;
290 case ranki_ba2: return ex_rankib;
291 case ranki_jz: return ex_rankij;
292 case ranki_jz2: return ex_rankij;
293 case ranki_ka: return ex_rankik;
294 case cond_qr: return ex_condqr;
295 case plain_qr: return ex_plainqr;
296 default: return "Unknown factorization method.";
297 }
298 }
299
300 /***************************************************************************\
301 internal check routines
302 \***************************************************************************/
303 #define OK ((int)439828676)
304 #define DESTROYED ((int)839276847)
305 #define CHECK_SYSTEM(a) check_system((a),__FILE__,__LINE__)
306 static int check_system(linsolqr_system_t sys, char *file, int line)
307 /**
308 *** Checks the system handle. Return 0 if ok, 1 otherwise.
309 **/
310 {
311 if( ISNULL(sys) ) {
312 FPRINTF(stderr,"ERROR: (linsolqr) check_system\n");
313 FPRINTF(stderr," NULL system handle.\n");
314 return 1;
315 }
316
317 switch( sys->integrity ) {
318 case OK:
319 return 0;
320 case DESTROYED:
321 FPRINTF(stderr,"ERROR: (linsolqr) check_system, line %d\n",line);
322 FPRINTF(stderr," System was recently destroyed.\n");
323 break;
324 default:
325 FPRINTF(stderr,"ERROR: (linsolqr) check_system, line %d\n",line);
326 FPRINTF(stderr," System was reused or never created.\n");
327 break;
328 }
329 return 1;
330 }
331
332 /***************************************************************************\
333 external calls (and some of their internals)
334 \***************************************************************************/
335 static void destroy_rhs_list(struct rhs_list *rl)
336 /**
337 *** Destroys rhs list.
338 **/
339 {
340 while( NOTNULL(rl) ) {
341 struct rhs_list *p;
342 p = rl;
343 rl = rl->next;
344 if( NOTNULL(p->varvalue) )
345 ascfree( (POINTER)(p->varvalue) );
346 ascfree( (POINTER)p );
347 }
348 }
349
350 static struct rhs_list *find_rhs(struct rhs_list *rl,real64 *rhs)
351 /**
352 *** Searches for rhs in rhs list, returning it or NULL if not found.
353 **/
354 {
355 for( ; NOTNULL(rl) ; rl = rl->next )
356 if( rl->rhs == rhs )
357 break;
358 return(rl);
359 }
360
361 static struct lu_auxdata *create_ludata()
362 /**
363 *** Creates an empty zeroed ludata struct.
364 *** Filled up by insure_lu_capacity.
365 **/
366 {
367 struct lu_auxdata *d;
368 d=(struct lu_auxdata *)asccalloc(1,sizeof(struct lu_auxdata));
369 return d;
370 }
371
372 static struct qr_auxdata *create_qrdata()
373 /**
374 *** Creates an empty zeroed qrdata struct.
375 *** Filled up by insure_qr_capacity.
376 **/
377 {
378 struct qr_auxdata *d;
379 d=(struct qr_auxdata *)asccalloc(1,sizeof(struct qr_auxdata));
380 return d;
381 }
382
383 static void destroy_ludata(struct lu_auxdata *d) {
384 /**
385 *** Conditionally destroys a ludata struct and its parts.
386 **/
387 if (NOTNULL(d)) {
388 if (NOTNULL(d->pivlist)) ascfree(d->pivlist);
389 d->pivlist=NULL;
390 if (NOTNULL(d->tmp)) ascfree(d->tmp);
391 d->tmp=NULL;
392 ascfree(d);
393 }
394 }
395
396 static void destroy_qrdata(struct qr_auxdata *d) {
397 /**
398 *** Conditionally destroys a qrdata struct and its parts.
399 **/
400 if (NOTNULL(d)) {
401 if (NOTNULL(d->alpha)) ascfree(d->alpha);
402 if (NOTNULL(d->sigma)) ascfree(d->sigma);
403 if (NOTNULL(d->tau)) ascfree(d->tau);
404 if (NOTNULL(d->hhcol)) ascfree(d->hhcol);
405 if (NOTNULL(d->hhrow)) ascfree(d->hhrow);
406 if (NOTNULL(d->fill)) ascfree(d->fill);
407
408 ascfree(d);
409 }
410 }
411
412 linsolqr_system_t linsolqr_create()
413 {
414 linsolqr_system_t sys;
415
416 sys = (linsolqr_system_t)ascmalloc( sizeof(struct linsolqr_header) );
417 sys->fclass = unknown_c;
418 sys->fmethod = unknown_f;
419 sys->rmethod = unknown_r;
420 sys->integrity = OK;
421 sys->capacity = 0;
422 sys->coef = NULL;
423 sys->rl = NULL;
424 sys->rlength = 0;
425 sys->pivot_zero = 1e-12; /* default value */
426 sys->ptol = 0.1; /* default value */
427 sys->ctol = 0.1; /* default value */
428 sys->dtol = LINQR_DROP_TOLERANCE; /* default value */
429 sys->factors = NULL;
430 sys->inverse = NULL;
431 sys->factored = FALSE;
432 sys->rowdeps = FALSE;
433 sys->coldeps = FALSE;
434 sys->rng.low = sys->rng.high = -1;
435 sys->reg.row.low = sys->reg.row.high = -1;
436 sys->reg.col.low = sys->reg.col.high = -1;
437 sys->rank = -1;
438 sys->smallest_pivot = MAXDOUBLE;
439 sys->qrdata = NULL;
440 sys->ludata = NULL;
441 return(sys);
442 }
443
444 void linsolqr_destroy(linsolqr_system_t sys)
445 {
446 if(CHECK_SYSTEM(sys)) {
447 FPRINTF(stderr,"Bad linsolqr_system_t found. Not destroyed.\n");
448 return;
449 }
450 if( NOTNULL(sys->coef) ) {
451 FPRINTF(stderr,"Notice: (linsolqr) non-NULL coefficient matrix\n");
452 FPRINTF(stderr," in linsolqr system not being\n");
453 FPRINTF(stderr," destroyed with linsolqr_system_t.\n");
454 }
455 if( NOTNULL(sys->inverse) )
456 mtx_destroy(sys->inverse);
457 if( NOTNULL(sys->factors) )
458 mtx_destroy(sys->factors);
459 destroy_rhs_list(sys->rl);
460 destroy_qrdata(sys->qrdata);
461 destroy_ludata(sys->ludata);
462 sys->integrity = DESTROYED;
463 ascfree( (POINTER)sys );
464 }
465
466 void linsolqr_set_matrix(linsolqr_system_t sys,mtx_matrix_t mtx)
467 {
468 if(CHECK_SYSTEM(sys)) {
469 FPRINTF(stderr,"Bad linsolqr_system_t found. coef mtx not set.\n");
470 return;
471 }
472 sys->coef = mtx;
473 }
474
475
476 void linsolqr_set_region(linsolqr_system_t sys,mtx_region_t region)
477 {
478 if(CHECK_SYSTEM(sys)) {
479 FPRINTF(stderr,"Bad linsolqr_system_t found. coef mtx not set.\n");
480 return;
481 }
482 sys->reg = region;
483 }
484
485
486 mtx_matrix_t linsolqr_get_matrix(linsolqr_system_t sys)
487 {
488 CHECK_SYSTEM(sys);
489 return( sys->coef );
490 }
491
492 mtx_region_t *linsolqr_get_region(linsolqr_system_t sys)
493 {
494 return &(sys->reg);
495 }
496
497 void linsolqr_add_rhs(linsolqr_system_t sys,
498 real64 *rhs,
499 boolean transpose)
500 {
501 struct rhs_list *rl;
502
503 if(CHECK_SYSTEM(sys)) {
504 FPRINTF(stderr,"Bad linsolqr_system_t found. rhs not added.\n");
505 return;
506 }
507 rl = find_rhs(sys->rl,rhs);
508 if( NOTNULL(rl) ) {
509 return;
510 } else {
511 if( NOTNULL(rhs) ) {
512 rl = (struct rhs_list *)ascmalloc( sizeof(struct rhs_list) );
513 rl->rhs = rhs;
514 rl->varvalue = NULL;
515 rl->solved = FALSE;
516 rl->transpose = transpose;
517 rl->next = sys->rl;
518 sys->rl = rl;
519 ++(sys->rlength);
520 if (sys->capacity>0) {
521 rl->varvalue =
522 (real64 *)ascmalloc(sys->capacity*sizeof(real64));
523 }
524 }
525 }
526 }
527
528 void linsolqr_remove_rhs(linsolqr_system_t sys,real64 *rhs)
529 {
530 struct rhs_list **q;
531
532 if(CHECK_SYSTEM(sys)) {
533 FPRINTF(stderr,"Bad linsolqr_system_t found. rhs not removed.\n");
534 return;
535 }
536 for( q = &(sys->rl) ; NOTNULL(*q) ; q = &((*q)->next) )
537 if( (*q)->rhs == rhs )
538 break;
539 if( NOTNULL(*q) ) {
540 struct rhs_list *p;
541 p = *q;
542 *q = p->next;
543 if( NOTNULL(p->varvalue) )
544 ascfree( (POINTER)(p->varvalue) );
545 ascfree( (POINTER)p );
546 --(sys->rlength);
547 } else if( NOTNULL(rhs) ) {
548 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_remove_rhs\n");
549 FPRINTF(stderr," Rhs does not exist.\n");
550 }
551 }
552
553 int32 linsolqr_number_of_rhs(linsolqr_system_t sys)
554 {
555 CHECK_SYSTEM(sys);
556 return( sys->rlength );
557 }
558
559 real64 *linsolqr_get_rhs(linsolqr_system_t sys,int n)
560 {
561 struct rhs_list *rl;
562 int count;
563
564 CHECK_SYSTEM(sys);
565
566 count = sys->rlength - 1 - n;
567 if( count < 0 ) return(NULL);
568 for( rl = sys->rl ; count > 0 && NOTNULL(rl) ; rl = rl->next )
569 --count;
570 return( ISNULL(rl) ? NULL : rl->rhs );
571 }
572
573 void linsolqr_matrix_was_changed(linsolqr_system_t sys)
574 {
575 if(CHECK_SYSTEM(sys)) {
576 FPRINTF(stderr,
577 "Bad linsolqr_system_t found. matrix change message ignored.\n");
578 return;
579 }
580 sys->rowdeps = sys->coldeps = sys->factored = FALSE;
581 }
582
583 void linsolqr_rhs_was_changed(linsolqr_system_t sys, real64 *rhs)
584 {
585 struct rhs_list *rl;
586
587 if(CHECK_SYSTEM(sys)) {
588 FPRINTF(stderr,"Bad linsolqr_system_t found. rhs change ignored.\n");
589 return;
590 }
591 rl = find_rhs(sys->rl,rhs);
592 if( NOTNULL(rl) ) {
593 rl->solved = FALSE;
594 } else if( NOTNULL(rhs) ) {
595 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_rhs_was_modified\n");
596 FPRINTF(stderr," Rhs does not exist.\n");
597 }
598 }
599
600 void linsolqr_set_pivot_zero(linsolqr_system_t sys,real64 pivot_zero)
601 {
602 if(CHECK_SYSTEM(sys)) {
603 FPRINTF(stderr,"Bad linsolqr_system_t found. set_pivot_zero ignored.\n");
604 return;
605 }
606 if( pivot_zero < D_ZERO ) {
607 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_pivot_zero\n");
608 FPRINTF(stderr," Invalid pivot zero of %g\n",pivot_zero);
609 } else {
610 sys->pivot_zero = pivot_zero;
611 linsolqr_matrix_was_changed(sys);
612 }
613 }
614
615 real64 linsolqr_pivot_zero(linsolqr_system_t sys)
616 {
617 CHECK_SYSTEM(sys);
618 return( sys->pivot_zero );
619 }
620
621 void linsolqr_set_pivot_tolerance(linsolqr_system_t sys, real64 ptol)
622 {
623 if(CHECK_SYSTEM(sys)) {
624 FPRINTF(stderr,"Bad linsolqr_system_t found. set_pivot_tol ignored.\n");
625 return;
626 }
627 if( ptol < D_ZERO || ptol > D_ONE ) {
628 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_pivot_tolerance\n");
629 FPRINTF(stderr," Invalid pivot tolerance of %g\n",ptol);
630 } else {
631 sys->ptol = ptol;
632 linsolqr_matrix_was_changed(sys);
633 }
634 }
635
636 void linsolqr_set_condition_tolerance(linsolqr_system_t sys, real64 ctol)
637 {
638 if(CHECK_SYSTEM(sys)) {
639 FPRINTF(stderr,
640 "Bad linsolqr_system_t found. set_condition_tolerance ignored.\n");
641 return;
642 }
643 if( ctol < D_ZERO || ctol > D_ONE ) {
644 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_condition_tolerance\n");
645 FPRINTF(stderr," Invalid condition tolerance of %g\n",ctol);
646 } else {
647 sys->ctol = ctol;
648 linsolqr_matrix_was_changed(sys);
649 }
650 }
651
652 void linsolqr_set_drop_tolerance(linsolqr_system_t sys, real64 dtol)
653 {
654 if(CHECK_SYSTEM(sys)) {
655 FPRINTF(stderr,
656 "Bad linsolqr_system_t found. set_drop_tolerance ignored.\n");
657 return;
658 }
659 if( dtol < D_ZERO || dtol > D_ONE ) {
660 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_drop_tolerance\n");
661 FPRINTF(stderr," Invalid drop tolerance of %g\n",dtol);
662 } else {
663 sys->dtol = dtol;
664 linsolqr_matrix_was_changed(sys);
665 }
666 }
667
668 real64 linsolqr_pivot_tolerance(linsolqr_system_t sys)
669 {
670 CHECK_SYSTEM(sys);
671 return( sys->ptol );
672 }
673
674 real64 linsolqr_condition_tolerance(linsolqr_system_t sys)
675 {
676 CHECK_SYSTEM(sys);
677 return( sys->ctol );
678 }
679
680 real64 linsolqr_drop_tolerance(linsolqr_system_t sys)
681 {
682 CHECK_SYSTEM(sys);
683 return( sys->dtol );
684 }
685
686 extern enum factor_class linsolqr_fclass(linsolqr_system_t sys)
687 {
688 CHECK_SYSTEM(sys);
689 return( sys->fclass );
690 }
691 extern enum factor_method linsolqr_fmethod(linsolqr_system_t sys)
692 {
693 CHECK_SYSTEM(sys);
694 return( sys->fmethod );
695 }
696 extern enum reorder_method linsolqr_rmethod(linsolqr_system_t sys)
697 {
698 CHECK_SYSTEM(sys);
699 return( sys->rmethod );
700 }
701
702 int32 linsolqr_rank(linsolqr_system_t sys)
703 {
704 CHECK_SYSTEM(sys);
705 if( !sys->factored ) {
706 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_rank\n");
707 FPRINTF(stderr," System not factored yet.\n");
708 }
709 return(sys->rank);
710 }
711
712 real64 linsolqr_smallest_pivot(linsolqr_system_t sys)
713 {
714 CHECK_SYSTEM(sys);
715 #if LINSOL_DEBUG
716 if( !sys->factored ) {
717 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_smallest_pivot\n");
718 FPRINTF(stderr," System not factored yet.\n");
719 }
720 #endif
721 return(sys->smallest_pivot);
722 }
723
724 /***************************************************************************\
725 Commonly used internal functions for sparse linear solvers based on mtx.
726 void insure_capacity(sys)
727 void insure_lu_capacity(sys)
728 void insure_qr_capacity(sys)
729 void forward_substitute(sys,rvec,transpose)
730 void backward_substitute(sys,rvec,transpose)
731 macro SQR(x)
732 int find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi)
733 \***************************************************************************/
734
735 static real64 *raise_capacity(real64 *vec,
736 int32 oldcap,
737 int32 newcap)
738 /**
739 *** Raises the capacity of the array and returns a new array.
740 *** It is assumed that oldcap < newcap. vec is destroyed or
741 *** returned as appropriate.
742 *** If !NDEBUG, the vector expanded is also set to 0.
743 **/
744 {
745 real64 *newvec=NULL;
746 int i;
747 if (newcap < oldcap) {
748 #ifndef NDEBUG
749 for (i = 0; i < newcap; i++) {
750 newvec[i] = 0.0;
751 }
752 #endif
753 return vec;
754 }
755 if (NOTNULL(vec)) {
756 /* don't call realloc on null with newcap 0 or it frees */
757 newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64)));
758 } else {
759 newvec=(newcap > 0 ?
760 (real64 *)ascmalloc( newcap * sizeof(real64) ) : NULL);
761 }
762 #ifndef NDEBUG
763 for (i = 0; i < newcap; i++) {
764 newvec[i] = 0.0;
765 }
766 #endif
767 return newvec;
768 }
769
770 static int32 *raise_qr_int_capacity(int32 *vec,
771 int32 oldcap,
772 int32 newcap)
773 /**
774 *** Raises the capacity of the index array and returns a new array.
775 *** It is assumed that oldcap < newcap. vec is destroyed or
776 *** returned as appropriate. vec returned is zeroed.
777 *** calling with newcap=0 does not force deallocation.
778 **/
779 {
780 int32 *newvec=NULL;
781 if (newcap < oldcap)
782 return vec;
783 if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
784 newvec=(int32 *)ascrealloc(vec,(newcap * sizeof(int32)));
785 mtx_zero_int32(vec,newcap);
786 } else {
787 newvec= (newcap > 0 ?
788 (int32 *)asccalloc( newcap , sizeof(int32) ) : NULL);
789 }
790 return newvec;
791 }
792
793 static real64 *raise_qr_real_capacity(real64 *vec,
794 int32 oldcap,
795 int32 newcap)
796 /**
797 *** Raises the capacity of the real array and returns a new array.
798 *** It is assumed that oldcap < newcap. vec is destroyed or
799 *** returned as appropriate. vec returned is zeroed.
800 *** calling with newcap=0 does not force deallocation.
801 **/
802 {
803 real64 *newvec=NULL;
804 if (newcap < oldcap)
805 return vec;
806 if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
807 newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64)));
808 mtx_zero_real64(vec,newcap);
809 } else {
810 newvec= (newcap > 0 ?
811 (real64 *)asccalloc( newcap , sizeof(real64) ) : NULL);
812 }
813 return newvec;
814 }
815
816 static struct qr_fill_t *raise_qr_fill_capacity(struct qr_fill_t *vec,
817 int32 oldcap,
818 int32 newcap)
819 /**
820 *** Raises the capacity of the fill array and returns a new array.
821 *** It is assumed that oldcap < newcap. vec is destroyed or
822 *** returned as appropriate. vec returned is zeroed.
823 *** calling with newcap=0 does not force deallocation.
824 **/
825 {
826 struct qr_fill_t *newvec=NULL;
827 if (newcap < oldcap)
828 return vec;
829 if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
830 newvec=(struct qr_fill_t *)
831 ascrealloc(vec,(newcap * sizeof(struct qr_fill_t)));
832 mtx_zero_char((char *)vec,newcap*sizeof(struct qr_fill_t));
833 } else {
834 newvec= (newcap > 0 ?
835 (struct qr_fill_t *)asccalloc( newcap , sizeof(struct qr_fill_t) ) :
836 NULL);
837 }
838 return newvec;
839 }
840
841 static void insure_capacity(linsolqr_system_t sys)
842 /**
843 *** Insures that the capacity of all of the solution vectors
844 *** for each rhs is large enough.
845 *** The above implies a malloc if varvalue is null.
846 *** Assumes varvalue are at sys->capacity already.
847 **/
848 {
849 int32 req_cap;
850 req_cap = mtx_capacity(sys->coef);
851
852 if( req_cap > sys->capacity ) {
853 struct rhs_list *rl;
854
855 for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
856 rl->varvalue = raise_capacity(rl->varvalue,sys->capacity,req_cap);
857 sys->capacity = req_cap;
858 }
859 }
860
861 static void insure_lu_capacity(linsolqr_system_t sys)
862 /**
863 *** Insures that the capacity of all of the ludata vectors.
864 *** The above implies a malloc if vector is null.
865 *** If not null, implies an extension if needed.
866 **/
867 {
868 int32 req_cap;
869 if (!sys || !(sys->coef) || !(sys->ludata)) {
870 FPRINTF(stderr,"linsolqr: insure_lu_capacity called with NULL pointer");
871 return;
872 }
873 req_cap = mtx_capacity(sys->coef);
874
875 if( req_cap > sys->ludata->cap ) {
876 sys->ludata->pivlist =
877 raise_capacity(sys->ludata->pivlist,sys->ludata->cap,req_cap);
878 sys->ludata->tmp =
879 raise_capacity(sys->ludata->tmp,sys->ludata->cap,req_cap);
880 sys->ludata->cap = req_cap;
881 }
882 }
883
884 static void insure_qr_capacity(linsolqr_system_t sys)
885 /**
886 *** Insures that the capacity of all of the qrdata vectors
887 *** is large enough.
888 *** The above implies a calloc if vector is null.
889 *** If not null, implies an extension and zeroing of the vector.
890 *** Also zeroes the simple elements of the qrdata.
891 **/
892 {
893 int32 req_cap;
894 if (!sys || !(sys->coef) || !(sys->qrdata)) {
895 FPRINTF(stderr,"linsolqr: insure_qr_capacity called with NULL pointer");
896 return;
897 }
898 req_cap = mtx_capacity(sys->coef);
899
900 if( req_cap > sys->qrdata->cap ) {
901 sys->qrdata->alpha =
902 raise_qr_real_capacity(sys->qrdata->alpha,sys->qrdata->cap,req_cap);
903 sys->qrdata->sigma =
904 raise_qr_real_capacity(sys->qrdata->sigma,sys->qrdata->cap,req_cap);
905 sys->qrdata->tau =
906 raise_qr_real_capacity(sys->qrdata->tau,sys->qrdata->cap,req_cap);
907 sys->qrdata->hhcol =
908 raise_qr_real_capacity(sys->qrdata->hhcol,sys->qrdata->cap,req_cap);
909 sys->qrdata->hhrow =
910 raise_qr_real_capacity(sys->qrdata->hhrow,sys->qrdata->cap,req_cap);
911
912 sys->qrdata->sp.data =
913 raise_qr_real_capacity(sys->qrdata->sp.data,sys->qrdata->cap,req_cap);
914 sys->qrdata->sp.idata =
915 raise_qr_int_capacity(sys->qrdata->sp.idata,sys->qrdata->cap,req_cap);
916
917 sys->qrdata->fill =
918 raise_qr_fill_capacity(sys->qrdata->fill,sys->qrdata->cap,req_cap);
919
920 sys->qrdata->cap = req_cap;
921 sys->qrdata->sp.cap = req_cap;
922 sys->qrdata->sp.len = 0;
923 }
924 sys->qrdata->nu=D_ZERO;
925 sys->qrdata->anu=D_ZERO;
926 sys->qrdata->asubnu=D_ZERO;
927 }
928
929 static void forward_substitute(linsolqr_system_t sys,
930 real64 *arr,
931 boolean transpose)
932 /**
933 *** Forward substitute. It is assumed that the L (or U) part of
934 *** sys->factors is computed. This function converts c to x in place. The
935 *** values are stored in arr indexed by original row number (or original
936 *** column number).
937 ***
938 *** transpose = FALSE: transpose = TRUE:
939 *** T
940 *** L x = c U x = c
941 ***
942 *** The following formula holds:
943 *** 0<=k<r ==> x(k) = [c(k) - L(k,(0..k-1)) dot x(0..k-1)] / L(k,k)
944 *** or
945 *** 0<=k<r ==> x(k) = [c(k) - U((0..k-1),k) dot x(0..k-1)] / U(k,k)
946 ***
947 *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k]
948 *** and in matrix.
949 **/
950 {
951 mtx_range_t dot_rng;
952 mtx_coord_t nz;
953 real64 sum, *pivlist;
954 mtx_matrix_t mtx;
955 int32 dotlim;
956 boolean nonzero_found=FALSE;
957
958 mtx=sys->factors;
959 pivlist=sys->ludata->pivlist;
960 dot_rng.low = sys->rng.low;
961 dotlim=dot_rng.low+sys->rank;
962 if (transpose) { /* arr is indexed by original column number */
963 for( nz.col=dot_rng.low; nz.col < dotlim; ++(nz.col) ) {
964 register int32 org_col;
965
966 dot_rng.high = nz.col - 1;
967 org_col = mtx_col_to_org(mtx,nz.col);
968 if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
969 if (nonzero_found) {
970 sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE);
971 /* arr[org_col] = (arr[org_col] - sum) / D_ONE */;
972 arr[org_col] -=sum;
973 }
974 }
975
976 } else { /* arr is indexed by original row number */
977 for( nz.row=dot_rng.low; nz.row < dotlim; ++(nz.row) ) {
978 register int32 org_row;
979
980 dot_rng.high = nz.row - 1;
981 org_row = mtx_row_to_org(mtx,nz.row);
982 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
983 if (nonzero_found) {
984 sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE);
985 /*
986 nz.col = nz.row;
987 arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz);
988 */
989 arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row];
990 }
991 }
992 }
993 }
994
995 static void backward_substitute(linsolqr_system_t sys,
996 real64 *arr,
997 boolean transpose)
998 /**
999 *** Backward substitute. It is assumed that the U (or L) part of
1000 *** sys->factors is computed. This function converts rhs to c in place. The
1001 *** values are stored in arr indexed by original row number (or original
1002 *** column number).
1003 ***
1004 *** transpose = FALSE: transpose = TRUE:
1005 *** T
1006 *** U c = rhs L c = rhs
1007 ***
1008 *** The following formula holds:
1009 *** transpose=FALSE: (the usual for J.dx=-f where rhs is -f)
1010 *** 0<=k<r ==> c(k) = [rhs(k) - U(k,(k+1..r-1)) dot c(k+1..r-1)] / U(k,k)
1011 *** working up from the bottom.
1012 *** or
1013 *** transpose=TRUE:
1014 *** 0<=k<r ==> c(k) = [rhs(k) - L((k+1..r-1),k) dot c(k+1..r-1)] / L(k,k)
1015 *** working right to left.
1016 ***
1017 *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k]
1018 *** and in matrix.
1019 **/
1020 {
1021 mtx_range_t dot_rng;
1022 mtx_coord_t nz;
1023 real64 sum, *pivlist;
1024 mtx_matrix_t mtx;
1025 int32 dotlim;
1026 boolean nonzero_found=FALSE; /* once TRUE, substitution must be done
1027 over remaining rows/cols */
1028
1029 dot_rng.high = sys->rng.low + sys->rank - 1;
1030 dotlim=sys->rng.low;
1031 mtx=sys->factors;
1032 pivlist=sys->ludata->pivlist;
1033 if (transpose) { /* arr is indexed by original column number */
1034 for( nz.col = dot_rng.high ; nz.col >= dotlim ; --(nz.col) ) {
1035 register int32 org_col;
1036
1037 dot_rng.low = nz.col + 1;
1038
1039 org_col = mtx_col_to_org(mtx,nz.col);
1040 if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
1041 if (nonzero_found) {
1042 sum= mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE);
1043 /*
1044 reminders:
1045 nz.row = nz.col;
1046 arr[org_col] = (arr[org_col] - sum) / mtx_value(mtx,&nz);
1047 */
1048 arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col];
1049 }
1050 }
1051 } else { /* arr is indexed by original row number */
1052 /* we are working from the bottom up */
1053 for( nz.row = dot_rng.high ; nz.row >= dotlim ; --(nz.row) ) {
1054 register int32 org_row;
1055
1056 dot_rng.low = nz.row + 1;
1057 org_row = mtx_row_to_org(mtx,nz.row);
1058
1059 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
1060 if (nonzero_found) {
1061 sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE);
1062 /*
1063 reminders:
1064 nz.row = nz.col;
1065 arr[org_row] = (arr[org_row] - sum) / D_ONE;
1066 */
1067 arr[org_row] -= sum;
1068 }
1069 }
1070 }
1071 }
1072
1073 #ifdef SQR
1074 #undef SQR
1075 #endif
1076 #define SQR(x) ((x)*(x))
1077 /*
1078 * int32 find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi);
1079 * real64 *vec,*rvec,tol;
1080 * int32 len, *ivec, *maxi;
1081 *
1082 * Search array vec of positive numbers for the first entry, k, which passes
1083 * (vec[k] >= tol*vecmax) where vecmax is the largest value in vec.
1084 * vec is an array len long. rvec, ivec are (worst case) len long.
1085 * *maxi will be set to the index of the first occurence of the largest
1086 * number in vec which is >= eps.
1087 * 0.0 <= tol <= 1.
1088 * If tol <=0, no search is done (*maxi = 0, k = 0).
1089 * Eps is an absolute number which vec values must be >= to before
1090 * being eligible for the tol test.
1091 *
1092 * Best case, highly nonlinear feature:
1093 * if on entry *maxi = len, we will do a simplified search for the
1094 * first k which satisfies tol and eps tests, based on the value
1095 * stored at vec[len-1].
1096 *
1097 * We could allocate and deallocate rvec/ivec since they are
1098 * temporaries, but it is more efficient for caller to manage that.
1099 * If tol>=1.0, rvec and ivec are ignored and may be NULL.
1100 * NOTE: we are going on value vec[i], not fabs(vec[i]).
1101 * Returns k.
1102 * Remember: GIGO.
1103 * Failure modes- if all numbers are <= 0.0, k and *maxi will be returned 0.
1104 * Failure modes- if all numbers are < eps, k and *maxi will be returned 0.
1105 *
1106 * find_pivot_index, should we implement, would search an int vector.
1107 *
1108 * Hint: if you know you have not increased any values in vec and have not
1109 * changed the value at vec[maxi] before a subsequent search, then call
1110 * with len = previous maxi+1 to reduce the search time.
1111 */
1112 static int32 find_pivot_number(const real64 *vec,
1113 const int32 len,
1114 const real64 tol,
1115 const real64 eps,
1116 int32 * const ivec,
1117 real64 * const rvec,
1118 int32 * const maxi)
1119 {
1120 int32 j,k;
1121 if (tol <= D_ZERO ) {
1122 *maxi = 0;
1123 return 0;
1124 }
1125 if (tol >= D_ONE) {
1126 register real64 biggest = MAX(eps,D_ZERO);
1127 /* get the biggest, period */
1128 k = 0;
1129 for (j=0; j < len; j++) {
1130 if (vec[j] > biggest) {
1131 biggest = vec[j];
1132 k = j;
1133 }
1134 }
1135 *maxi = k;
1136 } else {
1137 int32 bigi;
1138 bigi = 0;
1139 rvec[0] = eps;
1140 ivec[0] = 0;
1141 if (*maxi==len) {
1142 /* cheap search against eps and tol, no list. */
1143 register real64 thold;
1144 thold = tol * vec[len-1];
1145 if (thold >= eps) {
1146 for (k = 0; k < len && vec[k] < thold; k++);
1147 } else {
1148 for (k = 0; k < len && vec[k] < eps; k++);
1149 }
1150 /* adjust maxi to still point at last location, as indicated by len */
1151 (*maxi)--;
1152 } else {
1153 real64 rlast;
1154 rlast = eps;
1155 /* create short list */
1156 for (j=0; j < len; j++) {
1157 while (j < len && vec[j] <= rlast) j++; /* skip to next max */
1158 if (j < len) {
1159 ivec[bigi] = j;
1160 rvec[bigi] = vec[j];
1161 bigi++; /* bigi ends up being len of rvec or, if vec all<= eps, 0 */
1162 rlast = rvec[bigi-1];
1163 }
1164 }
1165 if (bigi == 0) {
1166 *maxi = k = 0;
1167 } else {
1168 register real64 thold;
1169 /* Note: if bigi is enormous, we should do a binary search,
1170 not linear. */
1171 *maxi = ivec[bigi-1];
1172 thold = tol * rvec[bigi-1];
1173 /* get pivot from shortlist, searching backward */
1174 if (thold >= eps) {
1175 for (k = bigi-1; k >= 0 && rvec[k] >= thold; k--);
1176 } else {
1177 for (k = bigi-1; k >= 0 && rvec[k] >= eps; k--);
1178 }
1179 /* translate pivot to vec index */
1180 k = ivec[k+1];
1181 }
1182 }
1183 }
1184 return k;
1185 }
1186
1187 static void calc_dependent_rows_ranki1(linsolqr_system_t sys)
1188 {
1189 mtx_coord_t nz;
1190 real64 value;
1191 mtx_range_t colrange;
1192 mtx_range_t rowrange;
1193 real64 *lc;
1194 mtx_matrix_t mtx;
1195
1196 sys->rowdeps = TRUE;
1197 if( ( (sys->reg.row.low == sys->rng.low) &&
1198 ( sys->reg.row.high == sys->rng.low+sys->rank-1 )
1199 ) || sys->rank==0 )
1200 return;
1201
1202 lc = sys->ludata->tmp;
1203 colrange.low = sys->rng.low;
1204 colrange.high = colrange.low + sys->rank - 1;
1205 rowrange.low = sys->rng.high;
1206 rowrange.high = sys->rng.low+sys->rank;
1207 mtx=sys->factors;
1208
1209 nz.row = sys->reg.row.low;
1210 for( ; nz.row <= sys->reg.row.high; nz.row++ ) {
1211 if( nz.row == sys->rng.low ) {
1212 nz.row = rowrange.high-1;
1213 continue;
1214 }
1215 mtx_zero_real64(lc,(sys->capacity));
1216 mtx_org_row_vec(mtx,nz.row,lc,&colrange);
1217 if( nz.row < rowrange.high || nz.row > rowrange.low )
1218 backward_substitute(sys,lc,TRUE);
1219 forward_substitute(sys,lc,TRUE);
1220 mtx_clear_row(mtx,nz.row,&colrange);
1221 for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) {
1222 value = lc[mtx_col_to_org(mtx,nz.col)];
1223 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
1224 }
1225 }
1226 }
1227
1228
1229 static void calc_dependent_cols_ranki1(linsolqr_system_t sys)
1230 {
1231 mtx_coord_t nz;
1232 real64 value;
1233 mtx_range_t rowrange;
1234 mtx_range_t colrange;
1235 real64 *lc;
1236 mtx_matrix_t mtx;
1237
1238 sys->coldeps = TRUE;
1239 if( ( (sys->reg.col.low == sys->rng.low) &&
1240 ( sys->reg.col.high == sys->rng.low+sys->rank-1 )
1241 ) || sys->rank==0 )
1242 return;
1243
1244 lc = sys->ludata->tmp;
1245 rowrange.low = sys->rng.low;
1246 rowrange.high = rowrange.low + sys->rank - 1;
1247 colrange.high = sys->rng.low+sys->rank;
1248 colrange.low = sys->rng.high;
1249 mtx=sys->factors;
1250
1251 nz.col = sys->reg.col.low;
1252 for( ; nz.col <= sys->reg.col.high; nz.col++ ) {
1253 if( nz.col == sys->rng.low ) {
1254 nz.col = colrange.high-1;
1255 continue;
1256 }
1257 mtx_zero_real64(lc,sys->capacity);
1258 mtx_org_col_vec(mtx,nz.col,lc,&rowrange);
1259 if( nz.col < colrange.high || nz.col > colrange.low )
1260 backward_substitute(sys,lc,FALSE);
1261 forward_substitute(sys,lc,FALSE);
1262 mtx_clear_col(mtx,nz.col,&rowrange);
1263 for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) {
1264 value = lc[mtx_row_to_org(mtx,nz.row)];
1265 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
1266 }
1267 }
1268 }
1269
1270 /**
1271 *** Given a matrix and a diagonal range, sets the diagonal elements to 0
1272 *** but does not delete them in the range low to high inclusive.
1273 *** worst cost= k*(nonzeros in rows of range treated).
1274 *** likely cost= k*(nonzeros in column pivoted rows of range treated)
1275 *** since you can smartly put in the diagonal last and find
1276 *** it first on many occasions.
1277 *** This is a good bit cheaper than deleting the elements unless one
1278 *** expects thousands of solves.
1279 **/
1280 static void zero_diagonal_elements(mtx_matrix_t mtx,
1281 int32 low, int32 high)
1282 {
1283 mtx_coord_t nz;
1284 for (nz.row = low; nz.row <= high; nz.row++) {
1285 nz.col = nz.row;
1286 mtx_set_value(mtx,&nz,0.0);
1287 }
1288 }
1289
1290 static void zero_unpivoted_vars(linsolqr_system_t sys,
1291 real64 *varvalues,
1292 boolean transpose)
1293 /**
1294 *** Sets the values of unpivoted variables to zero.
1295 **/
1296 {
1297 int32 ndx,order;
1298 mtx_matrix_t mtx;
1299
1300 mtx = sys->factors;
1301 order = mtx_order(mtx);
1302 for( ndx = 0 ; ndx < sys->rng.low ; ++ndx )
1303 if (transpose)
1304 varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO;
1305 else
1306 varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO;
1307
1308 for( ndx = sys->rng.low + sys->rank ; ndx < order ; ++ndx )
1309 if (transpose)
1310 varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO;
1311 else
1312 varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO;
1313 }
1314
1315 #if LINSOL_DEBUG
1316 static void debug_out_factors(FILE *fp,linsolqr_system_t sys)
1317 /**
1318 *** Outputs permutation and values of the nonzero elements in the
1319 *** factor matrix square region given by sys->rng.
1320 **/
1321 {
1322 mtx_region_t reg;
1323 reg.row.low=reg.col.low=sys->rng.low;
1324 reg.row.high=reg.col.high=sys->rng.high;
1325 mtx_write_region(fp,sys->factors,&reg);
1326 }
1327 #endif /* LINSOL_DEBUG */
1328
1329 /***************************************************************************\
1330 Reordering functions for SPK1, and possibly for other schemes to be
1331 implemented later.
1332 The stuff here is almost, but not quite, black magic. Don't tinker with it.
1333 \***************************************************************************/
1334
1335 /*********************************
1336 begin of spk1 stuff
1337 *********************************/
1338 struct reorder_list { /* List of rows/columns and their counts. */
1339 int32 ndx;
1340 int32 count;
1341 struct reorder_list *next;
1342 };
1343
1344 struct reorder_vars {
1345 mtx_matrix_t mtx;
1346 mtx_region_t reg; /* Current active region */
1347 int32 colhigh; /* Original value of reg.col.high */
1348 struct reorder_list *tlist; /* Array of (enough) list elements */
1349 int32 *rowcount; /* rowcount[reg.row.low .. reg.row.high] */
1350 };
1351
1352 static void adjust_row_count(struct reorder_vars *vars,int32 removed_col)
1353 /**
1354 *** Adjusts the row counts to account for the (to be) removed column.
1355 **/
1356 {
1357 mtx_coord_t nz;
1358 real64 value;
1359 nz.col = removed_col;
1360 nz.row = mtx_FIRST;
1361 while( value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row)),
1362 nz.row != mtx_LAST )
1363 --(vars->rowcount[nz.row]);
1364 }
1365
1366 static void assign_row_and_col(struct reorder_vars *vars,
1367 int32 row,
1368 int32 col)
1369 /**
1370 *** Assigns the given row to the given column, moving the row and column
1371 *** to the beginning of the active region and removing them (readjusting
1372 *** the region). The row counts are NOT adjusted. If col == mtx_NONE,
1373 *** then no column is assigned and the row is moved to the end of the
1374 *** active block instead. Otherwise, it is assumed that the column
1375 *** is active.
1376 **/
1377 {
1378 if( col == mtx_NONE ) {
1379 mtx_swap_rows(vars->mtx,row,vars->reg.row.high);
1380 vars->rowcount[row] = vars->rowcount[vars->reg.row.high];
1381 --(vars->reg.row.high);
1382 } else {
1383 mtx_swap_rows(vars->mtx,row,vars->reg.row.low);
1384 vars->rowcount[row] = vars->rowcount[vars->reg.row.low];
1385 mtx_swap_cols(vars->mtx,col,vars->reg.col.low);
1386 ++(vars->reg.row.low);
1387 ++(vars->reg.col.low);
1388 }
1389 }
1390
1391 static void push_column_on_stack(struct reorder_vars *vars,int32 col)
1392 /**
1393 *** Pushes the given column onto the stack. It is assumed that the
1394 *** column is active. Row counts are adjusted.
1395 **/
1396 {
1397 adjust_row_count(vars,col);
1398 mtx_swap_cols(vars->mtx,col,vars->reg.col.high);
1399 --(vars->reg.col.high);
1400 }
1401
1402 static int32 pop_column_from_stack(struct reorder_vars *vars)
1403 /**
1404 *** Pops the column on the "top" of the stack off of the stack and
1405 *** returns the column index, where it now lies in the active region.
1406 *** If the stack is empty, mtx_NONE is returned. Row counts are NOT
1407 *** adjusted (this together with a subsequent assignment of this column
1408 *** ==> no row count adjustment necessary).
1409 **/
1410 {
1411 if( vars->reg.col.high < vars->colhigh )
1412 return(++(vars->reg.col.high));
1413 else
1414 return( mtx_NONE );
1415 }
1416
1417 static void assign_null_rows(struct reorder_vars *vars)
1418 /**
1419 *** Assigns empty rows, moving them to the assigned region. It is
1420 *** assumed that row counts are correct. Columns are assigned off the
1421 *** stack.
1422 **/
1423 {
1424 int32 row;
1425
1426 for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1427 if( vars->rowcount[row] == 0 )
1428 assign_row_and_col(vars , row , pop_column_from_stack(vars));
1429 }
1430
1431 static void forward_triangularize(struct reorder_vars *vars)
1432 /**
1433 *** Forward triangularizes the region, assigning singleton rows with their
1434 *** one and only incident column until there are no more. The row counts
1435 *** must be correct, and they are updated.
1436 **/
1437 {
1438 boolean change;
1439 mtx_coord_t nz;
1440 real64 value;
1441
1442 do {
1443 change = FALSE;
1444 for( nz.row = vars->reg.row.low ;
1445 nz.row <= vars->reg.row.high && vars->rowcount[nz.row] != 1;
1446 ++nz.row ) ;
1447 if( nz.row <= vars->reg.row.high ) {
1448 /* found singleton row */
1449 nz.col = mtx_FIRST; /* this is somehow coming back with nz.col -1 */
1450 value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col));
1451 adjust_row_count(vars,nz.col);
1452 assign_row_and_col(vars,nz.row,nz.col);
1453 change = TRUE;
1454 }
1455 } while( change );
1456 }
1457
1458 static int32 select_row(struct reorder_vars *vars)
1459 /**
1460 *** Selects a row and returns its index. It is assumed that there is a
1461 *** row. Row counts must be correct. vars->tlist will be used.
1462 **/
1463 {
1464 int32 min_row_count;
1465 int32 max_col_count;
1466 int32 row;
1467 int32 i, nties=-2; /* # elements currently defined in vars->tlist */
1468 int32 sum;
1469 mtx_coord_t nz;
1470 real64 value;
1471 mtx_matrix_t mtx;
1472 mtx_range_t *colrng, *rowrng;
1473
1474 /* Set to something > any possible value */
1475 min_row_count = vars->reg.col.high-vars->reg.col.low+2;
1476 for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1477 if( vars->rowcount[row] <= min_row_count ) {
1478 if( vars->rowcount[row] < min_row_count ) {
1479 min_row_count = vars->rowcount[row];
1480 nties = 0;
1481 }
1482 vars->tlist[nties++].ndx = row;
1483 }
1484 /**
1485 *** vars->tlist[0..nties-1] is a list of row numbers which tie for
1486 *** minimum row count.
1487 **/
1488
1489 max_col_count = -1; /* < any possible value */
1490 i = 0;
1491 mtx = vars->mtx;
1492 colrng=&(vars->reg.col);
1493 rowrng=&(vars->reg.row);
1494 while( i < nties ) {
1495
1496 sum = 0;
1497 nz.row = vars->tlist[i].ndx;
1498 nz.col = mtx_FIRST;
1499 while( value = mtx_next_in_row(mtx,&nz,colrng),
1500 nz.col != mtx_LAST )
1501 sum += mtx_nonzeros_in_col(mtx,nz.col,rowrng);
1502 if( sum > max_col_count ) {
1503 max_col_count = sum;
1504 row = nz.row;
1505 }
1506 i++;
1507 }
1508 /**
1509 *** Now row contains the row with the minimum row count, which has the
1510 *** greatest total column count of incident columns among all rows with
1511 *** the same (minimum) row count. Select it.
1512 **/
1513 return(row);
1514 }
1515
1516 static void spk1_reorder(struct reorder_vars *vars)
1517 /**
1518 *** Reorders the assigned matrix vars->mtx within the specified bounding
1519 *** block region vars->reg. The region is split into 6 subregions during
1520 *** reordering: the rows are divided in two, and the columns divided in
1521 *** three. Initially everything is in the active subregion. Ultimately,
1522 *** everything will be assigned.
1523 ***
1524 *** <-- assigned -->|<-- active-->|<-- on stack -->|
1525 *** ----+----------------+-------------+----------------+
1526 *** a | | | |
1527 *** s | | | |
1528 *** s | | | |
1529 *** i | (SQUARE) | | |
1530 *** g | | | |
1531 *** n | | | |
1532 *** e | | | |
1533 *** d | | | |
1534 *** ----+----------------+-------------+----------------+
1535 *** a | | | |
1536 *** c | | ACTIVE | |
1537 *** t | | REGION | |
1538 *** i | | | |
1539 *** v | | | |
1540 *** e | | | |
1541 *** ----+----------------+-------------+----------------+
1542 ***
1543 *** The algorithm is roughly as follows:
1544 *** (1) select a row (by some criterion).
1545 *** (2) push columns incident on that row onto the stack in decreasing
1546 *** order of their length.
1547 *** (3) pop first column off the stack and assign it to the selected
1548 *** row.
1549 *** (4) forward-triangularize (assign singleton rows with their one
1550 *** and only incident column, until no longer possible).
1551 ***
1552 *** (1)-(4) should be repeated until the active subregion becomes empty.
1553 ***
1554 *** Everything above was written as though the entire matrix is
1555 *** involved. In reality, only the relevant square region is involved.
1556 **/
1557 {
1558 int32 row, size;
1559 int32 *rowcount_array_origin;
1560 mtx_matrix_t mtx;
1561
1562 size = MAX(vars->reg.row.high,vars->reg.col.high) + 1;
1563 vars->tlist = size > 0 ? (struct reorder_list *)
1564 ascmalloc( size*sizeof(struct reorder_list) ) : NULL;
1565 vars->rowcount = rowcount_array_origin = size > 0 ? (int32 *)
1566 ascmalloc( size*sizeof(int32) ) : NULL;
1567 mtx = vars->mtx;
1568
1569 vars->colhigh = vars->reg.col.high;
1570 /* Establish row counts */
1571 for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1572 vars->rowcount[row] =
1573 mtx_nonzeros_in_row(mtx,row,&(vars->reg.col));
1574
1575 while(TRUE) {
1576 struct reorder_list *head;
1577 int32 nelts; /* # elements "allocated" from vars->tlist */
1578 mtx_coord_t nz;
1579 real64 value;
1580
1581 forward_triangularize(vars);
1582 assign_null_rows(vars);
1583 if( vars->reg.row.low>vars->reg.row.high ||
1584 vars->reg.col.low>vars->reg.col.high ) {
1585 /* Active region is now empty, done */
1586 if( NOTNULL(vars->tlist) )
1587 ascfree( vars->tlist );
1588 if( NOTNULL(rowcount_array_origin) )
1589 ascfree( rowcount_array_origin );
1590 return;
1591 }
1592
1593 head = NULL;
1594 nelts = 0;
1595 nz.row = select_row(vars);
1596 nz.col = mtx_FIRST;
1597 while( value = mtx_next_in_row(mtx,&nz,&(vars->reg.col)),
1598 nz.col != mtx_LAST ) {
1599 struct reorder_list **q,*p;
1600
1601 p = &(vars->tlist[nelts++]);
1602 p->ndx = mtx_col_to_org(mtx,nz.col);
1603 p->count = mtx_nonzeros_in_col(mtx,nz.col,&(vars->reg.row));
1604 for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) );
1605 p->next = *q;
1606 *q = p;
1607 }
1608 /**
1609 *** We now have a list of columns which intersect the selected row.
1610 *** The list is in order of decreasing column count.
1611 **/
1612
1613 /* Push incident columns on stack */
1614 for( ; NOTNULL(head) ; head = head->next )
1615 push_column_on_stack(vars,mtx_org_to_col(mtx,head->ndx));
1616
1617 /* Pop column off stack and assign to selected row */
1618 assign_row_and_col(vars , nz.row , pop_column_from_stack(vars));
1619 }
1620 /* Not reached. */
1621 }
1622
1623 /*********************************
1624 end of spk1 stuff
1625 *********************************/
1626 /*********************************
1627 begin of tspk1 stuff
1628 *********************************/
1629 struct creorder_list { /* List of columns/rows and their counts. */
1630 int32 ndx;
1631 int32 count;
1632 struct creorder_list *next;
1633 };
1634
1635 struct creorder_vars {
1636 mtx_matrix_t mtx;
1637 mtx_region_t reg; /* Current active region */
1638 int32 rowhigh; /* Original value of reg.row.high */
1639 struct creorder_list *tlist; /* Array of (enough) list elements */
1640 int32 *colcount; /* colcount[reg.col.low .. reg.col.high] */
1641 };
1642
1643 static void adjust_col_count(struct creorder_vars *vars,int32 removed_row)
1644 /**
1645 *** Adjusts the column counts to account for the (to be) removed row.
1646 **/
1647 {
1648 mtx_coord_t nz;
1649 real64 value;
1650 nz.row = removed_row;
1651 nz.col = mtx_FIRST;
1652 while( value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col)),
1653 nz.col != mtx_LAST )
1654 --(vars->colcount[nz.col]);
1655 }
1656
1657 static void assign_col_and_row(struct creorder_vars *vars,
1658 int32 col,
1659 int32 row)
1660 /**
1661 *** Assigns the given row to the given column, moving the row and column
1662 *** to the beginning of the active region and removing them (readjusting
1663 *** the region). The col counts are NOT adjusted. If col == mtx_NONE,
1664 *** then no column is assigned and the col is moved to the end of the
1665 *** active block instead. Otherwise, it is assumed that the row
1666 *** is active.
1667 **/
1668 {
1669 if( row == mtx_NONE ) {
1670 mtx_swap_cols(vars->mtx,col,vars->reg.col.high);
1671 vars->colcount[col] = vars->colcount[vars->reg.col.high];
1672 --(vars->reg.col.high);
1673 } else {
1674 mtx_swap_cols(vars->mtx,col,vars->reg.col.low);
1675 vars->colcount[col] = vars->colcount[vars->reg.col.low];
1676 mtx_swap_rows(vars->mtx,row,vars->reg.row.low);
1677 ++(vars->reg.col.low);
1678 ++(vars->reg.row.low);
1679 }
1680 }
1681
1682 static void push_row_on_stack(struct creorder_vars *vars,int32 row)
1683 /**
1684 *** Pushes the given row onto the stack. It is assumed that the
1685 *** row is active. Col counts are adjusted.
1686 **/
1687 {
1688 adjust_col_count(vars,row);
1689 mtx_swap_rows(vars->mtx,row,vars->reg.row.high);
1690 --(vars->reg.row.high);
1691 }
1692
1693 static int32 pop_row_from_stack(struct creorder_vars *vars)
1694 /**
1695 *** Pops the row on the "top" of the stack off of the stack and
1696 *** returns the row index, where it now lies in the active region.
1697 *** If the stack is empty, mtx_NONE is returned. Col counts are NOT
1698 *** adjusted (this together with a subsequent assignment of this row
1699 *** ==> no col count adjustment necessary).
1700 **/
1701 {
1702 if( vars->reg.row.high < vars->rowhigh )
1703 return(++(vars->reg.row.high));
1704 else
1705 return( mtx_NONE );
1706 }
1707
1708 static void assign_null_cols(struct creorder_vars *vars)
1709 /**
1710 *** Assigns empty cols, moving them to the assigned region. It is
1711 *** assumed that col counts are correct. Rows are assigned off the
1712 *** stack.
1713 **/
1714 {
1715 int32 col;
1716
1717 for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1718 if( vars->colcount[col] == 0 )
1719 assign_col_and_row(vars , col , pop_row_from_stack(vars));
1720 }
1721
1722 static void cforward_triangularize(struct creorder_vars *vars)
1723 /**
1724 *** Forward triangularizes the region, assigning singleton columns with their
1725 *** one and only incident row until there are no more. The column counts
1726 *** must be correct, and they are updated.
1727 **/
1728 {
1729 boolean change;
1730
1731 do {
1732 mtx_coord_t nz;
1733 real64 value;
1734 change = FALSE;
1735 for( nz.col = vars->reg.col.low ;
1736 nz.col <= vars->reg.col.high && vars->colcount[nz.col] != 1;
1737 ++nz.col ) ;
1738 if( nz.col <= vars->reg.col.high ) {
1739 /* found singleton col */
1740 nz.row = mtx_FIRST;
1741 value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row));
1742 adjust_col_count(vars,nz.row);
1743 assign_col_and_row(vars,nz.col,nz.row);
1744 change = TRUE;
1745 }
1746 } while( change );
1747 }
1748
1749 static int32 select_col(struct creorder_vars *vars)
1750 /**
1751 *** Selects a col and returns its index. It is assumed that there is a
1752 *** col. Col counts must be correct. vars->tlist will be used.
1753 **/
1754 {
1755 int32 min_col_count;
1756 int32 max_row_count;
1757 int32 col;
1758 int32 i, nties=-2; /* # elements currently defined in vars->tlist */
1759 int32 sum;
1760 mtx_coord_t nz;
1761 real64 value;
1762 mtx_matrix_t mtx;
1763 mtx_range_t *colrng, *rowrng;
1764
1765 /* Set to something > any possible value */
1766 min_col_count = vars->reg.row.high-vars->reg.row.low+2;
1767 for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1768 if( vars->colcount[col] <= min_col_count ) {
1769 if( vars->colcount[col] < min_col_count ) {
1770 min_col_count = vars->colcount[col];
1771 nties = 0;
1772 }
1773 vars->tlist[nties++].ndx = col;
1774 }
1775 /**
1776 *** vars->tlist[0..nties-1] is a list of row numbers which tie for
1777 *** minimum col count.
1778 **/
1779
1780 max_row_count = -1; /* < any possible value */
1781 i = 0;
1782 mtx = vars->mtx;
1783 rowrng=&(vars->reg.row);
1784 colrng=&(vars->reg.col);
1785 while( i < nties ) {
1786
1787 sum = 0;
1788 nz.row = mtx_FIRST;
1789 nz.col = vars->tlist[i].ndx;
1790 while( value = mtx_next_in_col(mtx,&nz,rowrng),
1791 nz.row != mtx_LAST )
1792 sum += mtx_nonzeros_in_row(mtx,nz.row,colrng);
1793 if( sum > max_row_count ) {
1794 max_row_count = sum;
1795 col = nz.col;
1796 }
1797 i++;
1798 }
1799 /**
1800 *** Now col contains the col with the minimum col count, which has the
1801 *** greatest total row count of incident rows among all cols with
1802 *** the same (minimum) col count. Select it.
1803 **/
1804 return(col);
1805 }
1806
1807 static void tspk1_reorder(struct creorder_vars *vars)
1808 /**
1809 *** Transpose the picture and explanation that follows:
1810 *** Reorders the assigned matrix vars->mtx within the specified bounding
1811 *** block region vars->reg. The region is split into 6 subregions during
1812 *** reordering: the rows are divided in two, and the columns divided in
1813 *** three. Initially everything is in the active subregion. Ultimately,
1814 *** everything will be assigned.
1815 ***
1816 *** <-- assigned -->|<-- active-->|<-- on stack -->|
1817 *** ----+----------------+-------------+----------------+
1818 *** a | | | |
1819 *** s | | | |
1820 *** s | | | |
1821 *** i | (SQUARE) | | |
1822 *** g | | | |
1823 *** n | | | |
1824 *** e | | | |
1825 *** d | | | |
1826 *** ----+----------------+-------------+----------------+
1827 *** a | | | |
1828 *** c | | ACTIVE | |
1829 *** t | | REGION | |
1830 *** i | | | |
1831 *** v | | | |
1832 *** e | | | |
1833 *** ----+----------------+-------------+----------------+
1834 ***
1835 *** The algorithm is roughly as follows:
1836 *** (1) select a row (by some criterion).
1837 *** (2) push columns incident on that row onto the stack in decreasing
1838 *** order of their length.
1839 *** (3) pop first column off the stack and assign it to the selected
1840 *** row.
1841 *** (4) forward-triangularize (assign singleton rows with their one
1842 *** and only incident column, until no longer possible).
1843 ***
1844 *** (1)-(4) should be repeated until the active subregion becomes empty.
1845 ***
1846 *** Everything above was written as though the entire matrix is
1847 *** involved. In reality, only the relevant square region is involved.
1848 **/
1849 {
1850 int32 col, size;
1851 int32 *colcount_array_origin;
1852 mtx_matrix_t mtx;
1853
1854 size = vars->reg.col.high - vars->reg.col.low + 1;
1855 size = MAX(size,vars->reg.row.high - vars->reg.row.low + 1);
1856 vars->tlist = size > 0 ? (struct creorder_list *)
1857 ascmalloc( size*sizeof(struct creorder_list) ) : NULL;
1858 colcount_array_origin = size > 0 ? (int32 *)
1859 ascmalloc( size*sizeof(int32) ) : NULL;
1860 vars->colcount =
1861 NOTNULL(colcount_array_origin) ?
1862 colcount_array_origin - vars->reg.col.low : NULL;
1863 mtx = vars->mtx;
1864
1865 vars->rowhigh = vars->reg.row.high;
1866 /* Establish col counts */
1867 for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1868 vars->colcount[col] =
1869 mtx_nonzeros_in_col(mtx,col,&(vars->reg.row));
1870
1871 while(TRUE) {
1872 struct creorder_list *head;
1873 int32 nelts; /* # elements "allocated" from vars->tlist */
1874 mtx_coord_t nz;
1875 real64 value;
1876
1877 cforward_triangularize(vars);
1878 assign_null_cols(vars);
1879 if( vars->reg.col.low > vars->reg.col.high ||
1880 vars->reg.row.low > vars->reg.row.high ) {
1881 /* Active region is now empty, done */
1882 if( NOTNULL(vars->tlist) )
1883 ascfree( vars->tlist );
1884 if( NOTNULL(colcount_array_origin) )
1885 ascfree( colcount_array_origin );
1886 return;
1887 }
1888
1889 head = NULL;
1890 nelts = 0;
1891 nz.row = mtx_FIRST;
1892 nz.col = select_col(vars);
1893 while( value = mtx_next_in_col(mtx,&nz,&(vars->reg.row)),
1894 nz.row != mtx_LAST ) {
1895 struct creorder_list **q,*p;
1896
1897 p = &(vars->tlist[nelts++]);
1898 p->ndx = mtx_row_to_org(mtx,nz.row);
1899 p->count = mtx_nonzeros_in_row(mtx,nz.row,&(vars->reg.col));
1900 for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) );
1901 p->next = *q;
1902 *q = p;
1903 }
1904 /**
1905 *** We now have a list of columns which intersect the selected row.
1906 *** The list is in order of decreasing column count.
1907 **/
1908
1909 /* Push incident rows on stack */
1910 for( ; NOTNULL(head) ; head = head->next )
1911 push_row_on_stack(vars,mtx_org_to_row(mtx,head->ndx));
1912
1913 /* Pop row off stack and assign to selected col */
1914 assign_col_and_row(vars , nz.col , pop_row_from_stack(vars));
1915 }
1916 /* Not reached. */
1917 }
1918
1919 /*********************************
1920 end of tspk1 stuff
1921 *********************************/
1922
1923 static boolean nonempty_row(mtx_matrix_t mtx,int32 row)
1924 /**
1925 *** ? row not empty in mtx.
1926 **/
1927 {
1928 mtx_coord_t nz;
1929 real64 value;
1930 value = mtx_next_in_row(mtx, mtx_coord(&nz,row,mtx_FIRST), mtx_ALL_COLS);
1931 return( nz.col != mtx_LAST );
1932 }
1933
1934 static boolean nonempty_col(mtx_matrix_t mtx,int32 col)
1935 /**
1936 *** ? column not empty in mtx.
1937 **/
1938 {
1939 mtx_coord_t nz;
1940 real64 value;
1941 value = mtx_next_in_col(mtx, mtx_coord(&nz,mtx_FIRST,col), mtx_ALL_ROWS);
1942 return( nz.row != mtx_LAST );
1943 }
1944
1945 static void determine_pivot_range(linsolqr_system_t sys)
1946 /**
1947 *** Calculates sys->rng from sys->coef.
1948 **/
1949 {
1950 sys->reg.row.low = sys->reg.row.high = -1;
1951 sys->reg.col.low = sys->reg.col.high = -1;
1952
1953 for( sys->rng.high=mtx_order(sys->coef) ; --(sys->rng.high) >= 0 ; ) {
1954 if( nonempty_row(sys->coef,sys->rng.high) &&
1955 sys->reg.row.high < 0 )
1956 sys->reg.row.high = sys->rng.high;
1957 if( nonempty_col(sys->coef,sys->rng.high) &&
1958 sys->reg.col.high < 0 )
1959 sys->reg.col.high = sys->rng.high;
1960 if( nonempty_row(sys->coef,sys->rng.high) &&
1961 nonempty_col(sys->coef,sys->rng.high) )
1962 break;
1963 }
1964
1965 for( sys->rng.low=0 ; sys->rng.low <= sys->rng.high ; ++(sys->rng.low) ) {
1966 if( nonempty_row(sys->coef,sys->rng.low) &&
1967 sys->reg.row.low < 0 )
1968 sys->reg.row.low = sys->rng.low;
1969 if( nonempty_col(sys->coef,sys->rng.low) &&
1970 sys->reg.col.low < 0 )
1971 sys->reg.col.low = sys->rng.low;
1972 if( nonempty_row(sys->coef,sys->rng.low) &&
1973 nonempty_col(sys->coef,sys->rng.low) )
1974 break;
1975 }
1976 }
1977
1978 static void square_region(linsolqr_system_t sys,mtx_region_t *region)
1979 /**
1980 *** Get the largest square confined to the diagonal within the region given
1981 *** and set sys->rng accordingly.
1982 **/
1983 {
1984 sys->reg = *region;
1985 sys->rng.low = MAX(region->row.low,region->col.low);
1986 sys->rng.high = MIN(region->row.high,region->col.high);
1987 }
1988
1989 static int ranki_reorder(linsolqr_system_t sys,mtx_region_t *region)
1990 /**
1991 *** The region to reorder is first isolated by truncating the region
1992 *** provided to the largest square region confined to the matrix diagonal.
1993 *** It is presumed it will contain no empty rows or columns and will
1994 *** provide the basis of candidate pivots when factoring.
1995 **/
1996 {
1997 struct reorder_vars vars;
1998 CHECK_SYSTEM(sys);
1999 if (sys->fclass != ranki) {
2000 FPRINTF(stderr,"linsolqr_reorder: spk1 reorder called on system\n");
2001 FPRINTF(stderr," with inappropriate factor class\n");
2002 return 1;
2003 }
2004 if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2005 else square_region(sys,region);
2006
2007 vars.mtx = sys->coef;
2008 vars.reg.row.low = vars.reg.col.low = sys->rng.low;
2009 vars.reg.row.high = vars.reg.col.high = sys->rng.high;
2010 spk1_reorder(&vars);
2011 return 0;
2012 }
2013
2014 static int tranki_reorder(linsolqr_system_t sys,mtx_region_t *region)
2015 /**
2016 *** The region to reorder is first isolated by truncating the region
2017 *** provided to the largest square region confined to the matrix diagonal.
2018 *** It is presumed it will contain no empty rows or columns and will
2019 *** provide the basis of candidate pivots when factoring.
2020 **/
2021 {
2022 struct creorder_vars vars;
2023 CHECK_SYSTEM(sys);
2024 if ( !(sys->fclass==ranki || sys->fclass==s_qr) ) {
2025 FPRINTF(stderr,"linsolqr_reorder: tspk1 reorder called on system\n");
2026 FPRINTF(stderr," with inappropriate factor method\n");
2027 return 1;
2028 }
2029 if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2030 else square_region(sys,region);
2031
2032 vars.mtx = sys->coef;
2033 vars.reg.row.low = vars.reg.col.low = sys->rng.low;
2034 vars.reg.row.high = vars.reg.col.high = sys->rng.high;
2035 tspk1_reorder(&vars);
2036 return 0;
2037 }
2038
2039 /***************************************************************************\
2040 End of reordering functions for SPK1.
2041 \***************************************************************************/
2042
2043 /***************************************************************************\
2044 RANKI implementation functions.
2045 \***************************************************************************/
2046
2047 static void eliminate_row(mtx_matrix_t mtx,
2048 mtx_range_t *rng,
2049 int32 row, /* row to eliminate */
2050 real64 *tmp, /* temporary array */
2051 real64 *pivots) /* prior pivots array */
2052 /**
2053 *** Eliminates the given row to the left of the diagonal element, assuming
2054 *** valid pivots for all of the diagonal elements above it (the elements
2055 *** above those diagonal elements, if any exist, are assumed to be U
2056 *** elements and therefore ignored). The temporary array is used by this
2057 *** function to do its job. tmp[k], where rng->low <= k <= rng->high must
2058 *** be defined (allocated) but need not be initialized.
2059 *** pivots[k] where rng->low <= k <=rng->high must be allocated and
2060 *** pivots[rng->low] to pivots[row-1] must contain the previous pivots.
2061 **/
2062 {
2063 mtx_coord_t nz;
2064 mtx_range_t left_to_eliminate,high_cols,tmp_cols;
2065 boolean do_series=FALSE;
2066
2067 high_cols.low = row;
2068 high_cols.high = rng->high;
2069 left_to_eliminate.low = rng->low;
2070 left_to_eliminate.high = row - 1;
2071
2072 /* Move non-zeros left of pivot from matrix to full array */
2073 mtx_zero_real64(tmp+rng->low,(row-rng->low));
2074 nz.row = row;
2075 mtx_cur_row_vec(mtx,row,tmp,&left_to_eliminate);
2076 mtx_clear_row(mtx,row,&left_to_eliminate);
2077
2078 /* eliminates nzs from pivot, one by one, filling tmp with multipliers */
2079 do_series= ( (row-1) >= (rng->low) );
2080 if (do_series) {
2081 mtx_add_row_series_init(mtx,row,FALSE);
2082 /* do nothing between here and the END call to _init which removes
2083 elements from row */
2084 }
2085 for( nz.row = row-1 ; nz.row >= rng->low ; --(nz.row) ) {
2086 if( tmp[nz.row] == D_ZERO )
2087 continue; /* Nothing to do for this row */
2088
2089 nz.col = nz.row;
2090 tmp[nz.row] /= pivots[nz.row];
2091 /* tmp[nz.row] now equals multiplier */
2092
2093 /* Perform "mtx_add_row" for full array part of the row */
2094 left_to_eliminate.high = nz.row - 1;
2095 mtx_cur_vec_add_row(mtx,tmp,nz.row,-tmp[nz.row],
2096 &left_to_eliminate,FALSE);
2097
2098 /* Perform "mtx_add_row" on remaining part of row */
2099 mtx_add_row_series(nz.row,-tmp[nz.row],&high_cols);
2100 }
2101 if (do_series) {
2102 mtx_add_row_series_init(mtx,mtx_NONE,TRUE);
2103 }
2104
2105 mtx_fill_cur_row_vec(mtx,row,tmp,mtx_range(&tmp_cols,rng->low,row-1));
2106
2107 }
2108
2109
2110 /* not in use it seems */
2111 #if BUILD_DEAD_CODE /* yes, baa */
2112 static int32 top_of_spike(linsolqr_system_t sys,int32 col)
2113 /**
2114 *** Determines the top row (row of lowest index) in a possible spike
2115 *** above the diagonal element in the given column. If there is no spike,
2116 *** then (row = ) col is returned.
2117 **/
2118 {
2119 mtx_range_t above_diagonal;
2120 mtx_coord_t nz;
2121 real64 value;
2122 int32 top_row;
2123
2124 above_diagonal.low = sys->rng.low;
2125 above_diagonal.high = col-1;
2126 top_row = nz.col = col;
2127 nz.row = mtx_FIRST;
2128 while( value = mtx_next_in_col(sys->factors,&nz,&above_diagonal),
2129 nz.row != mtx_LAST )
2130 if( nz.row < top_row )
2131 top_row = nz.row;
2132 return( top_row );
2133 }
2134 #endif /* BUILD_DEAD_CODE */
2135
2136 static boolean col_is_a_spike(linsolqr_system_t sys,int32 col)
2137 /**
2138 *** Determines if the col is a spike, characterized by having any
2139 *** nonzeros above the diagonal. By construction there shouldn't
2140 *** be any numeric 0 nonzeros above the diagonal, so we don't prune
2141 *** them out here.
2142 **/
2143 {
2144 mtx_range_t above_diagonal;
2145 mtx_coord_t nz;
2146
2147 above_diagonal.low = sys->rng.low;
2148 above_diagonal.high = col-1;
2149 nz.col = col;
2150 nz.row = mtx_FIRST;
2151 /* this loop is cheaper than it looks */
2152 while( (void)mtx_next_in_col(sys->factors,&nz,&above_diagonal),
2153 nz.row != mtx_LAST ) {
2154 if( nz.row < col ) return TRUE;
2155 }
2156 return( FALSE );
2157 }
2158
2159 /* see implementation notes before modifying this function! */
2160 static void number_drag(real64 *vec, int32 rfrom, int32 rto)
2161 {
2162 int32 i;
2163 real64 tmp;
2164 if (rto <rfrom) {
2165 tmp=vec[rfrom];
2166 for (i=rfrom; i> rto; i--) {
2167 vec[i]=vec[i-1]; /* rotate right */
2168 }
2169 vec[rto]=tmp;
2170 return;
2171 }
2172 if (rto > rfrom) {
2173 tmp=vec[rfrom];
2174 for (i=rfrom; i< rto; i++) {
2175 vec[i]=vec[i+1]; /* rotate left */
2176 }
2177 vec[rto]=tmp;
2178 }
2179 }
2180
2181 static void rankikw_factor(linsolqr_system_t sys)
2182 /**
2183 *** (the following description also applies to rankijz_factor which is
2184 *** different only in pivot selection strategy.)
2185 *** This is the heart of the linear equation solver. This function
2186 *** factorizes the matrix into a lower (L) and upper (U) triangular
2187 *** matrix. sys->smallest_pivot and sys->rank are calculated. The
2188 *** RANKI method is utilized. At the end of elimination, the matrix A
2189 *** is factored into A = U L, where U and L are stored as follows:
2190 ***
2191 *** <----- r ----> <- n-r ->
2192 *** +--------------+---------+
2193 *** | | |
2194 *** | U | |
2195 *** | | |
2196 *** | L | | r
2197 *** | | |
2198 *** +--------------+---------+
2199 *** | | |
2200 *** | | 0 | n-r
2201 *** | | |
2202 *** +--------------+---------+
2203 ***
2204 *** The rows and columns have been permuted so that all of the pivoted
2205 *** original rows and columns are found in the first r rows and columns
2206 *** of the region. The last n-r rows and columns are unpivoted. U has
2207 *** 1's on its diagonal, whereas L's diagonal is stored in the matrix.
2208 ***
2209 *** Everything above was written as though the entire matrix is
2210 *** involved. In reality, only the relevant square region is involved.
2211 **/
2212 {
2213 mtx_coord_t nz;
2214 int32 last_row;
2215 mtx_range_t pivot_candidates;
2216 real64 *tmp;
2217 real64 pivot, *pivots;
2218 int32 length;
2219 mtx_matrix_t mtx;
2220
2221 length = sys->rng.high - sys->rng.low + 1;
2222 tmp = sys->ludata->tmp;
2223 /* eliminate row takes care of zeroing the relevant region and won't
2224 change values outside of it. */
2225 pivots=sys->ludata->pivlist;
2226 mtx=sys->factors;
2227
2228 sys->smallest_pivot = MAXDOUBLE;
2229 last_row = pivot_candidates.high = sys->rng.high;
2230 for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
2231
2232 pivot_candidates.low = nz.col = nz.row;
2233 pivots[nz.row]=pivot = mtx_value(mtx,&nz);
2234 pivot = fabs(pivot);
2235 if( pivot > sys->pivot_zero &&
2236 pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) &&
2237 !col_is_a_spike(sys,nz.row) ) {
2238 /* Good pivot and not a spike: continue with next row */
2239 if( pivot < sys->smallest_pivot )
2240 sys->smallest_pivot = pivot;
2241 ++(nz.row);
2242 continue;
2243 }
2244 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2245 /**
2246 *** Row is a spike row or will
2247 *** be when a necessary column
2248 *** exchange occurs.
2249 **/
2250 eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots);
2251 /* pivot will be largest of those available. get size and value */
2252 pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row]));
2253 if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */
2254 /* Dependent row, drag to the end */
2255 mtx_drag(mtx,nz.row,last_row);
2256 number_drag(pivots,nz.row,last_row);
2257 --last_row;
2258 #undef KAA_DEBUG
2259 #ifdef KAA_DEBUG
2260 FPRINTF(stderr,"Warning: Row %d is dependent with pivot %20.8g\n",
2261 nz.row,pivot);
2262 #endif /* KAA_DEBUG */
2263 } else {
2264 /* Independent row: nz contains best pivot */
2265 /* Move pivot to diagonal */
2266 mtx_swap_cols(mtx,nz.row,nz.col);
2267 mtx_drag( mtx , nz.row , sys->rng.low );
2268 number_drag(pivots,nz.row,sys->rng.low);
2269 if( pivot < sys->smallest_pivot )
2270 sys->smallest_pivot = pivot;
2271 ++(nz.row);
2272 }
2273 }
2274
2275 sys->rank = last_row - sys->rng.low + 1;
2276 }
2277
2278
2279 static void rankijz_factor(linsolqr_system_t sys)
2280 /**
2281 *** This is an alternate pivoting strategy introduced by Joe Zaher.
2282 *** it looks down and across for good pivots rather than just across.
2283 ***
2284 **/
2285 {
2286 real64 biggest;
2287 mtx_coord_t nz, best;
2288 mtx_region_t candidates;
2289 real64 *tmp;
2290 real64 pivot, *pivots;
2291 int32 length;
2292 mtx_matrix_t mtx;
2293
2294 length = sys->rng.high - sys->rng.low + 1;
2295 tmp = sys->ludata->tmp;
2296 /* eliminate row takes care of zeroing the relevant region and won't
2297 change values outside of it. */
2298 pivots=sys->ludata->pivlist;
2299 mtx=sys->factors;
2300
2301 sys->smallest_pivot = MAXDOUBLE;
2302 candidates.row.high = sys->rng.high;
2303 candidates.col.high = sys->rng.high;
2304 for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) {
2305 nz.col = nz.row;
2306 pivots[nz.row] = pivot = mtx_value(mtx,&nz);
2307 pivot = fabs(pivot);
2308 candidates.row.low = nz.row;
2309 candidates.col.low = nz.row;
2310 if( !col_is_a_spike(sys,nz.row) ) {
2311 best.col = nz.row;
2312 mtx_col_max(mtx,&best,&(candidates.row),&biggest);
2313 if( fabs(biggest) >= sys->pivot_zero ) {
2314 if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) {
2315 mtx_swap_rows(mtx,nz.row,best.row);
2316 pivots[nz.row] = biggest;
2317 pivot = fabs(biggest);
2318 }
2319 if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot;
2320 ++(nz.row);
2321 continue;
2322 }
2323 }
2324 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2325 /**
2326 *** Row is a spike row or will
2327 *** be when a necessary column
2328 *** exchange occurs.
2329 **/
2330 eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots);
2331 /* pivot will be largest of those available. get size and value */
2332 pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row]));
2333 /* Move pivot to diagonal */
2334 if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2335 /* Dependent row, drag nz to lower right */
2336 mtx_drag(mtx,nz.row,candidates.row.high);
2337 number_drag(pivots,nz.row,candidates.row.high);
2338 --(candidates.row.high);
2339 } else {
2340 /* Independent row, drag nz to upper left */
2341 mtx_swap_cols(mtx,nz.row,nz.col);
2342 mtx_drag( mtx , nz.row , sys->rng.low );
2343 number_drag(pivots,nz.row,sys->rng.low);
2344 if( pivot < sys->smallest_pivot )
2345 sys->smallest_pivot = pivot;
2346 ++(nz.row);
2347 }
2348 }
2349
2350 sys->rank = candidates.row.high - sys->rng.low + 1;
2351 }
2352
2353
2354 #define KAA_DEBUG 0
2355 #if KAA_DEBUG
2356 int ranki_entry(linsolqr_system_t sys,mtx_region_t *region)
2357 #else
2358 static int ranki_entry(linsolqr_system_t sys,mtx_region_t *region)
2359 #endif
2360 /**
2361 *** The region to factor is first isolated by truncating the region
2362 *** provided to the largest square region confined to the matrix diagonal.
2363 *** It is presumed it will contain no empty rows or columns and that it has
2364 *** been previously reordered using linsolqr_reorder(sys,region,spk1)
2365 *** or a sane variant of spk1.
2366 *** This is the entry point for all ranki based strategies, regardless of
2367 *** pivot selection subtleties.
2368 **/
2369 {
2370 struct rhs_list *rl;
2371 double time;
2372
2373 CHECK_SYSTEM(sys);
2374 if( sys->factored )
2375 return 0;
2376 switch(sys->fmethod) {
2377 case ranki_kw:
2378 case ranki_jz:
2379 case ranki_ka: /* add new methods here */
2380 break;
2381 default:
2382 return 1;
2383 }
2384
2385 if(ISNULL(sys->ludata))
2386 return 1;
2387
2388 if( NOTNULL(sys->inverse) )
2389 mtx_destroy(sys->inverse);
2390 sys->inverse=NULL;
2391 if( NOTNULL(sys->factors) )
2392 mtx_destroy(sys->factors);
2393 if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2394 else square_region(sys,region);
2395
2396 sys->factors = mtx_copy_region(sys->coef,region);
2397 sys->rank = -1;
2398 sys->smallest_pivot = MAXDOUBLE;
2399 for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
2400 rl->solved = FALSE;
2401 insure_capacity(sys);
2402 insure_lu_capacity(sys);
2403
2404 time = tm_cpu_time();
2405 switch(sys->fmethod) {
2406 case ranki_ka:
2407 case ranki_kw:
2408 rankikw_factor(sys);
2409 break;
2410 case ranki_jz:
2411 default:
2412 rankijz_factor(sys);
2413 }
2414 /* no longer done automatically as noone usually cares.
2415 calc_dependent_rows_ranki1(sys);
2416 calc_dependent_cols_ranki1(sys);
2417 */
2418 sys->factored = TRUE;
2419
2420 #undef KAA_DEBUG
2421 #if KAA_DEBUG
2422 time = tm_cpu_time() - time;
2423 FPRINTF(stderr,"Time for Inversion = %f\n",time);
2424 FPRINTF(stderr,"Non-zeros in Inverse = %d\n",
2425 mtx_nonzeros_in_region(sys->factors,region));
2426 #endif
2427 return 0;
2428 }
2429
2430 static int ranki_solve(linsolqr_system_t sys, struct rhs_list *rl)
2431 {
2432 backward_substitute(sys,rl->varvalue,rl->transpose);
2433 forward_substitute(sys,rl->varvalue,rl->transpose);
2434 zero_unpivoted_vars(sys,rl->varvalue,rl->transpose);
2435 return 0;
2436 }
2437
2438
2439 /*
2440 *********************************************************************
2441 * Start of the 2 bodied factorization schemes.
2442 *
2443 * ranki2_entry will be used to access these routines.
2444 * there is now code called:
2445 * rankikw2_factor
2446 * rankijz2_factor
2447 * eliminate_row2
2448 * ranki2_entry
2449 * forward_substitute2
2450 * backward_substitute2
2451 * ranki2_solve
2452 *
2453 * We have fixed the calc_col_dependent and calc_row_dependent
2454 * routines.
2455 *********************************************************************
2456 */
2457
2458 extern int32 mtx_number_ops;
2459
2460 /* this function does no permutation of any sort */
2461 static
2462 void eliminate_row2(mtx_matrix_t mtx,
2463 mtx_matrix_t upper_mtx,
2464 mtx_range_t *rng,
2465 int32 row, /* row to eliminate */
2466 real64 *tmp, /* temporary array */
2467 real64 *pivots, /* prior pivots array */
2468 real64 dtol /* drop tolerance */
2469 )
2470 {
2471 mtx_coord_t nz;
2472 int j,low,high;
2473 double tmpval;
2474
2475 /*
2476 * Move all non-zeros from current row to full array.
2477 * The full array would have been initialized before,
2478 * we must put it back in the clean state when we leave.
2479 * All operations are done over mtx_ALL_COLS.
2480 *
2481 * Note: because of an addrow over mtx_ALL_COLS, entries
2482 * of tmp outside rng may have nonzeros put in them if
2483 * sys->factors has nonzeros in the outlying columns.
2484 * If enough of these pile up out there during elimination
2485 * of a block and the compiler treats double overflow as
2486 * a signalable error, we will have a floating point
2487 * exception.
2488 * Currently sys->factors is a copy of a region from
2489 * the sys->coef matrix, so we should not have anything
2490 * out there to pile up. If we make a variant which
2491 * does not copy the coef matrix but uses it directly,
2492 * this code needs to be revisited.
2493 */
2494 mtx_steal_cur_row_vec(mtx,row,tmp,mtx_ALL_COLS);
2495
2496 /*
2497 * Eliminates nzs from pivot, one by one, filling tmp with multipliers
2498 * We now operate on the entire row, since we are not writing the
2499 * multipliers back to the matrix.
2500 */
2501
2502 low = rng->low;
2503 high = rng->high;
2504 for (j = row-1 ; j >= low ; --(j) ) {
2505 if (tmp[j] == D_ZERO)
2506 continue; /* Nothing to do for this row */
2507 tmpval = tmp[j]/pivots[j]; /* Compute multiplier */
2508
2509 /*
2510 * tmpval is now the multiplier. We use it to eliminate the row,
2511 * but backpatch it to tmp, *after* we do the elimination, as
2512 * mtx_cur_vec_add_row over all columns will stomp on tmp[j]
2513 */
2514 mtx_cur_vec_add_row(mtx,tmp,j,-tmpval,mtx_ALL_COLS,FALSE);
2515 tmp[j] = tmpval; /* patch the diagonal */
2516 }
2517
2518 /*
2519 * Fill up the upper triangular matrix.
2520 * refill the range [low .. row-1]. Remember that we have
2521 * to zero all nnz's in tmp.
2522 */
2523 nz.row = row;
2524 for (j=low;j<row;j++) {
2525 if (tmp[j] != D_ZERO) {
2526 nz.col = j;
2527 mtx_fill_value(upper_mtx,&nz,tmp[j]);
2528 #if RBADEBUG
2529 FPRINTF(gscr,"fillingU: or %d oc %d (cc %d) %24.18g\n",
2530 mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), nz.col, tmp[j]);
2531 #endif
2532 tmp[j] = D_ZERO;
2533 }
2534 }
2535 /*
2536 * refill the range [row .. high]. We *wont* keep
2537 * the diagonal as this is sorted out by the pivot
2538 * array, but here we must keep it, sigh.
2539 * At this stage we don't know that the diagonal is the
2540 * pivot, as that is determined after elimination done.
2541 * Outer loop must delete the ultimate pivot element.
2542 * Odds are good, however, that it is the current diagonal,
2543 * so put in the diagonal last.
2544 */
2545 low = row;
2546 for (j=high;j>=low;j--) {
2547 if (tmp[j] != D_ZERO) {
2548 if (fabs(tmp[j]) > dtol) {
2549 nz.col = j;
2550 mtx_fill_value(mtx,&nz,tmp[j]);
2551 #if RBADEBUG
2552 FPRINTF(gscr,"fillingL: or %d oc %d %24.18g\n",
2553 mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), tmp[j]);
2554 #endif
2555 }
2556 tmp[j] = D_ZERO; /* zero element regardless */
2557 }
2558 }
2559 tmp[row] = D_ZERO;
2560 }
2561
2562 /*
2563 * This function is the same as rankikw_factor except
2564 * that it uses 2 matrices; one to store L and one for U.
2565 * As such it uses eliminate_row2 rather than eliminate_row.
2566 * Note there has been a change in the way ptol is used,
2567 * now selecting nearest passing, rather than taking max.
2568 * We assume there is NO incidence outside the region to be factored
2569 * in sys->factors.
2570 */
2571 static void rankikw2_factor(linsolqr_system_t sys)
2572 {
2573 mtx_coord_t nz;
2574 int32 last_row, defect;
2575 mtx_range_t pivot_candidates;
2576 real64 *tmp;
2577 real64 pivot, *pivots;
2578 mtx_matrix_t mtx, upper_mtx;
2579
2580
2581 /**
2582 ** tmp and pivots are cur indexed, tmp by cols, pivots by rows.
2583 ** rather than allocating tmp every time, which can get expensive,
2584 ** we allocate it once *elsewhere* and zero it all here.
2585 ** Because we don't know the state of it on entry (due to exceptions,
2586 ** etc, the rezeroing is unavoidable.
2587 ** Eliminate_row2 is responsible for returning the working region
2588 ** zeroed.
2589 **/
2590
2591 tmp = sys->ludata->tmp;
2592 mtx_zero_real64(tmp,sys->capacity);
2593 pivots = sys->ludata->pivlist;
2594 mtx = sys->factors;
2595 upper_mtx = sys->inverse;
2596 defect = 0;
2597 #if RBADEBUG
2598 gscr = fopen("/usr1/ballan/tmp/lu/rkw","w+");
2599 #endif
2600
2601 sys->smallest_pivot = MAXDOUBLE;
2602 last_row = pivot_candidates.high = sys->rng.high;
2603 for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
2604
2605 #if (RBADEBUG && 0)
2606 /* remove the && 0 and you better have a lot for free
2607 * disk space
2608 */
2609 mtx_write_region_human_orgrows(gscr,mtx,mtx_ENTIRE_MATRIX);
2610 #endif
2611
2612 pivot_candidates.low = nz.col = nz.row;
2613 pivots[nz.row]=pivot = mtx_value(mtx,&nz);
2614 pivot = fabs(pivot);
2615 if( pivot > sys->pivot_zero &&
2616 pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) &&
2617 !col_is_a_spike(sys,nz.row) ) {
2618 /* Good pivot and not a spike: continue with next row */
2619 if( pivot < sys->smallest_pivot ) {
2620 sys->smallest_pivot = pivot;
2621 }
2622 #if RBADEBUG
2623 FPRINTF(gscr,"Cheap pivot col %d (org %d)\n",
2624 nz.row, mtx_col_to_org(mtx,nz.row));
2625 if (nz.row ==179) {
2626 FPRINTF(stderr,"ben, stop here\n");
2627 }
2628 #endif
2629 ++(nz.row);
2630 continue;
2631 }
2632 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2633 /**
2634 *** Row is a spike row or will
2635 *** be when a necessary column
2636 *** exchange occurs.
2637 **/
2638 eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol);
2639 /* pivot will be leftmost of those that pass ptol and eps, or 0.0. */
2640 if (defect) {
2641 /* unfortunately, can't use mtx_ALL_COLS in search now because we must
2642 ignore columns that were dragged out with previous singular rows. */
2643 pivot = mtx_get_pivot_col(mtx,&nz,&pivot_candidates,&(pivots[nz.row]),
2644 sys->ptol,sys->pivot_zero);
2645 } else {
2646 /* no singular cols, and eliminate moved all left of diag crap out, so */
2647 pivot = mtx_get_pivot_col(mtx,&nz,mtx_ALL_COLS,&(pivots[nz.row]),
2648 sys->ptol,sys->pivot_zero);
2649 }
2650 #if RBADEBUG
2651 FPRINTF(gscr,"Pivot column found %d (org %d)\n",
2652 nz.col, mtx_col_to_org(mtx,nz.col));
2653 #endif
2654 if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2655 /*
2656 * Dependent row, drag to the end. The upper_mtx is a slave
2657 * of mtx, and will be dragged automatically.
2658 */
2659 mtx_drag(mtx, nz.row, last_row);
2660 number_drag(pivots, nz.row, last_row);
2661 --last_row;
2662 defect = 1;
2663 } else {
2664 /* Independent row: nz contains selected pivot */
2665 mtx_swap_cols(mtx, nz.row, nz.col); /* this Fixes U as well */
2666 /* Move pivot to diagonal */
2667 mtx_drag(mtx, nz.row, sys->rng.low ); /* this Fix U as well */
2668 number_drag(pivots, nz.row, sys->rng.low);
2669 if( pivot < sys->smallest_pivot )
2670 sys->smallest_pivot = pivot;
2671 ++(nz.row);
2672 }
2673 }
2674 #if RBADEBUG
2675 /* get rid of this asap*/
2676 {
2677 FILE *fp;
2678 int32 cc;
2679
2680 fp = fopen("/usr1/ballan/tmp/lu/kw2p","w+");
2681 FPRINTF(fp,"kw2 final pivot sequence:\n");
2682 for (cc= sys->rng.low; cc <= last_row; cc++) {
2683 FPRINTF(fp,"orgrow,orgcol = %d,%d,%24.18g\n",
2684 mtx_row_to_org(mtx,cc),
2685 mtx_col_to_org(mtx,cc), pivots[cc]);
2686 }
2687 fclose(fp);
2688 }
2689
2690 #endif
2691
2692 zero_diagonal_elements(sys->factors,sys->rng.low,sys->rng.high);
2693 sys->rank = last_row - sys->rng.low + 1;
2694
2695 #if RBADEBUG
2696 {
2697 FILE *fp;
2698 fp = fopen("/usr1/ballan/tmp/lu/kw2m","w+");
2699 mtx_write_region_human_rows(fp,mtx,mtx_ENTIRE_MATRIX);
2700 fclose(fp);
2701 fp = fopen("/usr1/ballan/tmp/lu/kw2um","w+");
2702 mtx_write_region_human_rows(fp,upper_mtx,mtx_ENTIRE_MATRIX);
2703 fclose(fp);
2704 }
2705 #endif
2706 #if RBADEBUG
2707 fclose(gscr);
2708 #endif
2709 }
2710
2711 /*
2712 * This function is the same as rankijz_factor except
2713 * that it uses 2 matrices; one to store L and one for U.
2714 * As such it uses eliminate_row2 rather than eliminate_row.
2715 *
2716 * We are now using the slave matrix feature of the code, and
2717 * thus we all drag/swaps automatically affect the slave (upper_mtx)
2718 * as well.
2719 */
2720 static void rankijz2_factor(linsolqr_system_t sys)
2721 {
2722 real64 biggest;
2723 mtx_coord_t nz, best;
2724 mtx_region_t candidates;
2725 real64 *tmp;
2726 real64 pivot, *pivots;
2727 mtx_matrix_t mtx, upper_mtx;
2728 /**
2729 ** tmp and pivots are cur indexed, tmp by cols, pivots by rows.
2730 ** rather than allocating tmp every time, which can get expensive,
2731 ** we allocate it once elsewhere and zero it all here.
2732 ** Because we don't know the state of it on entry (due to exceptions,
2733 ** etc), the rezeroing is unavoidable.
2734 ** Eliminate_row2 is responsible for returning the working region
2735 ** zeroed.
2736 **/
2737 tmp = sys->ludata->tmp;
2738 mtx_zero_real64(tmp,sys->capacity);
2739 pivots=sys->ludata->pivlist;
2740 mtx = sys->factors;
2741 upper_mtx = sys->inverse;
2742
2743 sys->smallest_pivot = MAXDOUBLE;
2744 candidates.row.high = sys->rng.high;
2745 candidates.col.high = sys->rng.high;
2746 for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) {
2747 nz.col = nz.row;
2748 pivots[nz.row] = pivot = mtx_value(mtx,&nz);
2749 pivot = fabs(pivot);
2750 candidates.row.low = nz.row;
2751 candidates.col.low = nz.row;
2752 if( !col_is_a_spike(sys,nz.row) ) {
2753 best.col = nz.row;
2754 mtx_col_max(mtx,&best,&(candidates.row),&biggest);
2755 /* make sure we aren't missing someone really nice in the column */
2756 if( fabs(biggest) >= sys->pivot_zero ) {
2757 if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) {
2758 mtx_swap_rows(mtx,nz.row,best.row); /* Fixes U as well */
2759 pivots[nz.row] = biggest;
2760 pivot = fabs(biggest);
2761 }
2762 if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot;
2763 ++(nz.row);
2764 continue;
2765 }
2766 }
2767 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2768 /**
2769 *** Row is a spike row or will
2770 *** be when a necessary column
2771 *** exchange occurs.
2772 **/
2773 eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol);
2774 /* pivot will be largest of those available. get size and value */
2775 pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row]));
2776 /* Move pivot to diagonal */
2777 if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2778 /* Dependent row, drag nz to lower right */
2779 mtx_drag(mtx,nz.row,candidates.row.high);
2780 number_drag(pivots,nz.row,candidates.row.high);
2781 --(candidates.row.high);
2782 } else {
2783 /* Independent row, drag nz to upper left */
2784 mtx_swap_cols(mtx,nz.row,nz.col); /* this Fixes U as well */
2785 mtx_drag(mtx ,nz.row ,sys->rng.low);
2786
2787 number_drag(pivots,nz.row,sys->rng.low);
2788 if( pivot < sys->smallest_pivot )
2789 sys->smallest_pivot = pivot;
2790 ++(nz.row);
2791 }
2792 }
2793 zero_diagonal_elements(sys->factors,sys->rng.low,sys->rng.high);
2794 sys->rank = candidates.row.high - sys->rng.low + 1;
2795 }
2796
2797 /*
2798 * See the comments attached to forward_substitute.
2799 * This code otherwise is the same. It uses a 2 bodied
2800 * matrix as well as making use of mtx_ALL_COLS and
2801 * mtx_ALL_ROWS whereever possible.
2802 * We make the following additional assumptions here:
2803 * (if they do not hold, do NOT use this function)
2804 * - sys->inverse (U) has no incidence that is not multipliers
2805 * (and the diagonal of 1s is NOT in sys->inverse.)
2806 * As of 10/95, this is how ranki2 U is constructed.
2807 * - sys->factors (L) has no incidence on the upper triangle,
2808 * including the diagonal, or outside the factored region.
2809 * relaxation: incidence anywhere allowed if value = 0.0
2810 * since 0 doesn't contribute to a dot product
2811 * and the only thing we do with triangles is dot them.
2812 * - There may be singular rows and columns in the factorization,
2813 * but any additions coming from these rows/columns during
2814 * mtx_ALL_*O*S operations will not contribute to sums because the
2815 * user zeroed the arr entries corresponding to these before
2816 * calling this function.
2817 */
2818 static
2819 void forward_substitute2(linsolqr_system_t sys,
2820 real64 *arr,
2821 boolean transpose)
2822 {
2823 mtx_coord_t nz;
2824 real64 sum, *pivlist;
2825 mtx_matrix_t mtx;
2826 int32 dotlim;
2827 boolean nonzero_found=FALSE;
2828
2829 pivlist=sys->ludata->pivlist;
2830 dotlim = sys->rng.low+sys->rank;
2831 if (transpose) { /* arr is indexed by original column number */
2832 mtx=sys->inverse;
2833 for( nz.col=sys->rng.low; nz.col < dotlim; ++(nz.col) ) {
2834 register int32 org_col;
2835
2836 org_col = mtx_col_to_org(mtx,nz.col);
2837 if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
2838 if (nonzero_found) {
2839 sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,mtx_ALL_ROWS,TRUE);
2840 /* arr[org_col] = (arr[org_col] - sum) / D_ONE */;
2841 arr[org_col] -= sum;
2842 }
2843 }
2844 } else { /* arr is indexed by original row number */
2845 mtx=sys->factors;
2846 for( nz.row=sys->rng.low; nz.row < dotlim; ++(nz.row) ) {
2847 register int32 org_row;
2848
2849 org_row = mtx_row_to_org(mtx,nz.row);
2850 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
2851 if (nonzero_found) {
2852 sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,mtx_ALL_COLS,TRUE);
2853 /*
2854 nz.col = nz.row;
2855 arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz);
2856 */
2857 arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row];
2858 }
2859 }
2860 }
2861 }
2862
2863 /*
2864 * See the comments attached to backward_substitute and
2865 * forward_substitute2.
2866 * When solving for the transpose, then we are actually
2867 * running of the lower triangle, hence we use sys->factors.
2868 * Otherwise we use sys->inverse which stores U.
2869 */
2870 static
2871 void backward_substitute2(linsolqr_system_t sys,
2872 real64 *arr,
2873 boolean transpose)
2874 {
2875 mtx_coord_t nz;
2876 real64 sum, *pivlist;
2877 mtx_matrix_t mtx;
2878 int32 dotlim;
2879 boolean nonzero_found=FALSE; /* once TRUE, substitution must be done
2880 over remaining rows/cols */
2881
2882 dotlim=sys->rng.low;
2883 pivlist=sys->ludata->pivlist;
2884 if (transpose) { /* arr is indexed by original column number */
2885 mtx = sys->factors;
2886 for( nz.col = sys->rng.low+sys->rank-1; nz.col >= dotlim ; --(nz.col) ) {
2887 register int32 org_col;
2888
2889 org_col = mtx_col_to_org(mtx,nz.col);
2890 if (arr[org_col] != D_ZERO) nonzero_found=TRUE;
2891 if (nonzero_found) {
2892 sum = mtx_col_dot_full_org_vec(mtx,nz.col,arr,mtx_ALL_ROWS,TRUE);
2893 arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col];
2894 }
2895 }
2896 } else { /* arr is indexed by original row number */
2897 /* we are working from the bottom up */
2898 mtx = sys->inverse;
2899 for( nz.row = sys->rng.low+sys->rank-1; nz.row >= dotlim ; --(nz.row) ) {
2900 register int32 org_row;
2901
2902 org_row = mtx_row_to_org(mtx,nz.row);
2903 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
2904 if (nonzero_found) {
2905 sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,mtx_ALL_COLS,TRUE);
2906 arr[org_row] -= sum;
2907 }
2908 }
2909 }
2910 }
2911
2912 static void calc_dependent_rows_ranki2(linsolqr_system_t sys)
2913 {
2914 mtx_coord_t nz;
2915 real64 value;
2916 mtx_range_t colrange;
2917 mtx_range_t rowrange;
2918 real64 *lc;
2919 mtx_matrix_t mtx;
2920
2921 sys->rowdeps = TRUE;
2922 if( ( (sys->reg.row.low == sys->rng.low) &&
2923 ( sys->reg.row.high == sys->rng.low+sys->rank-1 )
2924 ) || sys->rank==0 )
2925 return;
2926
2927 lc = sys->ludata->tmp;
2928 colrange.low = sys->rng.low;
2929 colrange.high = colrange.low + sys->rank - 1;
2930 rowrange.low = sys->rng.high;
2931 rowrange.high = sys->rng.low+sys->rank;
2932 mtx=sys->factors;
2933
2934 nz.row = sys->reg.row.low;
2935 for( ; nz.row <= sys->reg.row.high; nz.row++ ) {
2936 if( nz.row == sys->rng.low ) {
2937 nz.row = rowrange.high-1;
2938 continue;
2939 }
2940 mtx_zero_real64(lc,(sys->capacity));
2941 /* must zero the whole thing to use the backward_substitute2 right */
2942 mtx_org_row_vec(mtx,nz.row,lc,&colrange);
2943 if( nz.row < rowrange.high || nz.row > rowrange.low )
2944 backward_substitute2(sys,lc,TRUE);
2945 forward_substitute2(sys,lc,TRUE);
2946 mtx_clear_row(mtx,nz.row,&colrange);
2947 for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) {
2948 value = lc[mtx_col_to_org(mtx,nz.col)];
2949 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
2950 }
2951 }
2952 }
2953
2954 static void calc_dependent_cols_ranki2(linsolqr_system_t sys)
2955 {
2956 mtx_coord_t nz;
2957 real64 value;
2958 mtx_range_t rowrange;
2959 mtx_range_t colrange;
2960 real64 *lc;
2961 mtx_matrix_t mtx;
2962
2963 sys->coldeps = TRUE;
2964 if( ( (sys->reg.col.low == sys->rng.low) &&
2965 ( sys->reg.col.high == sys->rng.low+sys->rank-1 )
2966 ) || sys->rank==0 )
2967 return;
2968
2969 lc = sys->ludata->tmp;
2970 rowrange.low = sys->rng.low;
2971 rowrange.high = rowrange.low + sys->rank - 1;
2972 colrange.high = sys->rng.low+sys->rank;
2973 colrange.low = sys->rng.high;
2974 mtx=sys->factors;
2975
2976 nz.col = sys->reg.col.low;
2977 for( ; nz.col <= sys->reg.col.high; nz.col++ ) {
2978 if( nz.col == sys->rng.low ) {
2979 nz.col = colrange.high-1;
2980 continue;
2981 }
2982 mtx_zero_real64(lc,sys->capacity);
2983 mtx_org_col_vec(mtx,nz.col,lc,&rowrange);
2984 if( nz.col < colrange.high || nz.col > colrange.low )
2985 backward_substitute2(sys,lc,FALSE);
2986 forward_substitute2(sys,lc,FALSE);
2987 mtx_clear_col(mtx,nz.col,&rowrange);
2988 for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) {
2989 value = lc[mtx_row_to_org(mtx,nz.row)];
2990 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
2991 }
2992 }
2993 }
2994
2995 static int ranki2_solve(linsolqr_system_t sys, struct rhs_list *rl)
2996 {
2997 /* zero any unsolved for vars first so they don't contaminate
2998 mtx_ALL_*O*S dot products.
2999 */
3000 zero_unpivoted_vars(sys,rl->varvalue,rl->transpose);
3001 backward_substitute2(sys,rl->varvalue,rl->transpose);
3002 forward_substitute2(sys,rl->varvalue,rl->transpose);
3003 return 0;
3004 }
3005
3006 /* include rankiba2 implementation. Note that this is VERY VERY bad style
3007 and needs to be fixed immediately by splitting up the linsolqr file.
3008 This is our fastest (far and away) ranki implementation -- drag free.
3009 */
3010 #if 1
3011 #include "rankiba2.c"
3012 #endif
3013
3014 #ifdef BUILD_KIRK_CODE
3015 /*
3016 * this forward declaration is necessary to keept the
3017 * system happy until we can organize this file
3018 */
3019
3020 static
3021 int kirk1_factor(linsolqr_system_t sys,
3022 mtx_region_t *A11,
3023 int kirk_method);
3024
3025 #endif /* BUILD_KIRK_CODE */
3026
3027 /*
3028 * This is the entry point for all the ranki2_* schemes
3029 * which make use of a 2 bodied matrix.
3030 * it does not respond to ranki_kw and ranki_jz as if they
3031 * were calls for ranki_kw2 and ranki_jz2. with the new
3032 * factorization classes , there's really no call to be
3033 * sneaking around the header functions.
3034 * ranki_ka goes here as well. check sys->fmethod to see whos
3035 * expected to be in use.
3036 */
3037 int ranki2_entry(linsolqr_system_t sys, mtx_region_t *region)
3038 {
3039 struct rhs_list *rl;
3040 double time;
3041
3042 CHECK_SYSTEM(sys);
3043 if (sys->factored) return 0;
3044 switch(sys->fmethod) {
3045 case ranki_ba2:
3046 case ranki_kw2:
3047 case ranki_jz2: /* add new methods here */
3048 #ifdef BUILD_KIRK_CODE
3049 case ranki_ka:
3050 #endif /* BUILD_KIRK_CODE */
3051 break;
3052 default:
3053 return 1;
3054 }
3055
3056 if (ISNULL(sys->ludata)) return 1;
3057 if (NOTNULL(sys->inverse)) mtx_destroy(sys->inverse);
3058 sys->inverse = NULL;
3059 if (NOTNULL(sys->factors)) mtx_destroy(sys->factors);
3060 if (region == mtx_ENTIRE_MATRIX) determine_pivot_range(sys);
3061 else square_region(sys,region);
3062
3063 sys->factors = mtx_copy_region(sys->coef,region);
3064 sys->inverse = mtx_create_slave(sys->factors);
3065 sys->rank = -1;
3066 sys->smallest_pivot = MAXDOUBLE;
3067 for (rl = sys->rl ; NOTNULL(rl) ; rl = rl->next)
3068 rl->solved = FALSE;
3069 insure_capacity(sys);
3070 insure_lu_capacity(sys);
3071
3072 time = tm_cpu_time();
3073 switch(sys->fmethod) {
3074 case ranki_ba2:
3075 rankiba2_factor(sys);
3076 break;
3077 case ranki_kw2:
3078 rankikw2_factor(sys);
3079 break;
3080 case ranki_jz2:
3081 rankijz2_factor(sys);
3082 break;
3083
3084 #ifdef BUILD_KIRK_CODE
3085 case ranki_ka:
3086 kirk1_factor(sys,region,2);
3087 break;
3088 #endif /* BUILD_KIRK_CODE */
3089
3090 default:
3091 return 1;
3092 }
3093 sys->factored = TRUE;
3094
3095 #define KAA_DEBUG 1
3096 #if KAA_DEBUG
3097 if (g_linsolqr_timing) {
3098 int anz;
3099 int fnz;
3100 anz = mtx_nonzeros_in_region(sys->coef,region);
3101 fnz = mtx_nonzeros_in_region(sys->factors,region) +
3102 mtx_nonzeros_in_region(sys->inverse,0);
3103 time = tm_cpu_time() - time;
3104 FPRINTF(stderr,"A-NNZ: %d Factor time: %f Fill %g\n",
3105 anz,time,( anz>0 ? (double)fnz/(double)anz : 0));
3106 }
3107 #endif /* KAA_DEBUG */
3108 #undef KAA_DEBUG
3109 return 0;
3110 }
3111
3112 /***************************************************************************\
3113 End of RANKI implementation functions.
3114 \***************************************************************************/
3115
3116
3117 #ifdef BUILD_KIRK_CODE
3118 /*
3119 ***************************************************************************
3120 * Start of kirk_* routines
3121 *
3122 * kirk1_factor:
3123 * This routine is based on rankikw2_factor, except that it takes
3124 * an additional region, so as to do some *global* pivot restriction.
3125 * This is for the case of a single border for the entire matrix.
3126 *
3127 ***************************************************************************
3128 */
3129
3130 /*
3131 * This is just a dummy structure so as to get the size
3132 * correct. It is *exactly the same size as mtx_linklist,
3133 * until we can sort out the software engineering issues.
3134 */
3135 struct dlinklist {
3136 int index;
3137 struct dlinklist *prev;
3138 };
3139
3140 static
3141 int kirk1_factor(linsolqr_system_t sys,
3142 mtx_region_t *A11,
3143 int kirk_method)
3144 {
3145 mtx_coord_t nz;
3146 int32 last_row;
3147 mtx_range_t pivot_candidates;
3148 real64 *tmp,*tmp_array_origin;
3149 real64 pivot, *pivots;
3150 int32 length;
3151 mtx_matrix_t mtx, upper_mtx;
3152 real64 maxa;
3153
3154 int32 *inserted = NULL; /* stuff for the link list */
3155 mem_store_t eltbuffer = NULL;
3156
3157 #ifdef NOP_DEBUG
3158 mtx_number_ops = 0;
3159 #endif /* NOP_DEBUG */
3160
3161 length = sys->rng.high + 1;
3162 tmp_array_origin = length > 0 ?
3163 (real64 *)asccalloc( length,sizeof(real64) ) : NULL;
3164 tmp = tmp_array_origin;
3165 pivots=sys->ludata->pivlist;
3166 mtx = sys->factors;
3167 upper_mtx = sys->inverse;
3168 inserted = (int32 *)asccalloc(length,sizeof(int32));
3169 eltbuffer = mem_create_store(2,256,sizeof(struct dlinklist),
3170 2,256);
3171
3172 sys->smallest_pivot = MAXDOUBLE;
3173 last_row = pivot_candidates.high = sys->rng.high;
3174 for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
3175
3176 pivot_candidates.low = nz.col = nz.row;
3177 pivots[nz.row]=pivot = mtx_value(mtx,&nz);
3178 pivot = fabs(pivot);
3179 maxa = mtx_row_max(mtx,&nz,&pivot_candidates,NULL);
3180 if ((pivot > sys->pivot_zero) && (pivot >= sys->ptol*maxa) &&
3181 !col_is_a_spike(sys,nz.row)) {
3182 if (pivot < sys->smallest_pivot)
3183 sys->smallest_pivot = pivot;
3184 ++(nz.row);
3185 continue;
3186 }
3187 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
3188 /**
3189 *** Row is a spike row or will
3190 *** be when a necessary column
3191 *** exchange occurs.
3192 **/
3193 if (kirk_method==1)
3194 eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol);
3195 else{
3196 mtx_eliminate_row2(mtx,upper_mtx,&(sys->rng),
3197 nz.row,tmp,pivots,inserted,eltbuffer);
3198 mem_clear_store(eltbuffer);
3199 }
3200 /* pivot will be largest of those available. get size and value */
3201 pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row]));
3202 if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */
3203 /*
3204 * Dependent row, drag to the end. The upper_mtx is a slave
3205 * of mtx, and will be dragged automatically.
3206 */
3207 mtx_drag(mtx,nz.row,last_row);
3208 number_drag(pivots,nz.row,last_row);
3209 --last_row;
3210 #ifdef KAA_DEBUG
3211 FPRINTF(stderr,"Warning: Row %d is dependent with pivot %20.8g\n",
3212 nz.row,pivot);
3213 #endif /* KAA_DEBUG */
3214 } else {
3215 /* Independent row: nz contains best pivot */
3216 mtx_swap_cols(mtx,nz.row,nz.col); /* this Fixes U as well */