/[ascend]/trunk/base/generic/solver/linsolqr.c
ViewVC logotype

Contents of /trunk/base/generic/solver/linsolqr.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 708 - (show annotations) (download) (as text)
Tue Jun 27 07:34:31 2006 UTC (17 years, 11 months ago) by johnpye
File MIME type: text/x-csrc
File size: 168443 byte(s)
Replaced some references to ascmalloc with ASC_NEW_ARRAY
1 /*
2 * linsol II: Ascend Sparse Linear Solvers
3 * by Benjamin Allan
4 * Created: 2/6/90
5 * Version: $Revision: 1.26 $
6 * Version control file: $RCSfile: linsolqr.c,v $
7 * Date last modified: $Date: 2000/01/25 02:26:58 $
8 * Last modified by: $Author: ballan $
9 *
10 * This file is part of the SLV solver.
11 *
12 * Copyright (C) 1994 Benjamin Andrew Allan
13 *
14 * Based on linsol:
15 * Copyright (C) 1990 Karl Michael Westerberg
16 * Copyright (C) 1993 Joseph Zaher
17 * Copyright (C) 1994 Joseph Zaher, Benjamin Andrew Allan
18 * and on sqr.pas v1.5:
19 * Copyright (C) 1994 Boyd Safrit
20 *
21 * The SLV solver is free software; you can redistribute
22 * it and/or modify it under the terms of the GNU General Public License as
23 * published by the Free Software Foundation; either version 2 of the
24 * License, or (at your option) any later version.
25 *
26 * The SLV solver is distributed in hope that it will be
27 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 * General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with the program; if not, write to the Free Software Foundation,
33 * Inc., 675 Mass Ave, Cambridge, MA 02139 USA. Check the file named
34 * COPYING. COPYING is found in ../compiler.
35 */
36
37 /*
38 * Implementation Notes:
39 * 9/95: there are many places in linsolqr.c where lower level matrix
40 * operations have been replaced by higher level ones. These used to
41 * be flagged as such so that people who wanted to construct newer
42 * operations from the lowest level mtx operators could see how to
43 * use the lowlevel operators. Since it has been decided that the old
44 * linsol will NOT be discarded completely (we like having easy
45 * benchmarks to look better than), the user wishing to code new
46 * operations is directed back to linsol to see how lowlevel operations
47 * can be expensively done.
48 *
49 * 10/95: (BAA) I attempted to optimize number_drag through use of
50 * pointer arithmetic. Testing the resulting code without optimization
51 * turned on gave about 20% improvement over the original on an alpha
52 * using the native compiler. However, with optimization on the array
53 * subscripted version which is left here won by ~40%.
54 * The results for gcc were similar: pointer 10% better unoptimized,
55 * subscripted 30% better when -O2.
56 * For both compilers -O3 was worse! gcc was ~1.8x slower than the
57 * native cc both optimized and not.
58 * I have not tested a loop unrolled drag, but I expect unrolling is
59 * exactly what the optimizer is doing, so why bother?
60 */
61
62 #include <math.h>
63 #include <stdarg.h>
64 #include <utilities/ascConfig.h>
65 #include <utilities/ascMalloc.h>
66 #include <utilities/ascPanic.h>
67 #include <utilities/mem.h>
68 #include <utilities/set.h>
69 #include <general/tm_time.h>
70 #include "mtx.h"
71 #include "linsolqr.h"
72 #include <general/mathmacros.h>
73
74 #ifdef __WIN32__
75 #define copysign _copysign
76 #endif /* __WIN32__ */
77
78 #define LINSOLQR_DEBUG FALSE
79 /* qr code debugging flag */
80 #define LINSOL_DEBUG FALSE
81 /* linsol debugging flag */
82
83 #define D_ZERO (real64)0.0
84 #define ZERO (int32)0
85 #define D_ONE (real64)1.0
86
87 /* housekeeping flags */
88 #define BUILD_DEAD_CODE 0
89 #undef BUILD_KIRK_CODE
90
91
92 #define RBADEBUG 0
93 /* turns on spew that will generate output files
94 * comparable to and in the same location as for rankiba2
95 * user specific hardcoded pathnames are involved.
96 * don't try this at home.
97 */
98 #if RBADEBUG
99 extern FILE *gscr;
100 #endif
101
102 /*
103 * timing messages disable flag.
104 */
105 int g_linsolqr_timing = 1;
106
107 /***************************************************************************\
108 Data structures for linsol
109 Any time you modify these, please verify linsolqr_size is still correct.
110 \***************************************************************************/
111 struct rhs_list {
112 real64 *rhs; /* Vector of rhs values */
113 real64 *varvalue; /* Solution of the linear system */
114 boolean solved; /* ? has the rhs been solved */
115 boolean transpose; /* ? transpose the coef matrix */
116 struct rhs_list *next;
117 };
118
119 struct qr_fill_t {
120 int32 cnt; /* number of real nonzeros in a column */
121 int32 col; /* column number prior to being sorted */
122 int32 fill; /* fill if this (or if col?) were the pivot column */
123 int32 overlap; /* scratch for fillable calculation. */
124 };
125 /* fill counting structure for sparse qr */
126
127 struct lu_auxdata { /* all indexed by cur row or cur col number */
128 real64 *pivlist; /* vector of pivots (diagonal of L for ranki) */
129 real64 *tmp; /* row elimination, dependency buffer */
130 int32 cap; /* current capacity of the vectors if not null */
131 };
132 /* structure for lu algorithms */
133
134 struct qr_auxdata { /* all indexed by cur row or cur col number */
135 mtx_matrix_t hhvects; /* slave matrix of sys->factors for holding u's */
136 mtx_sparse_t sp; /* sparse data space */
137 real64 *alpha; /* column norms for the Q\R factor matrix */
138 real64 *sigma; /* column norms for the S (R inverse) matrix */
139 real64 *tau; /* column tau of the Householder transforms */
140 real64 *hhcol; /* Householder transform scratch vector */
141 real64 *hhrow; /* Householder transform scratch vector */
142 real64 nu; /* current condition number of inverse */
143 real64 anu; /* condition number of coef */
144 real64 asubnu; /* condition number of active region in coef */
145 struct qr_fill_t *fill; /* fill counting structure */
146 mtx_region_t facreg; /* region over which qr is done if nonsquare */
147 int32 cap; /* current capacity of these vectors if not null */
148 };
149 /* structure for CondQR algorithms */
150
151 struct linsolqr_header {
152 int integrity;
153 enum factor_class fclass; /* Type of factoring expected */
154 enum factor_method fmethod; /* Method factoring expected */
155 enum reorder_method rmethod; /* Type of most recent reordering */
156 int32 capacity; /* Capacity of arrays */
157 mtx_matrix_t coef; /* Coefficient matrix */
158 struct rhs_list *rl; /* List of rhs vectors */
159 int rlength; /* Length of rhs list */
160 real64 pivot_zero; /* Smallest acceptable pivot */
161 real64 ptol; /* Pivot selection tolerance */
162 real64 ctol; /* Condition selection tolerance */
163 real64 dtol; /* Matrix entry drop tolerance */
164 mtx_matrix_t factors; /* Matrix with UL and dependence info (ranki),
165 or L and dependence info (ranki2),
166 or R and dependence info (qr methods) */
167 mtx_matrix_t inverse; /* Matrix with U multipliers (ranki2) or R^-1
168 (condqr), in both cases slave of factors */
169 boolean factored; /* ? has the matrix been factored */
170 boolean rowdeps; /* ? have row dependencies been calc'ed */
171 boolean coldeps; /* ? have col dependencies been calc'ed */
172 mtx_range_t rng; /* Pivot range */
173 mtx_region_t reg; /* Bounding region given */
174 int32 rank; /* Rank of the matrix */
175 real64 smallest_pivot; /* Smallest pivot accepted */
176 struct qr_auxdata *qrdata; /* Data vectors for qr methods */
177 struct lu_auxdata *ludata; /* Data vectors for lu methods */
178 };
179 /* linsol main structure */
180
181 /***************************************************************************\
182 string functions for linsol io
183 \***************************************************************************/
184
185 char *linsolqr_rmethods() {
186 static char names[]="Natural, SPK1, TSPK1";
187 return names;
188 }
189
190 char *linsolqr_fmethods() {
191 static char names[] =
192 "SPK1/RANKI,SPK1/RANKI+ROW,Fast-SPK1/RANKI,Fast-SPK1/RANKI+ROW,Fastest-SPK1/MR-RANKI,CondQR,CPQR";
193 return names;
194 }
195
196 /** **/
197 enum reorder_method linsolqr_rmethod_to_enum(char *name) {
198 if (strcmp(name,"SPK1")==0) return spk1;
199 if (strcmp(name,"TSPK1")==0) return tspk1;
200 if (strcmp(name,"Natural")==0) return natural;
201 return unknown_r;
202 }
203
204 enum factor_method linsolqr_fmethod_to_enum(char *name) {
205 if (strcmp(name,"KIRK-STUFF")==0) return ranki_ka;
206 if (strcmp(name,"SPK1/RANKI")==0) return ranki_kw;
207 if (strcmp(name,"SPK1/RANKI+ROW")==0) return ranki_jz;
208 if (strcmp(name,"Fast-SPK1/RANKI")==0) return ranki_kw2;
209 if (strcmp(name,"Fast-SPK1/RANKI+ROW")==0) return ranki_jz2;
210 if (strcmp(name,"Fastest-SPK1/MR-RANKI")==0) return ranki_ba2;
211 if (strcmp(name,"CondQR")==0) return cond_qr;
212 if (strcmp(name,"CPQR")==0) return plain_qr;
213 return unknown_f;
214 }
215
216 enum factor_class linsolqr_fmethod_to_fclass(enum factor_method fm)
217 {
218 switch (fm) {
219 /* ranki-like things */
220 case ranki_kw:
221 case ranki_jz:
222 case ranki_ba2:
223 case ranki_kw2:
224 case ranki_jz2:
225 case ranki_ka:
226 return ranki;
227 /* implemented qr things */
228 case plain_qr:
229 return s_qr;
230 /* other stuff unimplemented. all fall through */
231 case cond_qr:
232 case opt_qr:
233 case ls_qr:
234 case gauss_ba2:
235 case symmetric_lu:
236 case unknown_f:
237 default:
238 return unknown_c;
239 }
240 }
241
242 /** **/
243 char *linsolqr_enum_to_rmethod(enum reorder_method meth) {
244 switch (meth) {
245 case spk1: return "SPK1";
246 case tspk1: return "TSPK1";
247 case natural: return "Natural";
248 default: return "Unknown reordering method.";
249 }
250 }
251
252 char *linsolqr_enum_to_fmethod(enum factor_method meth) {
253 switch (meth) {
254 case ranki_kw: return "SPK1/RANKI";
255 case ranki_jz: return "SPK1/RANKI+ROW";
256 case ranki_ba2: return "Fastest-SPK1/MR-RANKI";
257 case ranki_kw2: return "Fast-SPK1/RANKI";
258 case ranki_jz2: return "Fast-SPK1/RANKI+ROW";
259 case ranki_ka: return "KIRK-STUFF";
260 case cond_qr: return "CondQR";
261 case plain_qr: return "CPQR";
262 default: return "Unknown factorization method.";
263 }
264 }
265
266 /** **/
267 char *linsolqr_rmethod_description(enum reorder_method meth) {
268 static char sspk1[]="SPK1 reordering ala Stadtherr.";
269 static char stspk1[]="SPK1 reordering column wise ala Stadtherr.";
270 static char nat[]="Ordering as received from user";
271 switch (meth) {
272 case spk1: return sspk1;
273 case tspk1: return stspk1;
274 case natural: return nat;
275 default: return "Unknown reordering method.";
276 }
277 }
278
279 char *linsolqr_fmethod_description(enum factor_method meth) {
280 static char ex_ranki[] =
281 "SPK1 reordering with RANKI LU factoring ala Stadtherr.";
282 static char ex_rankib[] =
283 "SPK1 reordering with RANKI LU factoring ala Stadtherr, but fast.";
284 static char ex_rankij[]="SPK1/RANKI LU with pseudo-complete pivoting";
285 static char ex_rankik[]="KIRK-STUFF/RANKI LU with pseudo-complete pivoting";
286 static char ex_condqr[]="Sparse QR with condition controlled pivoting.";
287 static char ex_plainqr[]="Sparse QR with column pivoting.";
288 switch (meth) {
289 case ranki_kw: return ex_ranki;
290 case ranki_kw2: return ex_ranki;
291 case ranki_ba2: return ex_rankib;
292 case ranki_jz: return ex_rankij;
293 case ranki_jz2: return ex_rankij;
294 case ranki_ka: return ex_rankik;
295 case cond_qr: return ex_condqr;
296 case plain_qr: return ex_plainqr;
297 default: return "Unknown factorization method.";
298 }
299 }
300
301 /***************************************************************************\
302 internal check routines
303 \***************************************************************************/
304 #define OK ((int)439828676)
305 #define DESTROYED ((int)839276847)
306 #define CHECK_SYSTEM(a) check_system((a),__FILE__,__LINE__)
307 static int check_system(linsolqr_system_t sys, char *file, int line)
308 /**
309 *** Checks the system handle. Return 0 if ok, 1 otherwise.
310 **/
311 {
312 UNUSED_PARAMETER(file);
313
314 if( ISNULL(sys) ) {
315 FPRINTF(stderr,"ERROR: (linsolqr) check_system\n");
316 FPRINTF(stderr," NULL system handle.\n");
317 return 1;
318 }
319
320 switch( sys->integrity ) {
321 case OK:
322 return 0;
323 case DESTROYED:
324 FPRINTF(stderr,"ERROR: (linsolqr) check_system, line %d\n",line);
325 FPRINTF(stderr," System was recently destroyed.\n");
326 break;
327 default:
328 FPRINTF(stderr,"ERROR: (linsolqr) check_system, line %d\n",line);
329 FPRINTF(stderr," System was reused or never created.\n");
330 break;
331 }
332 return 1;
333 }
334
335 /***************************************************************************\
336 external calls (and some of their internals)
337 \***************************************************************************/
338 static void destroy_rhs_list(struct rhs_list *rl)
339 /**
340 *** Destroys rhs list.
341 **/
342 {
343 while( NOTNULL(rl) ) {
344 struct rhs_list *p;
345 p = rl;
346 rl = rl->next;
347 if( NOTNULL(p->varvalue) )
348 ascfree( (POINTER)(p->varvalue) );
349 ascfree( (POINTER)p );
350 }
351 }
352
353 static struct rhs_list *find_rhs(struct rhs_list *rl,real64 *rhs)
354 /**
355 *** Searches for rhs in rhs list, returning it or NULL if not found.
356 **/
357 {
358 for( ; NOTNULL(rl) ; rl = rl->next )
359 if( rl->rhs == rhs )
360 break;
361 return(rl);
362 }
363
364 static struct lu_auxdata *create_ludata()
365 /**
366 *** Creates an empty zeroed ludata struct.
367 *** Filled up by insure_lu_capacity.
368 **/
369 {
370 struct lu_auxdata *d;
371 d=(struct lu_auxdata *)asccalloc(1,sizeof(struct lu_auxdata));
372 return d;
373 }
374
375 static struct qr_auxdata *create_qrdata()
376 /**
377 *** Creates an empty zeroed qrdata struct.
378 *** Filled up by insure_qr_capacity.
379 **/
380 {
381 struct qr_auxdata *d;
382 d=(struct qr_auxdata *)asccalloc(1,sizeof(struct qr_auxdata));
383 return d;
384 }
385
386 static void destroy_ludata(struct lu_auxdata *d) {
387 /**
388 *** Conditionally destroys a ludata struct and its parts.
389 **/
390 if (NOTNULL(d)) {
391 if (NOTNULL(d->pivlist)) ascfree(d->pivlist);
392 d->pivlist=NULL;
393 if (NOTNULL(d->tmp)) ascfree(d->tmp);
394 d->tmp=NULL;
395 ascfree(d);
396 }
397 }
398
399 static void destroy_qrdata(struct qr_auxdata *d) {
400 /**
401 *** Conditionally destroys a qrdata struct and its parts.
402 **/
403 if (NOTNULL(d)) {
404 if (NOTNULL(d->alpha)) ascfree(d->alpha);
405 if (NOTNULL(d->sigma)) ascfree(d->sigma);
406 if (NOTNULL(d->tau)) ascfree(d->tau);
407 if (NOTNULL(d->hhcol)) ascfree(d->hhcol);
408 if (NOTNULL(d->hhrow)) ascfree(d->hhrow);
409 if (NOTNULL(d->fill)) ascfree(d->fill);
410
411 ascfree(d);
412 }
413 }
414
415 linsolqr_system_t linsolqr_create()
416 {
417 linsolqr_system_t sys;
418
419 sys = (linsolqr_system_t)ascmalloc( sizeof(struct linsolqr_header) );
420 sys->fclass = unknown_c;
421 sys->fmethod = unknown_f;
422 sys->rmethod = unknown_r;
423 sys->integrity = OK;
424 sys->capacity = 0;
425 sys->coef = NULL;
426 sys->rl = NULL;
427 sys->rlength = 0;
428 sys->pivot_zero = 1e-12; /* default value */
429 sys->ptol = 0.1; /* default value */
430 sys->ctol = 0.1; /* default value */
431 sys->dtol = LINQR_DROP_TOLERANCE; /* default value */
432 sys->factors = NULL;
433 sys->inverse = NULL;
434 sys->factored = FALSE;
435 sys->rowdeps = FALSE;
436 sys->coldeps = FALSE;
437 sys->rng.low = sys->rng.high = -1;
438 sys->reg.row.low = sys->reg.row.high = -1;
439 sys->reg.col.low = sys->reg.col.high = -1;
440 sys->rank = -1;
441 sys->smallest_pivot = MAXDOUBLE;
442 sys->qrdata = NULL;
443 sys->ludata = NULL;
444 return(sys);
445 }
446
447 void linsolqr_destroy(linsolqr_system_t sys)
448 {
449 if(CHECK_SYSTEM(sys)) {
450 FPRINTF(stderr,"Bad linsolqr_system_t found. Not destroyed.\n");
451 return;
452 }
453 if( NOTNULL(sys->coef) ) {
454 FPRINTF(stderr,"Notice: (linsolqr) non-NULL coefficient matrix\n");
455 FPRINTF(stderr," in linsolqr system not being\n");
456 FPRINTF(stderr," destroyed with linsolqr_system_t.\n");
457 }
458 if( NOTNULL(sys->inverse) )
459 mtx_destroy(sys->inverse);
460 if( NOTNULL(sys->factors) )
461 mtx_destroy(sys->factors);
462 destroy_rhs_list(sys->rl);
463 destroy_qrdata(sys->qrdata);
464 destroy_ludata(sys->ludata);
465 sys->integrity = DESTROYED;
466 ascfree( (POINTER)sys );
467 }
468
469 void linsolqr_set_matrix(linsolqr_system_t sys,mtx_matrix_t mtx)
470 {
471 if(CHECK_SYSTEM(sys)) {
472 FPRINTF(stderr,"Bad linsolqr_system_t found. coef mtx not set.\n");
473 return;
474 }
475 sys->coef = mtx;
476 }
477
478
479 void linsolqr_set_region(linsolqr_system_t sys,mtx_region_t region)
480 {
481 if(CHECK_SYSTEM(sys)) {
482 FPRINTF(stderr,"Bad linsolqr_system_t found. coef mtx not set.\n");
483 return;
484 }
485 sys->reg = region;
486 }
487
488
489 mtx_matrix_t linsolqr_get_matrix(linsolqr_system_t sys)
490 {
491 CHECK_SYSTEM(sys);
492 return( sys->coef );
493 }
494
495 mtx_region_t *linsolqr_get_region(linsolqr_system_t sys)
496 {
497 return &(sys->reg);
498 }
499
500 void linsolqr_add_rhs(linsolqr_system_t sys,
501 real64 *rhs,
502 boolean transpose)
503 {
504 struct rhs_list *rl;
505
506 if(CHECK_SYSTEM(sys)) {
507 FPRINTF(stderr,"Bad linsolqr_system_t found. rhs not added.\n");
508 return;
509 }
510 rl = find_rhs(sys->rl,rhs);
511 if( NOTNULL(rl) ) {
512 return;
513 } else {
514 if( NOTNULL(rhs) ) {
515 rl = (struct rhs_list *)ascmalloc( sizeof(struct rhs_list) );
516 rl->rhs = rhs;
517 rl->varvalue = NULL;
518 rl->solved = FALSE;
519 rl->transpose = transpose;
520 rl->next = sys->rl;
521 sys->rl = rl;
522 ++(sys->rlength);
523 if (sys->capacity>0) {
524 rl->varvalue =
525 ASC_NEW_ARRAY(real64,sys->capacity);
526 }
527 }
528 }
529 }
530
531 void linsolqr_remove_rhs(linsolqr_system_t sys,real64 *rhs)
532 {
533 struct rhs_list **q;
534
535 if(CHECK_SYSTEM(sys)) {
536 FPRINTF(stderr,"Bad linsolqr_system_t found. rhs not removed.\n");
537 return;
538 }
539 for( q = &(sys->rl) ; NOTNULL(*q) ; q = &((*q)->next) )
540 if( (*q)->rhs == rhs )
541 break;
542 if( NOTNULL(*q) ) {
543 struct rhs_list *p;
544 p = *q;
545 *q = p->next;
546 if( NOTNULL(p->varvalue) )
547 ascfree( (POINTER)(p->varvalue) );
548 ascfree( (POINTER)p );
549 --(sys->rlength);
550 } else if( NOTNULL(rhs) ) {
551 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_remove_rhs\n");
552 FPRINTF(stderr," Rhs does not exist.\n");
553 }
554 }
555
556 int32 linsolqr_number_of_rhs(linsolqr_system_t sys)
557 {
558 CHECK_SYSTEM(sys);
559 return( sys->rlength );
560 }
561
562 real64 *linsolqr_get_rhs(linsolqr_system_t sys,int n)
563 {
564 struct rhs_list *rl;
565 int count;
566
567 CHECK_SYSTEM(sys);
568
569 count = sys->rlength - 1 - n;
570 if( count < 0 ) return(NULL);
571 for( rl = sys->rl ; count > 0 && NOTNULL(rl) ; rl = rl->next )
572 --count;
573 return( ISNULL(rl) ? NULL : rl->rhs );
574 }
575
576 void linsolqr_matrix_was_changed(linsolqr_system_t sys)
577 {
578 if(CHECK_SYSTEM(sys)) {
579 FPRINTF(stderr,
580 "Bad linsolqr_system_t found. matrix change message ignored.\n");
581 return;
582 }
583 sys->rowdeps = sys->coldeps = sys->factored = FALSE;
584 }
585
586 void linsolqr_rhs_was_changed(linsolqr_system_t sys, real64 *rhs)
587 {
588 struct rhs_list *rl;
589
590 if(CHECK_SYSTEM(sys)) {
591 FPRINTF(stderr,"Bad linsolqr_system_t found. rhs change ignored.\n");
592 return;
593 }
594 rl = find_rhs(sys->rl,rhs);
595 if( NOTNULL(rl) ) {
596 rl->solved = FALSE;
597 } else if( NOTNULL(rhs) ) {
598 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_rhs_was_modified\n");
599 FPRINTF(stderr," Rhs does not exist.\n");
600 }
601 }
602
603 void linsolqr_set_pivot_zero(linsolqr_system_t sys,real64 pivot_zero)
604 {
605 if(CHECK_SYSTEM(sys)) {
606 FPRINTF(stderr,"Bad linsolqr_system_t found. set_pivot_zero ignored.\n");
607 return;
608 }
609 if( pivot_zero < D_ZERO ) {
610 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_pivot_zero\n");
611 FPRINTF(stderr," Invalid pivot zero of %g\n",pivot_zero);
612 } else {
613 sys->pivot_zero = pivot_zero;
614 linsolqr_matrix_was_changed(sys);
615 }
616 }
617
618 real64 linsolqr_pivot_zero(linsolqr_system_t sys)
619 {
620 CHECK_SYSTEM(sys);
621 return( sys->pivot_zero );
622 }
623
624 void linsolqr_set_pivot_tolerance(linsolqr_system_t sys, real64 ptol)
625 {
626 if(CHECK_SYSTEM(sys)) {
627 FPRINTF(stderr,"Bad linsolqr_system_t found. set_pivot_tol ignored.\n");
628 return;
629 }
630 if( ptol < D_ZERO || ptol > D_ONE ) {
631 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_pivot_tolerance\n");
632 FPRINTF(stderr," Invalid pivot tolerance of %g\n",ptol);
633 } else {
634 sys->ptol = ptol;
635 linsolqr_matrix_was_changed(sys);
636 }
637 }
638
639 void linsolqr_set_condition_tolerance(linsolqr_system_t sys, real64 ctol)
640 {
641 if(CHECK_SYSTEM(sys)) {
642 FPRINTF(stderr,
643 "Bad linsolqr_system_t found. set_condition_tolerance ignored.\n");
644 return;
645 }
646 if( ctol < D_ZERO || ctol > D_ONE ) {
647 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_condition_tolerance\n");
648 FPRINTF(stderr," Invalid condition tolerance of %g\n",ctol);
649 } else {
650 sys->ctol = ctol;
651 linsolqr_matrix_was_changed(sys);
652 }
653 }
654
655 void linsolqr_set_drop_tolerance(linsolqr_system_t sys, real64 dtol)
656 {
657 if(CHECK_SYSTEM(sys)) {
658 FPRINTF(stderr,
659 "Bad linsolqr_system_t found. set_drop_tolerance ignored.\n");
660 return;
661 }
662 if( dtol < D_ZERO || dtol > D_ONE ) {
663 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_drop_tolerance\n");
664 FPRINTF(stderr," Invalid drop tolerance of %g\n",dtol);
665 } else {
666 sys->dtol = dtol;
667 linsolqr_matrix_was_changed(sys);
668 }
669 }
670
671 real64 linsolqr_pivot_tolerance(linsolqr_system_t sys)
672 {
673 CHECK_SYSTEM(sys);
674 return( sys->ptol );
675 }
676
677 real64 linsolqr_condition_tolerance(linsolqr_system_t sys)
678 {
679 CHECK_SYSTEM(sys);
680 return( sys->ctol );
681 }
682
683 real64 linsolqr_drop_tolerance(linsolqr_system_t sys)
684 {
685 CHECK_SYSTEM(sys);
686 return( sys->dtol );
687 }
688
689 extern enum factor_class linsolqr_fclass(linsolqr_system_t sys)
690 {
691 CHECK_SYSTEM(sys);
692 return( sys->fclass );
693 }
694 extern enum factor_method linsolqr_fmethod(linsolqr_system_t sys)
695 {
696 CHECK_SYSTEM(sys);
697 return( sys->fmethod );
698 }
699 extern enum reorder_method linsolqr_rmethod(linsolqr_system_t sys)
700 {
701 CHECK_SYSTEM(sys);
702 return( sys->rmethod );
703 }
704
705 int32 linsolqr_rank(linsolqr_system_t sys)
706 {
707 CHECK_SYSTEM(sys);
708 if( !sys->factored ) {
709 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_rank\n");
710 FPRINTF(stderr," System not factored yet.\n");
711 }
712 return(sys->rank);
713 }
714
715 real64 linsolqr_smallest_pivot(linsolqr_system_t sys)
716 {
717 CHECK_SYSTEM(sys);
718 #if LINSOL_DEBUG
719 if( !sys->factored ) {
720 FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_smallest_pivot\n");
721 FPRINTF(stderr," System not factored yet.\n");
722 }
723 #endif
724 return(sys->smallest_pivot);
725 }
726
727 /***************************************************************************\
728 Commonly used internal functions for sparse linear solvers based on mtx.
729 void insure_capacity(sys)
730 void insure_lu_capacity(sys)
731 void insure_qr_capacity(sys)
732 void forward_substitute(sys,rvec,transpose)
733 void backward_substitute(sys,rvec,transpose)
734 macro SQR(x)
735 int find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi)
736 \***************************************************************************/
737
738 static real64 *raise_capacity(real64 *vec,
739 int32 oldcap,
740 int32 newcap)
741 /**
742 *** Raises the capacity of the array and returns a new array.
743 *** It is assumed that oldcap < newcap. vec is destroyed or
744 *** returned as appropriate.
745 *** If !NDEBUG, the vector expanded is also set to 0.
746 **/
747 {
748 real64 *newvec=NULL;
749 #ifndef NDEBUG
750 int i;
751 #endif
752 if (newcap < oldcap) {
753 #ifndef NDEBUG
754 for (i = 0; i < newcap; i++) {
755 newvec[i] = 0.0;
756 }
757 #endif
758 return vec;
759 }
760 if (NOTNULL(vec)) {
761 /* don't call realloc on null with newcap 0 or it frees */
762 newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64)));
763 } else {
764 newvec=(newcap > 0 ?
765 ASC_NEW_ARRAY(real64,newcap ) : NULL);
766 }
767 #ifndef NDEBUG
768 for (i = 0; i < newcap; i++) {
769 newvec[i] = 0.0;
770 }
771 #endif
772 return newvec;
773 }
774
775 static int32 *raise_qr_int_capacity(int32 *vec,
776 int32 oldcap,
777 int32 newcap)
778 /**
779 *** Raises the capacity of the index array and returns a new array.
780 *** It is assumed that oldcap < newcap. vec is destroyed or
781 *** returned as appropriate. vec returned is zeroed.
782 *** calling with newcap=0 does not force deallocation.
783 **/
784 {
785 int32 *newvec=NULL;
786 if (newcap < oldcap)
787 return vec;
788 if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
789 newvec=(int32 *)ascrealloc(vec,(newcap * sizeof(int32)));
790 mtx_zero_int32(vec,newcap);
791 } else {
792 newvec= (newcap > 0 ?
793 (int32 *)asccalloc( newcap , sizeof(int32) ) : NULL);
794 }
795 return newvec;
796 }
797
798 static real64 *raise_qr_real_capacity(real64 *vec,
799 int32 oldcap,
800 int32 newcap)
801 /**
802 *** Raises the capacity of the real array and returns a new array.
803 *** It is assumed that oldcap < newcap. vec is destroyed or
804 *** returned as appropriate. vec returned is zeroed.
805 *** calling with newcap=0 does not force deallocation.
806 **/
807 {
808 real64 *newvec=NULL;
809 if (newcap < oldcap)
810 return vec;
811 if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
812 newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64)));
813 mtx_zero_real64(vec,newcap);
814 } else {
815 newvec= (newcap > 0 ?
816 (real64 *)asccalloc( newcap , sizeof(real64) ) : NULL);
817 }
818 return newvec;
819 }
820
821 static struct qr_fill_t *raise_qr_fill_capacity(struct qr_fill_t *vec,
822 int32 oldcap,
823 int32 newcap)
824 /**
825 *** Raises the capacity of the fill array and returns a new array.
826 *** It is assumed that oldcap < newcap. vec is destroyed or
827 *** returned as appropriate. vec returned is zeroed.
828 *** calling with newcap=0 does not force deallocation.
829 **/
830 {
831 struct qr_fill_t *newvec=NULL;
832 if (newcap < oldcap)
833 return vec;
834 if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */
835 newvec=(struct qr_fill_t *)
836 ascrealloc(vec,(newcap * sizeof(struct qr_fill_t)));
837 mtx_zero_char((char *)vec,newcap*sizeof(struct qr_fill_t));
838 } else {
839 newvec= (newcap > 0 ?
840 (struct qr_fill_t *)asccalloc( newcap , sizeof(struct qr_fill_t) ) :
841 NULL);
842 }
843 return newvec;
844 }
845
846 static void insure_capacity(linsolqr_system_t sys)
847 /**
848 *** Insures that the capacity of all of the solution vectors
849 *** for each rhs is large enough.
850 *** The above implies a malloc if varvalue is null.
851 *** Assumes varvalue are at sys->capacity already.
852 **/
853 {
854 int32 req_cap;
855 req_cap = mtx_capacity(sys->coef);
856
857 if( req_cap > sys->capacity ) {
858 struct rhs_list *rl;
859
860 for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
861 rl->varvalue = raise_capacity(rl->varvalue,sys->capacity,req_cap);
862 sys->capacity = req_cap;
863 }
864 }
865
866 static void insure_lu_capacity(linsolqr_system_t sys)
867 /**
868 *** Insures that the capacity of all of the ludata vectors.
869 *** The above implies a malloc if vector is null.
870 *** If not null, implies an extension if needed.
871 **/
872 {
873 int32 req_cap;
874 if (!sys || !(sys->coef) || !(sys->ludata)) {
875 FPRINTF(stderr,"linsolqr: insure_lu_capacity called with NULL pointer");
876 return;
877 }
878 req_cap = mtx_capacity(sys->coef);
879
880 if( req_cap > sys->ludata->cap ) {
881 sys->ludata->pivlist =
882 raise_capacity(sys->ludata->pivlist,sys->ludata->cap,req_cap);
883 sys->ludata->tmp =
884 raise_capacity(sys->ludata->tmp,sys->ludata->cap,req_cap);
885 sys->ludata->cap = req_cap;
886 }
887 }
888
889 static void insure_qr_capacity(linsolqr_system_t sys)
890 /**
891 *** Insures that the capacity of all of the qrdata vectors
892 *** is large enough.
893 *** The above implies a calloc if vector is null.
894 *** If not null, implies an extension and zeroing of the vector.
895 *** Also zeroes the simple elements of the qrdata.
896 **/
897 {
898 int32 req_cap;
899 if (!sys || !(sys->coef) || !(sys->qrdata)) {
900 FPRINTF(stderr,"linsolqr: insure_qr_capacity called with NULL pointer");
901 return;
902 }
903 req_cap = mtx_capacity(sys->coef);
904
905 if( req_cap > sys->qrdata->cap ) {
906 sys->qrdata->alpha =
907 raise_qr_real_capacity(sys->qrdata->alpha,sys->qrdata->cap,req_cap);
908 sys->qrdata->sigma =
909 raise_qr_real_capacity(sys->qrdata->sigma,sys->qrdata->cap,req_cap);
910 sys->qrdata->tau =
911 raise_qr_real_capacity(sys->qrdata->tau,sys->qrdata->cap,req_cap);
912 sys->qrdata->hhcol =
913 raise_qr_real_capacity(sys->qrdata->hhcol,sys->qrdata->cap,req_cap);
914 sys->qrdata->hhrow =
915 raise_qr_real_capacity(sys->qrdata->hhrow,sys->qrdata->cap,req_cap);
916
917 sys->qrdata->sp.data =
918 raise_qr_real_capacity(sys->qrdata->sp.data,sys->qrdata->cap,req_cap);
919 sys->qrdata->sp.idata =
920 raise_qr_int_capacity(sys->qrdata->sp.idata,sys->qrdata->cap,req_cap);
921
922 sys->qrdata->fill =
923 raise_qr_fill_capacity(sys->qrdata->fill,sys->qrdata->cap,req_cap);
924
925 sys->qrdata->cap = req_cap;
926 sys->qrdata->sp.cap = req_cap;
927 sys->qrdata->sp.len = 0;
928 }
929 sys->qrdata->nu=D_ZERO;
930 sys->qrdata->anu=D_ZERO;
931 sys->qrdata->asubnu=D_ZERO;
932 }
933
934 static void forward_substitute(linsolqr_system_t sys,
935 real64 *arr,
936 boolean transpose)
937 /**
938 *** Forward substitute. It is assumed that the L (or U) part of
939 *** sys->factors is computed. This function converts c to x in place. The
940 *** values are stored in arr indexed by original row number (or original
941 *** column number).
942 ***
943 *** transpose = FALSE: transpose = TRUE:
944 *** T
945 *** L x = c U x = c
946 ***
947 *** The following formula holds:
948 *** 0<=k<r ==> x(k) = [c(k) - L(k,(0..k-1)) dot x(0..k-1)] / L(k,k)
949 *** or
950 *** 0<=k<r ==> x(k) = [c(k) - U((0..k-1),k) dot x(0..k-1)] / U(k,k)
951 ***
952 *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k]
953 *** and in matrix.
954 **/
955 {
956 mtx_range_t dot_rng;
957 mtx_coord_t nz;
958 real64 sum, *pivlist;
959 mtx_matrix_t mtx;
960 int32 dotlim;
961 boolean nonzero_found=FALSE;
962
963 mtx=sys->factors;
964 pivlist=sys->ludata->pivlist;
965 dot_rng.low = sys->rng.low;
966 dotlim=dot_rng.low+sys->rank;
967 if (transpose) { /* arr is indexed by original column number */
968 for( nz.col=dot_rng.low; nz.col < dotlim; ++(nz.col) ) {
969 register int32 org_col;
970
971 dot_rng.high = nz.col - 1;
972 org_col = mtx_col_to_org(mtx,nz.col);
973 if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
974 if (nonzero_found) {
975 sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE);
976 /* arr[org_col] = (arr[org_col] - sum) / D_ONE */;
977 arr[org_col] -=sum;
978 }
979 }
980
981 } else { /* arr is indexed by original row number */
982 for( nz.row=dot_rng.low; nz.row < dotlim; ++(nz.row) ) {
983 register int32 org_row;
984
985 dot_rng.high = nz.row - 1;
986 org_row = mtx_row_to_org(mtx,nz.row);
987 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
988 if (nonzero_found) {
989 sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE);
990 /*
991 nz.col = nz.row;
992 arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz);
993 */
994 arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row];
995 }
996 }
997 }
998 }
999
1000 static void backward_substitute(linsolqr_system_t sys,
1001 real64 *arr,
1002 boolean transpose)
1003 /**
1004 *** Backward substitute. It is assumed that the U (or L) part of
1005 *** sys->factors is computed. This function converts rhs to c in place. The
1006 *** values are stored in arr indexed by original row number (or original
1007 *** column number).
1008 ***
1009 *** transpose = FALSE: transpose = TRUE:
1010 *** T
1011 *** U c = rhs L c = rhs
1012 ***
1013 *** The following formula holds:
1014 *** transpose=FALSE: (the usual for J.dx=-f where rhs is -f)
1015 *** 0<=k<r ==> c(k) = [rhs(k) - U(k,(k+1..r-1)) dot c(k+1..r-1)] / U(k,k)
1016 *** working up from the bottom.
1017 *** or
1018 *** transpose=TRUE:
1019 *** 0<=k<r ==> c(k) = [rhs(k) - L((k+1..r-1),k) dot c(k+1..r-1)] / L(k,k)
1020 *** working right to left.
1021 ***
1022 *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k]
1023 *** and in matrix.
1024 **/
1025 {
1026 mtx_range_t dot_rng;
1027 mtx_coord_t nz;
1028 real64 sum, *pivlist;
1029 mtx_matrix_t mtx;
1030 int32 dotlim;
1031 boolean nonzero_found=FALSE; /* once TRUE, substitution must be done
1032 over remaining rows/cols */
1033
1034 dot_rng.high = sys->rng.low + sys->rank - 1;
1035 dotlim=sys->rng.low;
1036 mtx=sys->factors;
1037 pivlist=sys->ludata->pivlist;
1038 if (transpose) { /* arr is indexed by original column number */
1039 for( nz.col = dot_rng.high ; nz.col >= dotlim ; --(nz.col) ) {
1040 register int32 org_col;
1041
1042 dot_rng.low = nz.col + 1;
1043
1044 org_col = mtx_col_to_org(mtx,nz.col);
1045 if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
1046 if (nonzero_found) {
1047 sum= mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE);
1048 /*
1049 reminders:
1050 nz.row = nz.col;
1051 arr[org_col] = (arr[org_col] - sum) / mtx_value(mtx,&nz);
1052 */
1053 arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col];
1054 }
1055 }
1056 } else { /* arr is indexed by original row number */
1057 /* we are working from the bottom up */
1058 for( nz.row = dot_rng.high ; nz.row >= dotlim ; --(nz.row) ) {
1059 register int32 org_row;
1060
1061 dot_rng.low = nz.row + 1;
1062 org_row = mtx_row_to_org(mtx,nz.row);
1063
1064 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
1065 if (nonzero_found) {
1066 sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE);
1067 /*
1068 reminders:
1069 nz.row = nz.col;
1070 arr[org_row] = (arr[org_row] - sum) / D_ONE;
1071 */
1072 arr[org_row] -= sum;
1073 }
1074 }
1075 }
1076 }
1077
1078 #ifdef SQR
1079 #undef SQR
1080 #endif
1081 #define SQR(x) ((x)*(x))
1082 /*
1083 * int32 find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi);
1084 * real64 *vec,*rvec,tol;
1085 * int32 len, *ivec, *maxi;
1086 *
1087 * Search array vec of positive numbers for the first entry, k, which passes
1088 * (vec[k] >= tol*vecmax) where vecmax is the largest value in vec.
1089 * vec is an array len long. rvec, ivec are (worst case) len long.
1090 * *maxi will be set to the index of the first occurence of the largest
1091 * number in vec which is >= eps.
1092 * 0.0 <= tol <= 1.
1093 * If tol <=0, no search is done (*maxi = 0, k = 0).
1094 * Eps is an absolute number which vec values must be >= to before
1095 * being eligible for the tol test.
1096 *
1097 * Best case, highly nonlinear feature:
1098 * if on entry *maxi = len, we will do a simplified search for the
1099 * first k which satisfies tol and eps tests, based on the value
1100 * stored at vec[len-1].
1101 *
1102 * We could allocate and deallocate rvec/ivec since they are
1103 * temporaries, but it is more efficient for caller to manage that.
1104 * If tol>=1.0, rvec and ivec are ignored and may be NULL.
1105 * NOTE: we are going on value vec[i], not fabs(vec[i]).
1106 * Returns k.
1107 * Remember: GIGO.
1108 * Failure modes- if all numbers are <= 0.0, k and *maxi will be returned 0.
1109 * Failure modes- if all numbers are < eps, k and *maxi will be returned 0.
1110 *
1111 * find_pivot_index, should we implement, would search an int vector.
1112 *
1113 * Hint: if you know you have not increased any values in vec and have not
1114 * changed the value at vec[maxi] before a subsequent search, then call
1115 * with len = previous maxi+1 to reduce the search time.
1116 */
1117 static int32 find_pivot_number(const real64 *vec,
1118 const int32 len,
1119 const real64 tol,
1120 const real64 eps,
1121 int32 * const ivec,
1122 real64 * const rvec,
1123 int32 * const maxi)
1124 {
1125 int32 j,k;
1126 if (tol <= D_ZERO ) {
1127 *maxi = 0;
1128 return 0;
1129 }
1130 if (tol >= D_ONE) {
1131 register real64 biggest = MAX(eps,D_ZERO);
1132 /* get the biggest, period */
1133 k = 0;
1134 for (j=0; j < len; j++) {
1135 if (vec[j] > biggest) {
1136 biggest = vec[j];
1137 k = j;
1138 }
1139 }
1140 *maxi = k;
1141 } else {
1142 int32 bigi;
1143 bigi = 0;
1144 rvec[0] = eps;
1145 ivec[0] = 0;
1146 if (*maxi==len) {
1147 /* cheap search against eps and tol, no list. */
1148 register real64 thold;
1149 thold = tol * vec[len-1];
1150 if (thold >= eps) {
1151 for (k = 0; k < len && vec[k] < thold; k++);
1152 } else {
1153 for (k = 0; k < len && vec[k] < eps; k++);
1154 }
1155 /* adjust maxi to still point at last location, as indicated by len */
1156 (*maxi)--;
1157 } else {
1158 real64 rlast;
1159 rlast = eps;
1160 /* create short list */
1161 for (j=0; j < len; j++) {
1162 while (j < len && vec[j] <= rlast) j++; /* skip to next max */
1163 if (j < len) {
1164 ivec[bigi] = j;
1165 rvec[bigi] = vec[j];
1166 bigi++; /* bigi ends up being len of rvec or, if vec all<= eps, 0 */
1167 rlast = rvec[bigi-1];
1168 }
1169 }
1170 if (bigi == 0) {
1171 *maxi = k = 0;
1172 } else {
1173 register real64 thold;
1174 /* Note: if bigi is enormous, we should do a binary search,
1175 not linear. */
1176 *maxi = ivec[bigi-1];
1177 thold = tol * rvec[bigi-1];
1178 /* get pivot from shortlist, searching backward */
1179 if (thold >= eps) {
1180 for (k = bigi-1; k >= 0 && rvec[k] >= thold; k--);
1181 } else {
1182 for (k = bigi-1; k >= 0 && rvec[k] >= eps; k--);
1183 }
1184 /* translate pivot to vec index */
1185 k = ivec[k+1];
1186 }
1187 }
1188 }
1189 return k;
1190 }
1191
1192 static void calc_dependent_rows_ranki1(linsolqr_system_t sys)
1193 {
1194 mtx_coord_t nz;
1195 real64 value;
1196 mtx_range_t colrange;
1197 mtx_range_t rowrange;
1198 real64 *lc;
1199 mtx_matrix_t mtx;
1200
1201 sys->rowdeps = TRUE;
1202 if( ( (sys->reg.row.low == sys->rng.low) &&
1203 ( sys->reg.row.high == sys->rng.low+sys->rank-1 )
1204 ) || sys->rank==0 )
1205 return;
1206
1207 lc = sys->ludata->tmp;
1208 colrange.low = sys->rng.low;
1209 colrange.high = colrange.low + sys->rank - 1;
1210 rowrange.low = sys->rng.high;
1211 rowrange.high = sys->rng.low+sys->rank;
1212 mtx=sys->factors;
1213
1214 nz.row = sys->reg.row.low;
1215 for( ; nz.row <= sys->reg.row.high; nz.row++ ) {
1216 if( nz.row == sys->rng.low ) {
1217 nz.row = rowrange.high-1;
1218 continue;
1219 }
1220 mtx_zero_real64(lc,(sys->capacity));
1221 mtx_org_row_vec(mtx,nz.row,lc,&colrange);
1222 if( nz.row < rowrange.high || nz.row > rowrange.low )
1223 backward_substitute(sys,lc,TRUE);
1224 forward_substitute(sys,lc,TRUE);
1225 mtx_clear_row(mtx,nz.row,&colrange);
1226 for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) {
1227 value = lc[mtx_col_to_org(mtx,nz.col)];
1228 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
1229 }
1230 }
1231 }
1232
1233
1234 static void calc_dependent_cols_ranki1(linsolqr_system_t sys)
1235 {
1236 mtx_coord_t nz;
1237 real64 value;
1238 mtx_range_t rowrange;
1239 mtx_range_t colrange;
1240 real64 *lc;
1241 mtx_matrix_t mtx;
1242
1243 sys->coldeps = TRUE;
1244 if( ( (sys->reg.col.low == sys->rng.low) &&
1245 ( sys->reg.col.high == sys->rng.low+sys->rank-1 )
1246 ) || sys->rank==0 )
1247 return;
1248
1249 lc = sys->ludata->tmp;
1250 rowrange.low = sys->rng.low;
1251 rowrange.high = rowrange.low + sys->rank - 1;
1252 colrange.high = sys->rng.low+sys->rank;
1253 colrange.low = sys->rng.high;
1254 mtx=sys->factors;
1255
1256 nz.col = sys->reg.col.low;
1257 for( ; nz.col <= sys->reg.col.high; nz.col++ ) {
1258 if( nz.col == sys->rng.low ) {
1259 nz.col = colrange.high-1;
1260 continue;
1261 }
1262 mtx_zero_real64(lc,sys->capacity);
1263 mtx_org_col_vec(mtx,nz.col,lc,&rowrange);
1264 if( nz.col < colrange.high || nz.col > colrange.low )
1265 backward_substitute(sys,lc,FALSE);
1266 forward_substitute(sys,lc,FALSE);
1267 mtx_clear_col(mtx,nz.col,&rowrange);
1268 for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) {
1269 value = lc[mtx_row_to_org(mtx,nz.row)];
1270 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
1271 }
1272 }
1273 }
1274
1275 /**
1276 *** Given a matrix and a diagonal range, sets the diagonal elements to 0
1277 *** but does not delete them in the range low to high inclusive.
1278 *** worst cost= k*(nonzeros in rows of range treated).
1279 *** likely cost= k*(nonzeros in column pivoted rows of range treated)
1280 *** since you can smartly put in the diagonal last and find
1281 *** it first on many occasions.
1282 *** This is a good bit cheaper than deleting the elements unless one
1283 *** expects thousands of solves.
1284 **/
1285 static void zero_diagonal_elements(mtx_matrix_t mtx,
1286 int32 low, int32 high)
1287 {
1288 mtx_coord_t nz;
1289 for (nz.row = low; nz.row <= high; nz.row++) {
1290 nz.col = nz.row;
1291 mtx_set_value(mtx,&nz,0.0);
1292 }
1293 }
1294
1295 static void zero_unpivoted_vars(linsolqr_system_t sys,
1296 real64 *varvalues,
1297 boolean transpose)
1298 /**
1299 *** Sets the values of unpivoted variables to zero.
1300 **/
1301 {
1302 int32 ndx,order;
1303 mtx_matrix_t mtx;
1304
1305 mtx = sys->factors;
1306 order = mtx_order(mtx);
1307 for( ndx = 0 ; ndx < sys->rng.low ; ++ndx )
1308 if (transpose)
1309 varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO;
1310 else
1311 varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO;
1312
1313 for( ndx = sys->rng.low + sys->rank ; ndx < order ; ++ndx )
1314 if (transpose)
1315 varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO;
1316 else
1317 varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO;
1318 }
1319
1320 #if LINSOL_DEBUG
1321 static void debug_out_factors(FILE *fp,linsolqr_system_t sys)
1322 /**
1323 *** Outputs permutation and values of the nonzero elements in the
1324 *** factor matrix square region given by sys->rng.
1325 **/
1326 {
1327 mtx_region_t reg;
1328 reg.row.low=reg.col.low=sys->rng.low;
1329 reg.row.high=reg.col.high=sys->rng.high;
1330 mtx_write_region(fp,sys->factors,&reg);
1331 }
1332 #endif /* LINSOL_DEBUG */
1333
1334 /***************************************************************************\
1335 Reordering functions for SPK1, and possibly for other schemes to be
1336 implemented later.
1337 The stuff here is almost, but not quite, black magic. Don't tinker with it.
1338 \***************************************************************************/
1339
1340 /*********************************
1341 begin of spk1 stuff
1342 *********************************/
1343 struct reorder_list { /* List of rows/columns and their counts. */
1344 int32 ndx;
1345 int32 count;
1346 struct reorder_list *next;
1347 };
1348
1349 struct reorder_vars {
1350 mtx_matrix_t mtx;
1351 mtx_region_t reg; /* Current active region */
1352 int32 colhigh; /* Original value of reg.col.high */
1353 struct reorder_list *tlist; /* Array of (enough) list elements */
1354 int32 *rowcount; /* rowcount[reg.row.low .. reg.row.high] */
1355 };
1356
1357 static void adjust_row_count(struct reorder_vars *vars,int32 removed_col)
1358 /**
1359 *** Adjusts the row counts to account for the (to be) removed column.
1360 **/
1361 {
1362 mtx_coord_t nz;
1363 real64 value;
1364 nz.col = removed_col;
1365 nz.row = mtx_FIRST;
1366 while( value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row)),
1367 nz.row != mtx_LAST )
1368 --(vars->rowcount[nz.row]);
1369 }
1370
1371 static void assign_row_and_col(struct reorder_vars *vars,
1372 int32 row,
1373 int32 col)
1374 /**
1375 *** Assigns the given row to the given column, moving the row and column
1376 *** to the beginning of the active region and removing them (readjusting
1377 *** the region). The row counts are NOT adjusted. If col == mtx_NONE,
1378 *** then no column is assigned and the row is moved to the end of the
1379 *** active block instead. Otherwise, it is assumed that the column
1380 *** is active.
1381 **/
1382 {
1383 if( col == mtx_NONE ) {
1384 mtx_swap_rows(vars->mtx,row,vars->reg.row.high);
1385 vars->rowcount[row] = vars->rowcount[vars->reg.row.high];
1386 --(vars->reg.row.high);
1387 } else {
1388 mtx_swap_rows(vars->mtx,row,vars->reg.row.low);
1389 vars->rowcount[row] = vars->rowcount[vars->reg.row.low];
1390 mtx_swap_cols(vars->mtx,col,vars->reg.col.low);
1391 ++(vars->reg.row.low);
1392 ++(vars->reg.col.low);
1393 }
1394 }
1395
1396 static void push_column_on_stack(struct reorder_vars *vars,int32 col)
1397 /**
1398 *** Pushes the given column onto the stack. It is assumed that the
1399 *** column is active. Row counts are adjusted.
1400 **/
1401 {
1402 adjust_row_count(vars,col);
1403 mtx_swap_cols(vars->mtx,col,vars->reg.col.high);
1404 --(vars->reg.col.high);
1405 }
1406
1407 static int32 pop_column_from_stack(struct reorder_vars *vars)
1408 /**
1409 *** Pops the column on the "top" of the stack off of the stack and
1410 *** returns the column index, where it now lies in the active region.
1411 *** If the stack is empty, mtx_NONE is returned. Row counts are NOT
1412 *** adjusted (this together with a subsequent assignment of this column
1413 *** ==> no row count adjustment necessary).
1414 **/
1415 {
1416 if( vars->reg.col.high < vars->colhigh )
1417 return(++(vars->reg.col.high));
1418 else
1419 return( mtx_NONE );
1420 }
1421
1422 static void assign_null_rows(struct reorder_vars *vars)
1423 /**
1424 *** Assigns empty rows, moving them to the assigned region. It is
1425 *** assumed that row counts are correct. Columns are assigned off the
1426 *** stack.
1427 **/
1428 {
1429 int32 row;
1430
1431 for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1432 if( vars->rowcount[row] == 0 )
1433 assign_row_and_col(vars , row , pop_column_from_stack(vars));
1434 }
1435
1436 static void forward_triangularize(struct reorder_vars *vars)
1437 /**
1438 *** Forward triangularizes the region, assigning singleton rows with their
1439 *** one and only incident column until there are no more. The row counts
1440 *** must be correct, and they are updated.
1441 **/
1442 {
1443 boolean change;
1444 mtx_coord_t nz;
1445 real64 value;
1446
1447 do {
1448 change = FALSE;
1449 for( nz.row = vars->reg.row.low ;
1450 nz.row <= vars->reg.row.high && vars->rowcount[nz.row] != 1;
1451 ++nz.row ) ;
1452 if( nz.row <= vars->reg.row.high ) {
1453 /* found singleton row */
1454 nz.col = mtx_FIRST; /* this is somehow coming back with nz.col -1 */
1455 value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col));
1456 adjust_row_count(vars,nz.col);
1457 assign_row_and_col(vars,nz.row,nz.col);
1458 change = TRUE;
1459 }
1460 } while( change );
1461 }
1462
1463 static int32 select_row(struct reorder_vars *vars)
1464 /**
1465 *** Selects a row and returns its index. It is assumed that there is a
1466 *** row. Row counts must be correct. vars->tlist will be used.
1467 **/
1468 {
1469 int32 min_row_count;
1470 int32 max_col_count;
1471 int32 row;
1472 int32 i, nties=-2; /* # elements currently defined in vars->tlist */
1473 int32 sum;
1474 mtx_coord_t nz;
1475 real64 value;
1476 mtx_matrix_t mtx;
1477 mtx_range_t *colrng, *rowrng;
1478
1479 /* Set to something > any possible value */
1480 min_row_count = vars->reg.col.high-vars->reg.col.low+2;
1481 for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1482 if( vars->rowcount[row] <= min_row_count ) {
1483 if( vars->rowcount[row] < min_row_count ) {
1484 min_row_count = vars->rowcount[row];
1485 nties = 0;
1486 }
1487 vars->tlist[nties++].ndx = row;
1488 }
1489 /**
1490 *** vars->tlist[0..nties-1] is a list of row numbers which tie for
1491 *** minimum row count.
1492 **/
1493
1494 max_col_count = -1; /* < any possible value */
1495 i = 0;
1496 mtx = vars->mtx;
1497 colrng=&(vars->reg.col);
1498 rowrng=&(vars->reg.row);
1499 while( i < nties ) {
1500
1501 sum = 0;
1502 nz.row = vars->tlist[i].ndx;
1503 nz.col = mtx_FIRST;
1504 while( value = mtx_next_in_row(mtx,&nz,colrng),
1505 nz.col != mtx_LAST )
1506 sum += mtx_nonzeros_in_col(mtx,nz.col,rowrng);
1507 if( sum > max_col_count ) {
1508 max_col_count = sum;
1509 row = nz.row;
1510 }
1511 i++;
1512 }
1513 /**
1514 *** Now row contains the row with the minimum row count, which has the
1515 *** greatest total column count of incident columns among all rows with
1516 *** the same (minimum) row count. Select it.
1517 **/
1518 return(row);
1519 }
1520
1521 static void spk1_reorder(struct reorder_vars *vars)
1522 /**
1523 *** Reorders the assigned matrix vars->mtx within the specified bounding
1524 *** block region vars->reg. The region is split into 6 subregions during
1525 *** reordering: the rows are divided in two, and the columns divided in
1526 *** three. Initially everything is in the active subregion. Ultimately,
1527 *** everything will be assigned.
1528 ***
1529 *** <-- assigned -->|<-- active-->|<-- on stack -->|
1530 *** ----+----------------+-------------+----------------+
1531 *** a | | | |
1532 *** s | | | |
1533 *** s | | | |
1534 *** i | (SQUARE) | | |
1535 *** g | | | |
1536 *** n | | | |
1537 *** e | | | |
1538 *** d | | | |
1539 *** ----+----------------+-------------+----------------+
1540 *** a | | | |
1541 *** c | | ACTIVE | |
1542 *** t | | REGION | |
1543 *** i | | | |
1544 *** v | | | |
1545 *** e | | | |
1546 *** ----+----------------+-------------+----------------+
1547 ***
1548 *** The algorithm is roughly as follows:
1549 *** (1) select a row (by some criterion).
1550 *** (2) push columns incident on that row onto the stack in decreasing
1551 *** order of their length.
1552 *** (3) pop first column off the stack and assign it to the selected
1553 *** row.
1554 *** (4) forward-triangularize (assign singleton rows with their one
1555 *** and only incident column, until no longer possible).
1556 ***
1557 *** (1)-(4) should be repeated until the active subregion becomes empty.
1558 ***
1559 *** Everything above was written as though the entire matrix is
1560 *** involved. In reality, only the relevant square region is involved.
1561 **/
1562 {
1563 int32 row, size;
1564 int32 *rowcount_array_origin;
1565 mtx_matrix_t mtx;
1566
1567 size = MAX(vars->reg.row.high,vars->reg.col.high) + 1;
1568 vars->tlist = size > 0 ? (struct reorder_list *)
1569 ascmalloc( size*sizeof(struct reorder_list) ) : NULL;
1570 vars->rowcount = rowcount_array_origin = size > 0 ? (int32 *)
1571 ascmalloc( size*sizeof(int32) ) : NULL;
1572 mtx = vars->mtx;
1573
1574 vars->colhigh = vars->reg.col.high;
1575 /* Establish row counts */
1576 for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row )
1577 vars->rowcount[row] =
1578 mtx_nonzeros_in_row(mtx,row,&(vars->reg.col));
1579
1580 while(TRUE) {
1581 struct reorder_list *head;
1582 int32 nelts; /* # elements "allocated" from vars->tlist */
1583 mtx_coord_t nz;
1584 real64 value;
1585
1586 forward_triangularize(vars);
1587 assign_null_rows(vars);
1588 if( vars->reg.row.low>vars->reg.row.high ||
1589 vars->reg.col.low>vars->reg.col.high ) {
1590 /* Active region is now empty, done */
1591 if( NOTNULL(vars->tlist) )
1592 ascfree( vars->tlist );
1593 if( NOTNULL(rowcount_array_origin) )
1594 ascfree( rowcount_array_origin );
1595 return;
1596 }
1597
1598 head = NULL;
1599 nelts = 0;
1600 nz.row = select_row(vars);
1601 nz.col = mtx_FIRST;
1602 while( value = mtx_next_in_row(mtx,&nz,&(vars->reg.col)),
1603 nz.col != mtx_LAST ) {
1604 struct reorder_list **q,*p;
1605
1606 p = &(vars->tlist[nelts++]);
1607 p->ndx = mtx_col_to_org(mtx,nz.col);
1608 p->count = mtx_nonzeros_in_col(mtx,nz.col,&(vars->reg.row));
1609 for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) );
1610 p->next = *q;
1611 *q = p;
1612 }
1613 /**
1614 *** We now have a list of columns which intersect the selected row.
1615 *** The list is in order of decreasing column count.
1616 **/
1617
1618 /* Push incident columns on stack */
1619 for( ; NOTNULL(head) ; head = head->next )
1620 push_column_on_stack(vars,mtx_org_to_col(mtx,head->ndx));
1621
1622 /* Pop column off stack and assign to selected row */
1623 assign_row_and_col(vars , nz.row , pop_column_from_stack(vars));
1624 }
1625 /* Not reached. */
1626 }
1627
1628 /*********************************
1629 end of spk1 stuff
1630 *********************************/
1631 /*********************************
1632 begin of tspk1 stuff
1633 *********************************/
1634 struct creorder_list { /* List of columns/rows and their counts. */
1635 int32 ndx;
1636 int32 count;
1637 struct creorder_list *next;
1638 };
1639
1640 struct creorder_vars {
1641 mtx_matrix_t mtx;
1642 mtx_region_t reg; /* Current active region */
1643 int32 rowhigh; /* Original value of reg.row.high */
1644 struct creorder_list *tlist; /* Array of (enough) list elements */
1645 int32 *colcount; /* colcount[reg.col.low .. reg.col.high] */
1646 };
1647
1648 static void adjust_col_count(struct creorder_vars *vars,int32 removed_row)
1649 /**
1650 *** Adjusts the column counts to account for the (to be) removed row.
1651 **/
1652 {
1653 mtx_coord_t nz;
1654 real64 value;
1655 nz.row = removed_row;
1656 nz.col = mtx_FIRST;
1657 while( value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col)),
1658 nz.col != mtx_LAST )
1659 --(vars->colcount[nz.col]);
1660 }
1661
1662 static void assign_col_and_row(struct creorder_vars *vars,
1663 int32 col,
1664 int32 row)
1665 /**
1666 *** Assigns the given row to the given column, moving the row and column
1667 *** to the beginning of the active region and removing them (readjusting
1668 *** the region). The col counts are NOT adjusted. If col == mtx_NONE,
1669 *** then no column is assigned and the col is moved to the end of the
1670 *** active block instead. Otherwise, it is assumed that the row
1671 *** is active.
1672 **/
1673 {
1674 if( row == mtx_NONE ) {
1675 mtx_swap_cols(vars->mtx,col,vars->reg.col.high);
1676 vars->colcount[col] = vars->colcount[vars->reg.col.high];
1677 --(vars->reg.col.high);
1678 } else {
1679 mtx_swap_cols(vars->mtx,col,vars->reg.col.low);
1680 vars->colcount[col] = vars->colcount[vars->reg.col.low];
1681 mtx_swap_rows(vars->mtx,row,vars->reg.row.low);
1682 ++(vars->reg.col.low);
1683 ++(vars->reg.row.low);
1684 }
1685 }
1686
1687 static void push_row_on_stack(struct creorder_vars *vars,int32 row)
1688 /**
1689 *** Pushes the given row onto the stack. It is assumed that the
1690 *** row is active. Col counts are adjusted.
1691 **/
1692 {
1693 adjust_col_count(vars,row);
1694 mtx_swap_rows(vars->mtx,row,vars->reg.row.high);
1695 --(vars->reg.row.high);
1696 }
1697
1698 static int32 pop_row_from_stack(struct creorder_vars *vars)
1699 /**
1700 *** Pops the row on the "top" of the stack off of the stack and
1701 *** returns the row index, where it now lies in the active region.
1702 *** If the stack is empty, mtx_NONE is returned. Col counts are NOT
1703 *** adjusted (this together with a subsequent assignment of this row
1704 *** ==> no col count adjustment necessary).
1705 **/
1706 {
1707 if( vars->reg.row.high < vars->rowhigh )
1708 return(++(vars->reg.row.high));
1709 else
1710 return( mtx_NONE );
1711 }
1712
1713 static void assign_null_cols(struct creorder_vars *vars)
1714 /**
1715 *** Assigns empty cols, moving them to the assigned region. It is
1716 *** assumed that col counts are correct. Rows are assigned off the
1717 *** stack.
1718 **/
1719 {
1720 int32 col;
1721
1722 for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1723 if( vars->colcount[col] == 0 )
1724 assign_col_and_row(vars , col , pop_row_from_stack(vars));
1725 }
1726
1727 static void cforward_triangularize(struct creorder_vars *vars)
1728 /**
1729 *** Forward triangularizes the region, assigning singleton columns with their
1730 *** one and only incident row until there are no more. The column counts
1731 *** must be correct, and they are updated.
1732 **/
1733 {
1734 boolean change;
1735
1736 do {
1737 mtx_coord_t nz;
1738 real64 value;
1739 change = FALSE;
1740 for( nz.col = vars->reg.col.low ;
1741 nz.col <= vars->reg.col.high && vars->colcount[nz.col] != 1;
1742 ++nz.col ) ;
1743 if( nz.col <= vars->reg.col.high ) {
1744 /* found singleton col */
1745 nz.row = mtx_FIRST;
1746 value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row));
1747 adjust_col_count(vars,nz.row);
1748 assign_col_and_row(vars,nz.col,nz.row);
1749 change = TRUE;
1750 }
1751 } while( change );
1752 }
1753
1754 static int32 select_col(struct creorder_vars *vars)
1755 /**
1756 *** Selects a col and returns its index. It is assumed that there is a
1757 *** col. Col counts must be correct. vars->tlist will be used.
1758 **/
1759 {
1760 int32 min_col_count;
1761 int32 max_row_count;
1762 int32 col;
1763 int32 i, nties=-2; /* # elements currently defined in vars->tlist */
1764 int32 sum;
1765 mtx_coord_t nz;
1766 real64 value;
1767 mtx_matrix_t mtx;
1768 mtx_range_t *colrng, *rowrng;
1769
1770 /* Set to something > any possible value */
1771 min_col_count = vars->reg.row.high-vars->reg.row.low+2;
1772 for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1773 if( vars->colcount[col] <= min_col_count ) {
1774 if( vars->colcount[col] < min_col_count ) {
1775 min_col_count = vars->colcount[col];
1776 nties = 0;
1777 }
1778 vars->tlist[nties++].ndx = col;
1779 }
1780 /**
1781 *** vars->tlist[0..nties-1] is a list of row numbers which tie for
1782 *** minimum col count.
1783 **/
1784
1785 max_row_count = -1; /* < any possible value */
1786 i = 0;
1787 mtx = vars->mtx;
1788 rowrng=&(vars->reg.row);
1789 colrng=&(vars->reg.col);
1790 while( i < nties ) {
1791
1792 sum = 0;
1793 nz.row = mtx_FIRST;
1794 nz.col = vars->tlist[i].ndx;
1795 while( value = mtx_next_in_col(mtx,&nz,rowrng),
1796 nz.row != mtx_LAST )
1797 sum += mtx_nonzeros_in_row(mtx,nz.row,colrng);
1798 if( sum > max_row_count ) {
1799 max_row_count = sum;
1800 col = nz.col;
1801 }
1802 i++;
1803 }
1804 /**
1805 *** Now col contains the col with the minimum col count, which has the
1806 *** greatest total row count of incident rows among all cols with
1807 *** the same (minimum) col count. Select it.
1808 **/
1809 return(col);
1810 }
1811
1812 static void tspk1_reorder(struct creorder_vars *vars)
1813 /**
1814 *** Transpose the picture and explanation that follows:
1815 *** Reorders the assigned matrix vars->mtx within the specified bounding
1816 *** block region vars->reg. The region is split into 6 subregions during
1817 *** reordering: the rows are divided in two, and the columns divided in
1818 *** three. Initially everything is in the active subregion. Ultimately,
1819 *** everything will be assigned.
1820 ***
1821 *** <-- assigned -->|<-- active-->|<-- on stack -->|
1822 *** ----+----------------+-------------+----------------+
1823 *** a | | | |
1824 *** s | | | |
1825 *** s | | | |
1826 *** i | (SQUARE) | | |
1827 *** g | | | |
1828 *** n | | | |
1829 *** e | | | |
1830 *** d | | | |
1831 *** ----+----------------+-------------+----------------+
1832 *** a | | | |
1833 *** c | | ACTIVE | |
1834 *** t | | REGION | |
1835 *** i | | | |
1836 *** v | | | |
1837 *** e | | | |
1838 *** ----+----------------+-------------+----------------+
1839 ***
1840 *** The algorithm is roughly as follows:
1841 *** (1) select a row (by some criterion).
1842 *** (2) push columns incident on that row onto the stack in decreasing
1843 *** order of their length.
1844 *** (3) pop first column off the stack and assign it to the selected
1845 *** row.
1846 *** (4) forward-triangularize (assign singleton rows with their one
1847 *** and only incident column, until no longer possible).
1848 ***
1849 *** (1)-(4) should be repeated until the active subregion becomes empty.
1850 ***
1851 *** Everything above was written as though the entire matrix is
1852 *** involved. In reality, only the relevant square region is involved.
1853 **/
1854 {
1855 int32 col, size;
1856 int32 *colcount_array_origin;
1857 mtx_matrix_t mtx;
1858
1859 size = vars->reg.col.high - vars->reg.col.low + 1;
1860 size = MAX(size,vars->reg.row.high - vars->reg.row.low + 1);
1861 vars->tlist = size > 0 ? (struct creorder_list *)
1862 ascmalloc( size*sizeof(struct creorder_list) ) : NULL;
1863 colcount_array_origin = size > 0 ? (int32 *)
1864 ascmalloc( size*sizeof(int32) ) : NULL;
1865 vars->colcount =
1866 NOTNULL(colcount_array_origin) ?
1867 colcount_array_origin - vars->reg.col.low : NULL;
1868 mtx = vars->mtx;
1869
1870 vars->rowhigh = vars->reg.row.high;
1871 /* Establish col counts */
1872 for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col )
1873 vars->colcount[col] =
1874 mtx_nonzeros_in_col(mtx,col,&(vars->reg.row));
1875
1876 while(TRUE) {
1877 struct creorder_list *head;
1878 int32 nelts; /* # elements "allocated" from vars->tlist */
1879 mtx_coord_t nz;
1880 real64 value;
1881
1882 cforward_triangularize(vars);
1883 assign_null_cols(vars);
1884 if( vars->reg.col.low > vars->reg.col.high ||
1885 vars->reg.row.low > vars->reg.row.high ) {
1886 /* Active region is now empty, done */
1887 if( NOTNULL(vars->tlist) )
1888 ascfree( vars->tlist );
1889 if( NOTNULL(colcount_array_origin) )
1890 ascfree( colcount_array_origin );
1891 return;
1892 }
1893
1894 head = NULL;
1895 nelts = 0;
1896 nz.row = mtx_FIRST;
1897 nz.col = select_col(vars);
1898 while( value = mtx_next_in_col(mtx,&nz,&(vars->reg.row)),
1899 nz.row != mtx_LAST ) {
1900 struct creorder_list **q,*p;
1901
1902 p = &(vars->tlist[nelts++]);
1903 p->ndx = mtx_row_to_org(mtx,nz.row);
1904 p->count = mtx_nonzeros_in_row(mtx,nz.row,&(vars->reg.col));
1905 for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) );
1906 p->next = *q;
1907 *q = p;
1908 }
1909 /**
1910 *** We now have a list of columns which intersect the selected row.
1911 *** The list is in order of decreasing column count.
1912 **/
1913
1914 /* Push incident rows on stack */
1915 for( ; NOTNULL(head) ; head = head->next )
1916 push_row_on_stack(vars,mtx_org_to_row(mtx,head->ndx));
1917
1918 /* Pop row off stack and assign to selected col */
1919 assign_col_and_row(vars , nz.col , pop_row_from_stack(vars));
1920 }
1921 /* Not reached. */
1922 }
1923
1924 /*********************************
1925 end of tspk1 stuff
1926 *********************************/
1927
1928 static boolean nonempty_row(mtx_matrix_t mtx,int32 row)
1929 /**
1930 *** ? row not empty in mtx.
1931 **/
1932 {
1933 mtx_coord_t nz;
1934 real64 value;
1935 value = mtx_next_in_row(mtx, mtx_coord(&nz,row,mtx_FIRST), mtx_ALL_COLS);
1936 return( nz.col != mtx_LAST );
1937 }
1938
1939 static boolean nonempty_col(mtx_matrix_t mtx,int32 col)
1940 /**
1941 *** ? column not empty in mtx.
1942 **/
1943 {
1944 mtx_coord_t nz;
1945 real64 value;
1946 value = mtx_next_in_col(mtx, mtx_coord(&nz,mtx_FIRST,col), mtx_ALL_ROWS);
1947 return( nz.row != mtx_LAST );
1948 }
1949
1950 static void determine_pivot_range(linsolqr_system_t sys)
1951 /**
1952 *** Calculates sys->rng from sys->coef.
1953 **/
1954 {
1955 sys->reg.row.low = sys->reg.row.high = -1;
1956 sys->reg.col.low = sys->reg.col.high = -1;
1957
1958 for( sys->rng.high=mtx_order(sys->coef) ; --(sys->rng.high) >= 0 ; ) {
1959 if( nonempty_row(sys->coef,sys->rng.high) &&
1960 sys->reg.row.high < 0 )
1961 sys->reg.row.high = sys->rng.high;
1962 if( nonempty_col(sys->coef,sys->rng.high) &&
1963 sys->reg.col.high < 0 )
1964 sys->reg.col.high = sys->rng.high;
1965 if( nonempty_row(sys->coef,sys->rng.high) &&
1966 nonempty_col(sys->coef,sys->rng.high) )
1967 break;
1968 }
1969
1970 for( sys->rng.low=0 ; sys->rng.low <= sys->rng.high ; ++(sys->rng.low) ) {
1971 if( nonempty_row(sys->coef,sys->rng.low) &&
1972 sys->reg.row.low < 0 )
1973 sys->reg.row.low = sys->rng.low;
1974 if( nonempty_col(sys->coef,sys->rng.low) &&
1975 sys->reg.col.low < 0 )
1976 sys->reg.col.low = sys->rng.low;
1977 if( nonempty_row(sys->coef,sys->rng.low) &&
1978 nonempty_col(sys->coef,sys->rng.low) )
1979 break;
1980 }
1981 }
1982
1983 static void square_region(linsolqr_system_t sys,mtx_region_t *region)
1984 /**
1985 *** Get the largest square confined to the diagonal within the region given
1986 *** and set sys->rng accordingly.
1987 **/
1988 {
1989 sys->reg = *region;
1990 sys->rng.low = MAX(region->row.low,region->col.low);
1991 sys->rng.high = MIN(region->row.high,region->col.high);
1992 }
1993
1994 static int ranki_reorder(linsolqr_system_t sys,mtx_region_t *region)
1995 /**
1996 *** The region to reorder is first isolated by truncating the region
1997 *** provided to the largest square region confined to the matrix diagonal.
1998 *** It is presumed it will contain no empty rows or columns and will
1999 *** provide the basis of candidate pivots when factoring.
2000 **/
2001 {
2002 struct reorder_vars vars;
2003 CHECK_SYSTEM(sys);
2004 if (sys->fclass != ranki) {
2005 FPRINTF(stderr,"linsolqr_reorder: spk1 reorder called on system\n");
2006 FPRINTF(stderr," with inappropriate factor class\n");
2007 return 1;
2008 }
2009 if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2010 else square_region(sys,region);
2011
2012 vars.mtx = sys->coef;
2013 vars.reg.row.low = vars.reg.col.low = sys->rng.low;
2014 vars.reg.row.high = vars.reg.col.high = sys->rng.high;
2015 spk1_reorder(&vars);
2016 return 0;
2017 }
2018
2019 static int tranki_reorder(linsolqr_system_t sys,mtx_region_t *region)
2020 /**
2021 *** The region to reorder is first isolated by truncating the region
2022 *** provided to the largest square region confined to the matrix diagonal.
2023 *** It is presumed it will contain no empty rows or columns and will
2024 *** provide the basis of candidate pivots when factoring.
2025 **/
2026 {
2027 struct creorder_vars vars;
2028 CHECK_SYSTEM(sys);
2029 if ( !(sys->fclass==ranki || sys->fclass==s_qr) ) {
2030 FPRINTF(stderr,"linsolqr_reorder: tspk1 reorder called on system\n");
2031 FPRINTF(stderr," with inappropriate factor method\n");
2032 return 1;
2033 }
2034 if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2035 else square_region(sys,region);
2036
2037 vars.mtx = sys->coef;
2038 vars.reg.row.low = vars.reg.col.low = sys->rng.low;
2039 vars.reg.row.high = vars.reg.col.high = sys->rng.high;
2040 tspk1_reorder(&vars);
2041 return 0;
2042 }
2043
2044 /***************************************************************************\
2045 End of reordering functions for SPK1.
2046 \***************************************************************************/
2047
2048 /***************************************************************************\
2049 RANKI implementation functions.
2050 \***************************************************************************/
2051
2052 static void eliminate_row(mtx_matrix_t mtx,
2053 mtx_range_t *rng,
2054 int32 row, /* row to eliminate */
2055 real64 *tmp, /* temporary array */
2056 real64 *pivots) /* prior pivots array */
2057 /**
2058 *** Eliminates the given row to the left of the diagonal element, assuming
2059 *** valid pivots for all of the diagonal elements above it (the elements
2060 *** above those diagonal elements, if any exist, are assumed to be U
2061 *** elements and therefore ignored). The temporary array is used by this
2062 *** function to do its job. tmp[k], where rng->low <= k <= rng->high must
2063 *** be defined (allocated) but need not be initialized.
2064 *** pivots[k] where rng->low <= k <=rng->high must be allocated and
2065 *** pivots[rng->low] to pivots[row-1] must contain the previous pivots.
2066 **/
2067 {
2068 mtx_coord_t nz;
2069 mtx_range_t left_to_eliminate,high_cols,tmp_cols;
2070 boolean do_series=FALSE;
2071
2072 high_cols.low = row;
2073 high_cols.high = rng->high;
2074 left_to_eliminate.low = rng->low;
2075 left_to_eliminate.high = row - 1;
2076
2077 /* Move non-zeros left of pivot from matrix to full array */
2078 mtx_zero_real64(tmp+rng->low,(row-rng->low));
2079 nz.row = row;
2080 mtx_cur_row_vec(mtx,row,tmp,&left_to_eliminate);
2081 mtx_clear_row(mtx,row,&left_to_eliminate);
2082
2083 /* eliminates nzs from pivot, one by one, filling tmp with multipliers */
2084 do_series= ( (row-1) >= (rng->low) );
2085 if (do_series) {
2086 mtx_add_row_series_init(mtx,row,FALSE);
2087 /* do nothing between here and the END call to _init which removes
2088 elements from row */
2089 }
2090 for( nz.row = row-1 ; nz.row >= rng->low ; --(nz.row) ) {
2091 if( tmp[nz.row] == D_ZERO )
2092 continue; /* Nothing to do for this row */
2093
2094 nz.col = nz.row;
2095 tmp[nz.row] /= pivots[nz.row];
2096 /* tmp[nz.row] now equals multiplier */
2097
2098 /* Perform "mtx_add_row" for full array part of the row */
2099 left_to_eliminate.high = nz.row - 1;
2100 mtx_cur_vec_add_row(mtx,tmp,nz.row,-tmp[nz.row],
2101 &left_to_eliminate,FALSE);
2102
2103 /* Perform "mtx_add_row" on remaining part of row */
2104 mtx_add_row_series(nz.row,-tmp[nz.row],&high_cols);
2105 }
2106 if (do_series) {
2107 mtx_add_row_series_init(mtx,mtx_NONE,TRUE);
2108 }
2109
2110 mtx_fill_cur_row_vec(mtx,row,tmp,mtx_range(&tmp_cols,rng->low,row-1));
2111
2112 }
2113
2114
2115 /* not in use it seems */
2116 #if BUILD_DEAD_CODE /* yes, baa */
2117 static int32 top_of_spike(linsolqr_system_t sys,int32 col)
2118 /**
2119 *** Determines the top row (row of lowest index) in a possible spike
2120 *** above the diagonal element in the given column. If there is no spike,
2121 *** then (row = ) col is returned.
2122 **/
2123 {
2124 mtx_range_t above_diagonal;
2125 mtx_coord_t nz;
2126 real64 value;
2127 int32 top_row;
2128
2129 above_diagonal.low = sys->rng.low;
2130 above_diagonal.high = col-1;
2131 top_row = nz.col = col;
2132 nz.row = mtx_FIRST;
2133 while( value = mtx_next_in_col(sys->factors,&nz,&above_diagonal),
2134 nz.row != mtx_LAST )
2135 if( nz.row < top_row )
2136 top_row = nz.row;
2137 return( top_row );
2138 }
2139 #endif /* BUILD_DEAD_CODE */
2140
2141 static boolean col_is_a_spike(linsolqr_system_t sys,int32 col)
2142 /**
2143 *** Determines if the col is a spike, characterized by having any
2144 *** nonzeros above the diagonal. By construction there shouldn't
2145 *** be any numeric 0 nonzeros above the diagonal, so we don't prune
2146 *** them out here.
2147 **/
2148 {
2149 mtx_range_t above_diagonal;
2150 mtx_coord_t nz;
2151
2152 above_diagonal.low = sys->rng.low;
2153 above_diagonal.high = col-1;
2154 nz.col = col;
2155 nz.row = mtx_FIRST;
2156 /* this loop is cheaper than it looks */
2157 while( (void)mtx_next_in_col(sys->factors,&nz,&above_diagonal),
2158 nz.row != mtx_LAST ) {
2159 if( nz.row < col ) return TRUE;
2160 }
2161 return( FALSE );
2162 }
2163
2164 /* see implementation notes before modifying this function! */
2165 static void number_drag(real64 *vec, int32 rfrom, int32 rto)
2166 {
2167 int32 i;
2168 real64 tmp;
2169 if (rto <rfrom) {
2170 tmp=vec[rfrom];
2171 for (i=rfrom; i> rto; i--) {
2172 vec[i]=vec[i-1]; /* rotate right */
2173 }
2174 vec[rto]=tmp;
2175 return;
2176 }
2177 if (rto > rfrom) {
2178 tmp=vec[rfrom];
2179 for (i=rfrom; i< rto; i++) {
2180 vec[i]=vec[i+1]; /* rotate left */
2181 }
2182 vec[rto]=tmp;
2183 }
2184 }
2185
2186 static void rankikw_factor(linsolqr_system_t sys)
2187 /**
2188 *** (the following description also applies to rankijz_factor which is
2189 *** different only in pivot selection strategy.)
2190 *** This is the heart of the linear equation solver. This function
2191 *** factorizes the matrix into a lower (L) and upper (U) triangular
2192 *** matrix. sys->smallest_pivot and sys->rank are calculated. The
2193 *** RANKI method is utilized. At the end of elimination, the matrix A
2194 *** is factored into A = U L, where U and L are stored as follows:
2195 ***
2196 *** <----- r ----> <- n-r ->
2197 *** +--------------+---------+
2198 *** | | |
2199 *** | U | |
2200 *** | | |
2201 *** | L | | r
2202 *** | | |
2203 *** +--------------+---------+
2204 *** | | |
2205 *** | | 0 | n-r
2206 *** | | |
2207 *** +--------------+---------+
2208 ***
2209 *** The rows and columns have been permuted so that all of the pivoted
2210 *** original rows and columns are found in the first r rows and columns
2211 *** of the region. The last n-r rows and columns are unpivoted. U has
2212 *** 1's on its diagonal, whereas L's diagonal is stored in the matrix.
2213 ***
2214 *** Everything above was written as though the entire matrix is
2215 *** involved. In reality, only the relevant square region is involved.
2216 **/
2217 {
2218 mtx_coord_t nz;
2219 int32 last_row;
2220 mtx_range_t pivot_candidates;
2221 real64 *tmp;
2222 real64 pivot, *pivots;
2223 int32 length;
2224 mtx_matrix_t mtx;
2225
2226 length = sys->rng.high - sys->rng.low + 1;
2227 tmp = sys->ludata->tmp;
2228 /* eliminate row takes care of zeroing the relevant region and won't
2229 change values outside of it. */
2230 pivots=sys->ludata->pivlist;
2231 mtx=sys->factors;
2232
2233 sys->smallest_pivot = MAXDOUBLE;
2234 last_row = pivot_candidates.high = sys->rng.high;
2235 for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
2236
2237 pivot_candidates.low = nz.col = nz.row;
2238 pivots[nz.row]=pivot = mtx_value(mtx,&nz);
2239 pivot = fabs(pivot);
2240 if( pivot > sys->pivot_zero &&
2241 pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) &&
2242 !col_is_a_spike(sys,nz.row) ) {
2243 /* Good pivot and not a spike: continue with next row */
2244 if( pivot < sys->smallest_pivot )
2245 sys->smallest_pivot = pivot;
2246 ++(nz.row);
2247 continue;
2248 }
2249 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2250 /**
2251 *** Row is a spike row or will
2252 *** be when a necessary column
2253 *** exchange occurs.
2254 **/
2255 eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots);
2256 /* pivot will be largest of those available. get size and value */
2257 pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row]));
2258 if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */
2259 /* Dependent row, drag to the end */
2260 mtx_drag(mtx,nz.row,last_row);
2261 number_drag(pivots,nz.row,last_row);
2262 --last_row;
2263 #undef KAA_DEBUG
2264 #ifdef KAA_DEBUG
2265 FPRINTF(stderr,"Warning: Row %d is dependent with pivot %20.8g\n",
2266 nz.row,pivot);
2267 #endif /* KAA_DEBUG */
2268 } else {
2269 /* Independent row: nz contains best pivot */
2270 /* Move pivot to diagonal */
2271 mtx_swap_cols(mtx,nz.row,nz.col);
2272 mtx_drag( mtx , nz.row , sys->rng.low );
2273 number_drag(pivots,nz.row,sys->rng.low);
2274 if( pivot < sys->smallest_pivot )
2275 sys->smallest_pivot = pivot;
2276 ++(nz.row);
2277 }
2278 }
2279
2280 sys->rank = last_row - sys->rng.low + 1;
2281 }
2282
2283
2284 static void rankijz_factor(linsolqr_system_t sys)
2285 /**
2286 *** This is an alternate pivoting strategy introduced by Joe Zaher.
2287 *** it looks down and across for good pivots rather than just across.
2288 ***
2289 **/
2290 {
2291 real64 biggest;
2292 mtx_coord_t nz, best;
2293 mtx_region_t candidates;
2294 real64 *tmp;
2295 real64 pivot, *pivots;
2296 int32 length;
2297 mtx_matrix_t mtx;
2298
2299 length = sys->rng.high - sys->rng.low + 1;
2300 tmp = sys->ludata->tmp;
2301 /* eliminate row takes care of zeroing the relevant region and won't
2302 change values outside of it. */
2303 pivots=sys->ludata->pivlist;
2304 mtx=sys->factors;
2305
2306 sys->smallest_pivot = MAXDOUBLE;
2307 candidates.row.high = sys->rng.high;
2308 candidates.col.high = sys->rng.high;
2309 for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) {
2310 nz.col = nz.row;
2311 pivots[nz.row] = pivot = mtx_value(mtx,&nz);
2312 pivot = fabs(pivot);
2313 candidates.row.low = nz.row;
2314 candidates.col.low = nz.row;
2315 if( !col_is_a_spike(sys,nz.row) ) {
2316 best.col = nz.row;
2317 mtx_col_max(mtx,&best,&(candidates.row),&biggest);
2318 if( fabs(biggest) >= sys->pivot_zero ) {
2319 if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) {
2320 mtx_swap_rows(mtx,nz.row,best.row);
2321 pivots[nz.row] = biggest;
2322 pivot = fabs(biggest);
2323 }
2324 if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot;
2325 ++(nz.row);
2326 continue;
2327 }
2328 }
2329 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2330 /**
2331 *** Row is a spike row or will
2332 *** be when a necessary column
2333 *** exchange occurs.
2334 **/
2335 eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots);
2336 /* pivot will be largest of those available. get size and value */
2337 pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row]));
2338 /* Move pivot to diagonal */
2339 if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2340 /* Dependent row, drag nz to lower right */
2341 mtx_drag(mtx,nz.row,candidates.row.high);
2342 number_drag(pivots,nz.row,candidates.row.high);
2343 --(candidates.row.high);
2344 } else {
2345 /* Independent row, drag nz to upper left */
2346 mtx_swap_cols(mtx,nz.row,nz.col);
2347 mtx_drag( mtx , nz.row , sys->rng.low );
2348 number_drag(pivots,nz.row,sys->rng.low);
2349 if( pivot < sys->smallest_pivot )
2350 sys->smallest_pivot = pivot;
2351 ++(nz.row);
2352 }
2353 }
2354
2355 sys->rank = candidates.row.high - sys->rng.low + 1;
2356 }
2357
2358
2359 #define KAA_DEBUG 0
2360 #if KAA_DEBUG
2361 int ranki_entry(linsolqr_system_t sys,mtx_region_t *region)
2362 #else
2363 static int ranki_entry(linsolqr_system_t sys,mtx_region_t *region)
2364 #endif
2365 /**
2366 *** The region to factor is first isolated by truncating the region
2367 *** provided to the largest square region confined to the matrix diagonal.
2368 *** It is presumed it will contain no empty rows or columns and that it has
2369 *** been previously reordered using linsolqr_reorder(sys,region,spk1)
2370 *** or a sane variant of spk1.
2371 *** This is the entry point for all ranki based strategies, regardless of
2372 *** pivot selection subtleties.
2373 **/
2374 {
2375 struct rhs_list *rl;
2376 double time;
2377
2378 CHECK_SYSTEM(sys);
2379 if( sys->factored )
2380 return 0;
2381 switch(sys->fmethod) {
2382 case ranki_kw:
2383 case ranki_jz:
2384 case ranki_ka: /* add new methods here */
2385 break;
2386 default:
2387 return 1;
2388 }
2389
2390 if(ISNULL(sys->ludata))
2391 return 1;
2392
2393 if( NOTNULL(sys->inverse) )
2394 mtx_destroy(sys->inverse);
2395 sys->inverse=NULL;
2396 if( NOTNULL(sys->factors) )
2397 mtx_destroy(sys->factors);
2398 if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys);
2399 else square_region(sys,region);
2400
2401 sys->factors = mtx_copy_region(sys->coef,region);
2402 sys->rank = -1;
2403 sys->smallest_pivot = MAXDOUBLE;
2404 for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
2405 rl->solved = FALSE;
2406 insure_capacity(sys);
2407 insure_lu_capacity(sys);
2408
2409 time = tm_cpu_time();
2410 switch(sys->fmethod) {
2411 case ranki_ka:
2412 case ranki_kw:
2413 rankikw_factor(sys);
2414 break;
2415 case ranki_jz:
2416 default:
2417 rankijz_factor(sys);
2418 }
2419 /* no longer done automatically as noone usually cares.
2420 calc_dependent_rows_ranki1(sys);
2421 calc_dependent_cols_ranki1(sys);
2422 */
2423 sys->factored = TRUE;
2424
2425 #undef KAA_DEBUG
2426 #if KAA_DEBUG
2427 time = tm_cpu_time() - time;
2428 FPRINTF(stderr,"Time for Inversion = %f\n",time);
2429 FPRINTF(stderr,"Non-zeros in Inverse = %d\n",
2430 mtx_nonzeros_in_region(sys->factors,region));
2431 #endif
2432 return 0;
2433 }
2434
2435 static int ranki_solve(linsolqr_system_t sys, struct rhs_list *rl)
2436 {
2437 backward_substitute(sys,rl->varvalue,rl->transpose);
2438 forward_substitute(sys,rl->varvalue,rl->transpose);
2439 zero_unpivoted_vars(sys,rl->varvalue,rl->transpose);
2440 return 0;
2441 }
2442
2443
2444 /*
2445 *********************************************************************
2446 * Start of the 2 bodied factorization schemes.
2447 *
2448 * ranki2_entry will be used to access these routines.
2449 * there is now code called:
2450 * rankikw2_factor
2451 * rankijz2_factor
2452 * eliminate_row2
2453 * ranki2_entry
2454 * forward_substitute2
2455 * backward_substitute2
2456 * ranki2_solve
2457 *
2458 * We have fixed the calc_col_dependent and calc_row_dependent
2459 * routines.
2460 *********************************************************************
2461 */
2462
2463 extern int32 mtx_number_ops;
2464
2465 /* this function does no permutation of any sort */
2466 static
2467 void eliminate_row2(mtx_matrix_t mtx,
2468 mtx_matrix_t upper_mtx,
2469 mtx_range_t *rng,
2470 int32 row, /* row to eliminate */
2471 real64 *tmp, /* temporary array */
2472 real64 *pivots, /* prior pivots array */
2473 real64 dtol /* drop tolerance */
2474 )
2475 {
2476 mtx_coord_t nz;
2477 int j,low,high;
2478 double tmpval;
2479
2480 /*
2481 * Move all non-zeros from current row to full array.
2482 * The full array would have been initialized before,
2483 * we must put it back in the clean state when we leave.
2484 * All operations are done over mtx_ALL_COLS.
2485 *
2486 * Note: because of an addrow over mtx_ALL_COLS, entries
2487 * of tmp outside rng may have nonzeros put in them if
2488 * sys->factors has nonzeros in the outlying columns.
2489 * If enough of these pile up out there during elimination
2490 * of a block and the compiler treats double overflow as
2491 * a signalable error, we will have a floating point
2492 * exception.
2493 * Currently sys->factors is a copy of a region from
2494 * the sys->coef matrix, so we should not have anything
2495 * out there to pile up. If we make a variant which
2496 * does not copy the coef matrix but uses it directly,
2497 * this code needs to be revisited.
2498 */
2499 mtx_steal_cur_row_vec(mtx,row,tmp,mtx_ALL_COLS);
2500
2501 /*
2502 * Eliminates nzs from pivot, one by one, filling tmp with multipliers
2503 * We now operate on the entire row, since we are not writing the
2504 * multipliers back to the matrix.
2505 */
2506
2507 low = rng->low;
2508 high = rng->high;
2509 for (j = row-1 ; j >= low ; --(j) ) {
2510 if (tmp[j] == D_ZERO)
2511 continue; /* Nothing to do for this row */
2512 tmpval = tmp[j]/pivots[j]; /* Compute multiplier */
2513
2514 /*
2515 * tmpval is now the multiplier. We use it to eliminate the row,
2516 * but backpatch it to tmp, *after* we do the elimination, as
2517 * mtx_cur_vec_add_row over all columns will stomp on tmp[j]
2518 */
2519 mtx_cur_vec_add_row(mtx,tmp,j,-tmpval,mtx_ALL_COLS,FALSE);
2520 tmp[j] = tmpval; /* patch the diagonal */
2521 }
2522
2523 /*
2524 * Fill up the upper triangular matrix.
2525 * refill the range [low .. row-1]. Remember that we have
2526 * to zero all nnz's in tmp.
2527 */
2528 nz.row = row;
2529 for (j=low;j<row;j++) {
2530 if (tmp[j] != D_ZERO) {
2531 nz.col = j;
2532 mtx_fill_value(upper_mtx,&nz,tmp[j]);
2533 #if RBADEBUG
2534 FPRINTF(gscr,"fillingU: or %d oc %d (cc %d) %24.18g\n",
2535 mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), nz.col, tmp[j]);
2536 #endif
2537 tmp[j] = D_ZERO;
2538 }
2539 }
2540 /*
2541 * refill the range [row .. high]. We *wont* keep
2542 * the diagonal as this is sorted out by the pivot
2543 * array, but here we must keep it, sigh.
2544 * At this stage we don't know that the diagonal is the
2545 * pivot, as that is determined after elimination done.
2546 * Outer loop must delete the ultimate pivot element.
2547 * Odds are good, however, that it is the current diagonal,
2548 * so put in the diagonal last.
2549 */
2550 low = row;
2551 for (j=high;j>=low;j--) {
2552 if (tmp[j] != D_ZERO) {
2553 if (fabs(tmp[j]) > dtol) {
2554 nz.col = j;
2555 mtx_fill_value(mtx,&nz,tmp[j]);
2556 #if RBADEBUG
2557 FPRINTF(gscr,"fillingL: or %d oc %d %24.18g\n",
2558 mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), tmp[j]);
2559 #endif
2560 }
2561 tmp[j] = D_ZERO; /* zero element regardless */
2562 }
2563 }
2564 tmp[row] = D_ZERO;
2565 }
2566
2567 /*
2568 * This function is the same as rankikw_factor except
2569 * that it uses 2 matrices; one to store L and one for U.
2570 * As such it uses eliminate_row2 rather than eliminate_row.
2571 * Note there has been a change in the way ptol is used,
2572 * now selecting nearest passing, rather than taking max.
2573 * We assume there is NO incidence outside the region to be factored
2574 * in sys->factors.
2575 */
2576 static void rankikw2_factor(linsolqr_system_t sys)
2577 {
2578 mtx_coord_t nz;
2579 int32 last_row, defect;
2580 mtx_range_t pivot_candidates;
2581 real64 *tmp;
2582 real64 pivot, *pivots;
2583 mtx_matrix_t mtx, upper_mtx;
2584
2585
2586 /**
2587 ** tmp and pivots are cur indexed, tmp by cols, pivots by rows.
2588 ** rather than allocating tmp every time, which can get expensive,
2589 ** we allocate it once *elsewhere* and zero it all here.
2590 ** Because we don't know the state of it on entry (due to exceptions,
2591 ** etc, the rezeroing is unavoidable.
2592 ** Eliminate_row2 is responsible for returning the working region
2593 ** zeroed.
2594 **/
2595
2596 tmp = sys->ludata->tmp;
2597 mtx_zero_real64(tmp,sys->capacity);
2598 pivots = sys->ludata->pivlist;
2599 mtx = sys->factors;
2600 upper_mtx = sys->inverse;
2601 defect = 0;
2602 #if RBADEBUG
2603 gscr = fopen("/usr1/ballan/tmp/lu/rkw","w+");
2604 #endif
2605
2606 sys->smallest_pivot = MAXDOUBLE;
2607 last_row = pivot_candidates.high = sys->rng.high;
2608 for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
2609
2610 #if (RBADEBUG && 0)
2611 /* remove the && 0 and you better have a lot for free
2612 * disk space
2613 */
2614 mtx_write_region_human_orgrows(gscr,mtx,mtx_ENTIRE_MATRIX);
2615 #endif
2616
2617 pivot_candidates.low = nz.col = nz.row;
2618 pivots[nz.row]=pivot = mtx_value(mtx,&nz);
2619 pivot = fabs(pivot);
2620 if( pivot > sys->pivot_zero &&
2621 pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) &&
2622 !col_is_a_spike(sys,nz.row) ) {
2623 /* Good pivot and not a spike: continue with next row */
2624 if( pivot < sys->smallest_pivot ) {
2625 sys->smallest_pivot = pivot;
2626 }
2627 #if RBADEBUG
2628 FPRINTF(gscr,"Cheap pivot col %d (org %d)\n",
2629 nz.row, mtx_col_to_org(mtx,nz.row));
2630 if (nz.row ==179) {
2631 FPRINTF(stderr,"ben, stop here\n");
2632 }
2633 #endif
2634 ++(nz.row);
2635 continue;
2636 }
2637 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2638 /**
2639 *** Row is a spike row or will
2640 *** be when a necessary column
2641 *** exchange occurs.
2642 **/
2643 eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol);
2644 /* pivot will be leftmost of those that pass ptol and eps, or 0.0. */
2645 if (defect) {
2646 /* unfortunately, can't use mtx_ALL_COLS in search now because we must
2647 ignore columns that were dragged out with previous singular rows. */
2648 pivot = mtx_get_pivot_col(mtx,&nz,&pivot_candidates,&(pivots[nz.row]),
2649 sys->ptol,sys->pivot_zero);
2650 } else {
2651 /* no singular cols, and eliminate moved all left of diag crap out, so */
2652 pivot = mtx_get_pivot_col(mtx,&nz,mtx_ALL_COLS,&(pivots[nz.row]),
2653 sys->ptol,sys->pivot_zero);
2654 }
2655 #if RBADEBUG
2656 FPRINTF(gscr,"Pivot column found %d (org %d)\n",
2657 nz.col, mtx_col_to_org(mtx,nz.col));
2658 #endif
2659 if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2660 /*
2661 * Dependent row, drag to the end. The upper_mtx is a slave
2662 * of mtx, and will be dragged automatically.
2663 */
2664 mtx_drag(mtx, nz.row, last_row);
2665 number_drag(pivots, nz.row, last_row);
2666 --last_row;
2667 defect = 1;
2668 } else {
2669 /* Independent row: nz contains selected pivot */
2670 mtx_swap_cols(mtx, nz.row, nz.col); /* this Fixes U as well */
2671 /* Move pivot to diagonal */
2672 mtx_drag(mtx, nz.row, sys->rng.low ); /* this Fix U as well */
2673 number_drag(pivots, nz.row, sys->rng.low);
2674 if( pivot < sys->smallest_pivot )
2675 sys->smallest_pivot = pivot;
2676 ++(nz.row);
2677 }
2678 }
2679 #if RBADEBUG
2680 /* get rid of this asap*/
2681 {
2682 FILE *fp;
2683 int32 cc;
2684
2685 fp = fopen("/usr1/ballan/tmp/lu/kw2p","w+");
2686 FPRINTF(fp,"kw2 final pivot sequence:\n");
2687 for (cc= sys->rng.low; cc <= last_row; cc++) {
2688 FPRINTF(fp,"orgrow,orgcol = %d,%d,%24.18g\n",
2689 mtx_row_to_org(mtx,cc),
2690 mtx_col_to_org(mtx,cc), pivots[cc]);
2691 }
2692 fclose(fp);
2693 }
2694
2695 #endif
2696
2697 zero_diagonal_elements(sys->factors,sys->rng.low,sys->rng.high);
2698 sys->rank = last_row - sys->rng.low + 1;
2699
2700 #if RBADEBUG
2701 {
2702 FILE *fp;
2703 fp = fopen("/usr1/ballan/tmp/lu/kw2m","w+");
2704 mtx_write_region_human_rows(fp,mtx,mtx_ENTIRE_MATRIX);
2705 fclose(fp);
2706 fp = fopen("/usr1/ballan/tmp/lu/kw2um","w+");
2707 mtx_write_region_human_rows(fp,upper_mtx,mtx_ENTIRE_MATRIX);
2708 fclose(fp);
2709 }
2710 #endif
2711 #if RBADEBUG
2712 fclose(gscr);
2713 #endif
2714 }
2715
2716 /*
2717 * This function is the same as rankijz_factor except
2718 * that it uses 2 matrices; one to store L and one for U.
2719 * As such it uses eliminate_row2 rather than eliminate_row.
2720 *
2721 * We are now using the slave matrix feature of the code, and
2722 * thus we all drag/swaps automatically affect the slave (upper_mtx)
2723 * as well.
2724 */
2725 static void rankijz2_factor(linsolqr_system_t sys)
2726 {
2727 real64 biggest;
2728 mtx_coord_t nz, best;
2729 mtx_region_t candidates;
2730 real64 *tmp;
2731 real64 pivot, *pivots;
2732 mtx_matrix_t mtx, upper_mtx;
2733 /**
2734 ** tmp and pivots are cur indexed, tmp by cols, pivots by rows.
2735 ** rather than allocating tmp every time, which can get expensive,
2736 ** we allocate it once elsewhere and zero it all here.
2737 ** Because we don't know the state of it on entry (due to exceptions,
2738 ** etc), the rezeroing is unavoidable.
2739 ** Eliminate_row2 is responsible for returning the working region
2740 ** zeroed.
2741 **/
2742 tmp = sys->ludata->tmp;
2743 mtx_zero_real64(tmp,sys->capacity);
2744 pivots=sys->ludata->pivlist;
2745 mtx = sys->factors;
2746 upper_mtx = sys->inverse;
2747
2748 sys->smallest_pivot = MAXDOUBLE;
2749 candidates.row.high = sys->rng.high;
2750 candidates.col.high = sys->rng.high;
2751 for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) {
2752 nz.col = nz.row;
2753 pivots[nz.row] = pivot = mtx_value(mtx,&nz);
2754 pivot = fabs(pivot);
2755 candidates.row.low = nz.row;
2756 candidates.col.low = nz.row;
2757 if( !col_is_a_spike(sys,nz.row) ) {
2758 best.col = nz.row;
2759 mtx_col_max(mtx,&best,&(candidates.row),&biggest);
2760 /* make sure we aren't missing someone really nice in the column */
2761 if( fabs(biggest) >= sys->pivot_zero ) {
2762 if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) {
2763 mtx_swap_rows(mtx,nz.row,best.row); /* Fixes U as well */
2764 pivots[nz.row] = biggest;
2765 pivot = fabs(biggest);
2766 }
2767 if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot;
2768 ++(nz.row);
2769 continue;
2770 }
2771 }
2772 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
2773 /**
2774 *** Row is a spike row or will
2775 *** be when a necessary column
2776 *** exchange occurs.
2777 **/
2778 eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol);
2779 /* pivot will be largest of those available. get size and value */
2780 pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row]));
2781 /* Move pivot to diagonal */
2782 if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */
2783 /* Dependent row, drag nz to lower right */
2784 mtx_drag(mtx,nz.row,candidates.row.high);
2785 number_drag(pivots,nz.row,candidates.row.high);
2786 --(candidates.row.high);
2787 } else {
2788 /* Independent row, drag nz to upper left */
2789 mtx_swap_cols(mtx,nz.row,nz.col); /* this Fixes U as well */
2790 mtx_drag(mtx ,nz.row ,sys->rng.low);
2791
2792 number_drag(pivots,nz.row,sys->rng.low);
2793 if( pivot < sys->smallest_pivot )
2794 sys->smallest_pivot = pivot;
2795 ++(nz.row);
2796 }
2797 }
2798 zero_diagonal_elements(sys->factors,sys->rng.low,sys->rng.high);
2799 sys->rank = candidates.row.high - sys->rng.low + 1;
2800 }
2801
2802 /*
2803 * See the comments attached to forward_substitute.
2804 * This code otherwise is the same. It uses a 2 bodied
2805 * matrix as well as making use of mtx_ALL_COLS and
2806 * mtx_ALL_ROWS whereever possible.
2807 * We make the following additional assumptions here:
2808 * (if they do not hold, do NOT use this function)
2809 * - sys->inverse (U) has no incidence that is not multipliers
2810 * (and the diagonal of 1s is NOT in sys->inverse.)
2811 * As of 10/95, this is how ranki2 U is constructed.
2812 * - sys->factors (L) has no incidence on the upper triangle,
2813 * including the diagonal, or outside the factored region.
2814 * relaxation: incidence anywhere allowed if value = 0.0
2815 * since 0 doesn't contribute to a dot product
2816 * and the only thing we do with triangles is dot them.
2817 * - There may be singular rows and columns in the factorization,
2818 * but any additions coming from these rows/columns during
2819 * mtx_ALL_*O*S operations will not contribute to sums because the
2820 * user zeroed the arr entries corresponding to these before
2821 * calling this function.
2822 */
2823 static
2824 void forward_substitute2(linsolqr_system_t sys,
2825 real64 *arr,
2826 boolean transpose)
2827 {
2828 mtx_coord_t nz;
2829 real64 sum, *pivlist;
2830 mtx_matrix_t mtx;
2831 int32 dotlim;
2832 boolean nonzero_found=FALSE;
2833
2834 pivlist=sys->ludata->pivlist;
2835 dotlim = sys->rng.low+sys->rank;
2836 if (transpose) { /* arr is indexed by original column number */
2837 mtx=sys->inverse;
2838 for( nz.col=sys->rng.low; nz.col < dotlim; ++(nz.col) ) {
2839 register int32 org_col;
2840
2841 org_col = mtx_col_to_org(mtx,nz.col);
2842 if (arr[org_col]!=D_ZERO) nonzero_found=TRUE;
2843 if (nonzero_found) {
2844 sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,mtx_ALL_ROWS,TRUE);
2845 /* arr[org_col] = (arr[org_col] - sum) / D_ONE */;
2846 arr[org_col] -= sum;
2847 }
2848 }
2849 } else { /* arr is indexed by original row number */
2850 mtx=sys->factors;
2851 for( nz.row=sys->rng.low; nz.row < dotlim; ++(nz.row) ) {
2852 register int32 org_row;
2853
2854 org_row = mtx_row_to_org(mtx,nz.row);
2855 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
2856 if (nonzero_found) {
2857 sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,mtx_ALL_COLS,TRUE);
2858 /*
2859 nz.col = nz.row;
2860 arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz);
2861 */
2862 arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row];
2863 }
2864 }
2865 }
2866 }
2867
2868 /*
2869 * See the comments attached to backward_substitute and
2870 * forward_substitute2.
2871 * When solving for the transpose, then we are actually
2872 * running of the lower triangle, hence we use sys->factors.
2873 * Otherwise we use sys->inverse which stores U.
2874 */
2875 static
2876 void backward_substitute2(linsolqr_system_t sys,
2877 real64 *arr,
2878 boolean transpose)
2879 {
2880 mtx_coord_t nz;
2881 real64 sum, *pivlist;
2882 mtx_matrix_t mtx;
2883 int32 dotlim;
2884 boolean nonzero_found=FALSE; /* once TRUE, substitution must be done
2885 over remaining rows/cols */
2886
2887 dotlim=sys->rng.low;
2888 pivlist=sys->ludata->pivlist;
2889 if (transpose) { /* arr is indexed by original column number */
2890 mtx = sys->factors;
2891 for( nz.col = sys->rng.low+sys->rank-1; nz.col >= dotlim ; --(nz.col) ) {
2892 register int32 org_col;
2893
2894 org_col = mtx_col_to_org(mtx,nz.col);
2895 if (arr[org_col] != D_ZERO) nonzero_found=TRUE;
2896 if (nonzero_found) {
2897 sum = mtx_col_dot_full_org_vec(mtx,nz.col,arr,mtx_ALL_ROWS,TRUE);
2898 arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col];
2899 }
2900 }
2901 } else { /* arr is indexed by original row number */
2902 /* we are working from the bottom up */
2903 mtx = sys->inverse;
2904 for( nz.row = sys->rng.low+sys->rank-1; nz.row >= dotlim ; --(nz.row) ) {
2905 register int32 org_row;
2906
2907 org_row = mtx_row_to_org(mtx,nz.row);
2908 if (arr[org_row]!=D_ZERO) nonzero_found=TRUE;
2909 if (nonzero_found) {
2910 sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,mtx_ALL_COLS,TRUE);
2911 arr[org_row] -= sum;
2912 }
2913 }
2914 }
2915 }
2916
2917 static void calc_dependent_rows_ranki2(linsolqr_system_t sys)
2918 {
2919 mtx_coord_t nz;
2920 real64 value;
2921 mtx_range_t colrange;
2922 mtx_range_t rowrange;
2923 real64 *lc;
2924 mtx_matrix_t mtx;
2925
2926 sys->rowdeps = TRUE;
2927 if( ( (sys->reg.row.low == sys->rng.low) &&
2928 ( sys->reg.row.high == sys->rng.low+sys->rank-1 )
2929 ) || sys->rank==0 )
2930 return;
2931
2932 lc = sys->ludata->tmp;
2933 colrange.low = sys->rng.low;
2934 colrange.high = colrange.low + sys->rank - 1;
2935 rowrange.low = sys->rng.high;
2936 rowrange.high = sys->rng.low+sys->rank;
2937 mtx=sys->factors;
2938
2939 nz.row = sys->reg.row.low;
2940 for( ; nz.row <= sys->reg.row.high; nz.row++ ) {
2941 if( nz.row == sys->rng.low ) {
2942 nz.row = rowrange.high-1;
2943 continue;
2944 }
2945 mtx_zero_real64(lc,(sys->capacity));
2946 /* must zero the whole thing to use the backward_substitute2 right */
2947 mtx_org_row_vec(mtx,nz.row,lc,&colrange);
2948 if( nz.row < rowrange.high || nz.row > rowrange.low )
2949 backward_substitute2(sys,lc,TRUE);
2950 forward_substitute2(sys,lc,TRUE);
2951 mtx_clear_row(mtx,nz.row,&colrange);
2952 for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) {
2953 value = lc[mtx_col_to_org(mtx,nz.col)];
2954 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
2955 }
2956 }
2957 }
2958
2959 static void calc_dependent_cols_ranki2(linsolqr_system_t sys)
2960 {
2961 mtx_coord_t nz;
2962 real64 value;
2963 mtx_range_t rowrange;
2964 mtx_range_t colrange;
2965 real64 *lc;
2966 mtx_matrix_t mtx;
2967
2968 sys->coldeps = TRUE;
2969 if( ( (sys->reg.col.low == sys->rng.low) &&
2970 ( sys->reg.col.high == sys->rng.low+sys->rank-1 )
2971 ) || sys->rank==0 )
2972 return;
2973
2974 lc = sys->ludata->tmp;
2975 rowrange.low = sys->rng.low;
2976 rowrange.high = rowrange.low + sys->rank - 1;
2977 colrange.high = sys->rng.low+sys->rank;
2978 colrange.low = sys->rng.high;
2979 mtx=sys->factors;
2980
2981 nz.col = sys->reg.col.low;
2982 for( ; nz.col <= sys->reg.col.high; nz.col++ ) {
2983 if( nz.col == sys->rng.low ) {
2984 nz.col = colrange.high-1;
2985 continue;
2986 }
2987 mtx_zero_real64(lc,sys->capacity);
2988 mtx_org_col_vec(mtx,nz.col,lc,&rowrange);
2989 if( nz.col < colrange.high || nz.col > colrange.low )
2990 backward_substitute2(sys,lc,FALSE);
2991 forward_substitute2(sys,lc,FALSE);
2992 mtx_clear_col(mtx,nz.col,&rowrange);
2993 for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) {
2994 value = lc[mtx_row_to_org(mtx,nz.row)];
2995 if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value);
2996 }
2997 }
2998 }
2999
3000 static int ranki2_solve(linsolqr_system_t sys, struct rhs_list *rl)
3001 {
3002 /* zero any unsolved for vars first so they don't contaminate
3003 mtx_ALL_*O*S dot products.
3004 */
3005 zero_unpivoted_vars(sys,rl->varvalue,rl->transpose);
3006 backward_substitute2(sys,rl->varvalue,rl->transpose);
3007 forward_substitute2(sys,rl->varvalue,rl->transpose);
3008 return 0;
3009 }
3010
3011 /* include rankiba2 implementation. Note that this is VERY VERY bad style
3012 and needs to be fixed immediately by splitting up the linsolqr file.
3013 This is our fastest (far and away) ranki implementation -- drag free.
3014 */
3015 #if 1
3016 #include "rankiba2.c"
3017 #endif
3018
3019 #ifdef BUILD_KIRK_CODE
3020 /*
3021 * this forward declaration is necessary to keept the
3022 * system happy until we can organize this file
3023 */
3024
3025 static
3026 int kirk1_factor(linsolqr_system_t sys,
3027 mtx_region_t *A11,
3028 int kirk_method);
3029
3030 #endif /* BUILD_KIRK_CODE */
3031
3032 /*
3033 * This is the entry point for all the ranki2_* schemes
3034 * which make use of a 2 bodied matrix.
3035 * it does not respond to ranki_kw and ranki_jz as if they
3036 * were calls for ranki_kw2 and ranki_jz2. with the new
3037 * factorization classes , there's really no call to be
3038 * sneaking around the header functions.
3039 * ranki_ka goes here as well. check sys->fmethod to see whos
3040 * expected to be in use.
3041 */
3042 int ranki2_entry(linsolqr_system_t sys, mtx_region_t *region)
3043 {
3044 struct rhs_list *rl;
3045 double time;
3046
3047 CHECK_SYSTEM(sys);
3048 if (sys->factored) return 0;
3049 switch(sys->fmethod) {
3050 case ranki_ba2:
3051 case ranki_kw2:
3052 case ranki_jz2: /* add new methods here */
3053 #ifdef BUILD_KIRK_CODE
3054 case ranki_ka:
3055 #endif /* BUILD_KIRK_CODE */
3056 break;
3057 default:
3058 return 1;
3059 }
3060
3061 if (ISNULL(sys->ludata)) return 1;
3062 if (NOTNULL(sys->inverse)) mtx_destroy(sys->inverse);
3063 sys->inverse = NULL;
3064 if (NOTNULL(sys->factors)) mtx_destroy(sys->factors);
3065 if (region == mtx_ENTIRE_MATRIX) determine_pivot_range(sys);
3066 else square_region(sys,region);
3067
3068 sys->factors = mtx_copy_region(sys->coef,region);
3069 sys->inverse = mtx_create_slave(sys->factors);
3070 sys->rank = -1;
3071 sys->smallest_pivot = MAXDOUBLE;
3072 for (rl = sys->rl ; NOTNULL(rl) ; rl = rl->next)
3073 rl->solved = FALSE;
3074 insure_capacity(sys);
3075 insure_lu_capacity(sys);
3076
3077 time = tm_cpu_time();
3078 switch(sys->fmethod) {
3079 case ranki_ba2:
3080 rankiba2_factor(sys);
3081 break;
3082 case ranki_kw2:
3083 rankikw2_factor(sys);
3084 break;
3085 case ranki_jz2:
3086 rankijz2_factor(sys);
3087 break;
3088
3089 #ifdef BUILD_KIRK_CODE
3090 case ranki_ka:
3091 kirk1_factor(sys,region,2);
3092 break;
3093 #endif /* BUILD_KIRK_CODE */
3094
3095 default:
3096 return 1;
3097 }
3098 sys->factored = TRUE;
3099
3100 #define KAA_DEBUG 1
3101 #if KAA_DEBUG
3102 if (g_linsolqr_timing) {
3103 int anz;
3104 int fnz;
3105 anz = mtx_nonzeros_in_region(sys->coef,region);
3106 fnz = mtx_nonzeros_in_region(sys->factors,region) +
3107 mtx_nonzeros_in_region(sys->inverse,0);
3108 time = tm_cpu_time() - time;
3109 FPRINTF(stderr,"A-NNZ: %d Factor time: %f Fill %g\n",
3110 anz,time,( anz>0 ? (double)fnz/(double)anz : 0));
3111 }
3112 #endif /* KAA_DEBUG */
3113 #undef KAA_DEBUG
3114 return 0;
3115 }
3116
3117 /***************************************************************************\
3118 End of RANKI implementation functions.
3119 \***************************************************************************/
3120
3121
3122 #ifdef BUILD_KIRK_CODE
3123 /*
3124 ***************************************************************************
3125 * Start of kirk_* routines
3126 *
3127 * kirk1_factor:
3128 * This routine is based on rankikw2_factor, except that it takes
3129 * an additional region, so as to do some *global* pivot restriction.
3130 * This is for the case of a single border for the entire matrix.
3131 *
3132 ***************************************************************************
3133 */
3134
3135 /*
3136 * This is just a dummy structure so as to get the size
3137 * correct. It is *exactly the same size as mtx_linklist,
3138 * until we can sort out the software engineering issues.
3139 */
3140 struct dlinklist {
3141 int index;
3142 struct dlinklist *prev;
3143 };
3144
3145 static
3146 int kirk1_factor(linsolqr_system_t sys,
3147 mtx_region_t *A11,
3148 int kirk_method)
3149 {
3150 mtx_coord_t nz;
3151 int32 last_row;
3152 mtx_range_t pivot_candidates;
3153 real64 *tmp,*tmp_array_origin;
3154 real64 pivot, *pivots;
3155 int32 length;
3156 mtx_matrix_t mtx, upper_mtx;
3157 real64 maxa;
3158
3159 int32 *inserted = NULL; /* stuff for the link list */
3160 mem_store_t eltbuffer = NULL;
3161
3162 #ifdef NOP_DEBUG
3163 mtx_number_ops = 0;
3164 #endif /* NOP_DEBUG */
3165
3166 length = sys->rng.high + 1;
3167 tmp_array_origin = length > 0 ?
3168 (real64 *)asccalloc( length,sizeof(real64) ) : NULL;
3169 tmp = tmp_array_origin;
3170 pivots=sys->ludata->pivlist;
3171 mtx = sys->factors;
3172 upper_mtx = sys->inverse;
3173 inserted = ASC_NEW_ARRAY_CLEAR(int32,length);
3174 eltbuffer = mem_create_store(2,256,sizeof(struct dlinklist),
3175 2,256);
3176
3177 sys->smallest_pivot = MAXDOUBLE;
3178 last_row = pivot_candidates.high = sys->rng.high;
3179 for( nz.row = sys->rng.low ; nz.row <= last_row ; ) {
3180
3181 pivot_candidates.low = nz.col = nz.row;
3182 pivots[nz.row]=pivot = mtx_value(mtx,&nz);
3183 pivot = fabs(pivot);
3184 maxa = mtx_row_max(mtx,&nz,&pivot_candidates,NULL);
3185 if ((pivot > sys->pivot_zero) && (pivot >= sys->ptol*maxa) &&
3186 !col_is_a_spike(sys,nz.row)) {
3187 if (pivot < sys->smallest_pivot)
3188 sys->smallest_pivot = pivot;
3189 ++(nz.row);
3190 continue;
3191 }
3192 /* pivots for rows nz.row back to sys->rng->low are stored in pivots */
3193 /**
3194 *** Row is a spike row or will
3195 *** be when a necessary column
3196 *** exchange occurs.
3197 **/
3198 if (kirk_method==1)
3199 eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol);
3200 else{
3201 mtx_eliminate_row2(mtx,upper_mtx,&(sys->rng),
3202 nz.row,tmp,pivots,inserted,eltbuffer);
3203 mem_clear_store(eltbuffer);
3204 }
3205 /* pivot will be largest of those available. get size and value */
3206 pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row]));
3207 if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */
3208 /*
3209 * Dependent row, drag to the end. The upper_mtx is a slave
3210 * of mtx, and will be dragged automatically.
3211 */
3212 mtx_drag(mtx,nz.row,last_row);
3213 number_drag(pivots,nz.row,last_row);
3214 --last_row;
3215 #ifdef KAA_DEBUG
3216 FPRINTF(stderr,"Warning: Row %d is dependent with pivot %20.8g\n",
3217 nz.row,pivot);
3218 #endif /* KAA_DEBUG */
3219 } else {
3220 /* Independent row: nz co