Parent Directory
|
Revision Log
Setting up web subdirectory in repository
1 | /* |
2 | * linsol II: Ascend Sparse Linear Solvers |
3 | * by Benjamin Allan |
4 | * Created: 2/6/90 |
5 | * Version: $Revision: 1.26 $ |
6 | * Version control file: $RCSfile: linsolqr.c,v $ |
7 | * Date last modified: $Date: 2000/01/25 02:26:58 $ |
8 | * Last modified by: $Author: ballan $ |
9 | * |
10 | * This file is part of the SLV solver. |
11 | * |
12 | * Copyright (C) 1994 Benjamin Andrew Allan |
13 | * |
14 | * Based on linsol: |
15 | * Copyright (C) 1990 Karl Michael Westerberg |
16 | * Copyright (C) 1993 Joseph Zaher |
17 | * Copyright (C) 1994 Joseph Zaher, Benjamin Andrew Allan |
18 | * and on sqr.pas v1.5: |
19 | * Copyright (C) 1994 Boyd Safrit |
20 | * |
21 | * The SLV solver is free software; you can redistribute |
22 | * it and/or modify it under the terms of the GNU General Public License as |
23 | * published by the Free Software Foundation; either version 2 of the |
24 | * License, or (at your option) any later version. |
25 | * |
26 | * The SLV solver is distributed in hope that it will be |
27 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
29 | * General Public License for more details. |
30 | * |
31 | * You should have received a copy of the GNU General Public License |
32 | * along with the program; if not, write to the Free Software Foundation, |
33 | * Inc., 675 Mass Ave, Cambridge, MA 02139 USA. Check the file named |
34 | * COPYING. COPYING is found in ../compiler. |
35 | */ |
36 | |
37 | /* |
38 | * Implementation Notes: |
39 | * 9/95: there are many places in linsolqr.c where lower level matrix |
40 | * operations have been replaced by higher level ones. These used to |
41 | * be flagged as such so that people who wanted to construct newer |
42 | * operations from the lowest level mtx operators could see how to |
43 | * use the lowlevel operators. Since it has been decided that the old |
44 | * linsol will NOT be discarded completely (we like having easy |
45 | * benchmarks to look better than), the user wishing to code new |
46 | * operations is directed back to linsol to see how lowlevel operations |
47 | * can be expensively done. |
48 | * |
49 | * 10/95: (BAA) I attempted to optimize number_drag through use of |
50 | * pointer arithmetic. Testing the resulting code without optimization |
51 | * turned on gave about 20% improvement over the original on an alpha |
52 | * using the native compiler. However, with optimization on the array |
53 | * subscripted version which is left here won by ~40%. |
54 | * The results for gcc were similar: pointer 10% better unoptimized, |
55 | * subscripted 30% better when -O2. |
56 | * For both compilers -O3 was worse! gcc was ~1.8x slower than the |
57 | * native cc both optimized and not. |
58 | * I have not tested a loop unrolled drag, but I expect unrolling is |
59 | * exactly what the optimizer is doing, so why bother? |
60 | */ |
61 | |
62 | #include <math.h> |
63 | #include <stdarg.h> |
64 | #include "utilities/ascConfig.h" |
65 | #include "utilities/ascMalloc.h" |
66 | #include "utilities/ascPanic.h" |
67 | #include "utilities/mem.h" |
68 | #include "utilities/set.h" |
69 | #include "general/tm_time.h" |
70 | #include "solver/mtx.h" |
71 | #include "solver/linsolqr.h" |
72 | |
73 | #ifdef __WIN32__ |
74 | #define copysign _copysign |
75 | #endif /* __WIN32__ */ |
76 | |
77 | #define LINSOLQR_DEBUG FALSE |
78 | /* qr code debugging flag */ |
79 | #define LINSOL_DEBUG FALSE |
80 | /* linsol debugging flag */ |
81 | |
82 | #define D_ZERO (real64)0.0 |
83 | #define ZERO (int32)0 |
84 | #define D_ONE (real64)1.0 |
85 | |
86 | /* housekeeping flags */ |
87 | #define BUILD_DEAD_CODE 0 |
88 | #undef BUILD_KIRK_CODE |
89 | |
90 | |
91 | #define RBADEBUG 0 |
92 | /* turns on spew that will generate output files |
93 | * comparable to and in the same location as for rankiba2 |
94 | * user specific hardcoded pathnames are involved. |
95 | * don't try this at home. |
96 | */ |
97 | #if RBADEBUG |
98 | extern FILE *gscr; |
99 | #endif |
100 | |
101 | /* |
102 | * timing messages disable flag. |
103 | */ |
104 | int g_linsolqr_timing = 1; |
105 | |
106 | /***************************************************************************\ |
107 | Data structures for linsol |
108 | Any time you modify these, please verify linsolqr_size is still correct. |
109 | \***************************************************************************/ |
110 | struct rhs_list { |
111 | real64 *rhs; /* Vector of rhs values */ |
112 | real64 *varvalue; /* Solution of the linear system */ |
113 | boolean solved; /* ? has the rhs been solved */ |
114 | boolean transpose; /* ? transpose the coef matrix */ |
115 | struct rhs_list *next; |
116 | }; |
117 | |
118 | struct qr_fill_t { |
119 | int32 cnt; /* number of real nonzeros in a column */ |
120 | int32 col; /* column number prior to being sorted */ |
121 | int32 fill; /* fill if this (or if col?) were the pivot column */ |
122 | int32 overlap; /* scratch for fillable calculation. */ |
123 | }; |
124 | /* fill counting structure for sparse qr */ |
125 | |
126 | struct lu_auxdata { /* all indexed by cur row or cur col number */ |
127 | real64 *pivlist; /* vector of pivots (diagonal of L for ranki) */ |
128 | real64 *tmp; /* row elimination, dependency buffer */ |
129 | int32 cap; /* current capacity of the vectors if not null */ |
130 | }; |
131 | /* structure for lu algorithms */ |
132 | |
133 | struct qr_auxdata { /* all indexed by cur row or cur col number */ |
134 | mtx_matrix_t hhvects; /* slave matrix of sys->factors for holding u's */ |
135 | mtx_sparse_t sp; /* sparse data space */ |
136 | real64 *alpha; /* column norms for the Q\R factor matrix */ |
137 | real64 *sigma; /* column norms for the S (R inverse) matrix */ |
138 | real64 *tau; /* column tau of the Householder transforms */ |
139 | real64 *hhcol; /* Householder transform scratch vector */ |
140 | real64 *hhrow; /* Householder transform scratch vector */ |
141 | real64 nu; /* current condition number of inverse */ |
142 | real64 anu; /* condition number of coef */ |
143 | real64 asubnu; /* condition number of active region in coef */ |
144 | struct qr_fill_t *fill; /* fill counting structure */ |
145 | mtx_region_t facreg; /* region over which qr is done if nonsquare */ |
146 | int32 cap; /* current capacity of these vectors if not null */ |
147 | }; |
148 | /* structure for CondQR algorithms */ |
149 | |
150 | struct linsolqr_header { |
151 | int integrity; |
152 | enum factor_class fclass; /* Type of factoring expected */ |
153 | enum factor_method fmethod; /* Method factoring expected */ |
154 | enum reorder_method rmethod; /* Type of most recent reordering */ |
155 | int32 capacity; /* Capacity of arrays */ |
156 | mtx_matrix_t coef; /* Coefficient matrix */ |
157 | struct rhs_list *rl; /* List of rhs vectors */ |
158 | int rlength; /* Length of rhs list */ |
159 | real64 pivot_zero; /* Smallest acceptable pivot */ |
160 | real64 ptol; /* Pivot selection tolerance */ |
161 | real64 ctol; /* Condition selection tolerance */ |
162 | real64 dtol; /* Matrix entry drop tolerance */ |
163 | mtx_matrix_t factors; /* Matrix with UL and dependence info (ranki), |
164 | or L and dependence info (ranki2), |
165 | or R and dependence info (qr methods) */ |
166 | mtx_matrix_t inverse; /* Matrix with U multipliers (ranki2) or R^-1 |
167 | (condqr), in both cases slave of factors */ |
168 | boolean factored; /* ? has the matrix been factored */ |
169 | boolean rowdeps; /* ? have row dependencies been calc'ed */ |
170 | boolean coldeps; /* ? have col dependencies been calc'ed */ |
171 | mtx_range_t rng; /* Pivot range */ |
172 | mtx_region_t reg; /* Bounding region given */ |
173 | int32 rank; /* Rank of the matrix */ |
174 | real64 smallest_pivot; /* Smallest pivot accepted */ |
175 | struct qr_auxdata *qrdata; /* Data vectors for qr methods */ |
176 | struct lu_auxdata *ludata; /* Data vectors for lu methods */ |
177 | }; |
178 | /* linsol main structure */ |
179 | |
180 | /***************************************************************************\ |
181 | string functions for linsol io |
182 | \***************************************************************************/ |
183 | |
184 | char *linsolqr_rmethods() { |
185 | static char names[]="Natural, SPK1, TSPK1"; |
186 | return names; |
187 | } |
188 | |
189 | char *linsolqr_fmethods() { |
190 | static char names[] = |
191 | "SPK1/RANKI,SPK1/RANKI+ROW,Fast-SPK1/RANKI,Fast-SPK1/RANKI+ROW,Fastest-SPK1/MR-RANKI,CondQR,CPQR"; |
192 | return names; |
193 | } |
194 | |
195 | /** **/ |
196 | enum reorder_method linsolqr_rmethod_to_enum(char *name) { |
197 | if (strcmp(name,"SPK1")==0) return spk1; |
198 | if (strcmp(name,"TSPK1")==0) return tspk1; |
199 | if (strcmp(name,"Natural")==0) return natural; |
200 | return unknown_r; |
201 | } |
202 | |
203 | enum factor_method linsolqr_fmethod_to_enum(char *name) { |
204 | if (strcmp(name,"KIRK-STUFF")==0) return ranki_ka; |
205 | if (strcmp(name,"SPK1/RANKI")==0) return ranki_kw; |
206 | if (strcmp(name,"SPK1/RANKI+ROW")==0) return ranki_jz; |
207 | if (strcmp(name,"Fast-SPK1/RANKI")==0) return ranki_kw2; |
208 | if (strcmp(name,"Fast-SPK1/RANKI+ROW")==0) return ranki_jz2; |
209 | if (strcmp(name,"Fastest-SPK1/MR-RANKI")==0) return ranki_ba2; |
210 | if (strcmp(name,"CondQR")==0) return cond_qr; |
211 | if (strcmp(name,"CPQR")==0) return plain_qr; |
212 | return unknown_f; |
213 | } |
214 | |
215 | enum factor_class linsolqr_fmethod_to_fclass(enum factor_method fm) |
216 | { |
217 | switch (fm) { |
218 | /* ranki-like things */ |
219 | case ranki_kw: |
220 | case ranki_jz: |
221 | case ranki_ba2: |
222 | case ranki_kw2: |
223 | case ranki_jz2: |
224 | case ranki_ka: |
225 | return ranki; |
226 | /* implemented qr things */ |
227 | case plain_qr: |
228 | return s_qr; |
229 | /* other stuff unimplemented. all fall through */ |
230 | case cond_qr: |
231 | case opt_qr: |
232 | case ls_qr: |
233 | case gauss_ba2: |
234 | case symmetric_lu: |
235 | case unknown_f: |
236 | default: |
237 | return unknown_c; |
238 | } |
239 | } |
240 | |
241 | /** **/ |
242 | char *linsolqr_enum_to_rmethod(enum reorder_method meth) { |
243 | switch (meth) { |
244 | case spk1: return "SPK1"; |
245 | case tspk1: return "TSPK1"; |
246 | case natural: return "Natural"; |
247 | default: return "Unknown reordering method."; |
248 | } |
249 | } |
250 | |
251 | char *linsolqr_enum_to_fmethod(enum factor_method meth) { |
252 | switch (meth) { |
253 | case ranki_kw: return "SPK1/RANKI"; |
254 | case ranki_jz: return "SPK1/RANKI+ROW"; |
255 | case ranki_ba2: return "Fastest-SPK1/MR-RANKI"; |
256 | case ranki_kw2: return "Fast-SPK1/RANKI"; |
257 | case ranki_jz2: return "Fast-SPK1/RANKI+ROW"; |
258 | case ranki_ka: return "KIRK-STUFF"; |
259 | case cond_qr: return "CondQR"; |
260 | case plain_qr: return "CPQR"; |
261 | default: return "Unknown factorization method."; |
262 | } |
263 | } |
264 | |
265 | /** **/ |
266 | char *linsolqr_rmethod_description(enum reorder_method meth) { |
267 | static char sspk1[]="SPK1 reordering ala Stadtherr."; |
268 | static char stspk1[]="SPK1 reordering column wise ala Stadtherr."; |
269 | static char nat[]="Ordering as received from user"; |
270 | switch (meth) { |
271 | case spk1: return sspk1; |
272 | case tspk1: return stspk1; |
273 | case natural: return nat; |
274 | default: return "Unknown reordering method."; |
275 | } |
276 | } |
277 | |
278 | char *linsolqr_fmethod_description(enum factor_method meth) { |
279 | static char ex_ranki[] = |
280 | "SPK1 reordering with RANKI LU factoring ala Stadtherr."; |
281 | static char ex_rankib[] = |
282 | "SPK1 reordering with RANKI LU factoring ala Stadtherr, but fast."; |
283 | static char ex_rankij[]="SPK1/RANKI LU with pseudo-complete pivoting"; |
284 | static char ex_rankik[]="KIRK-STUFF/RANKI LU with pseudo-complete pivoting"; |
285 | static char ex_condqr[]="Sparse QR with condition controlled pivoting."; |
286 | static char ex_plainqr[]="Sparse QR with column pivoting."; |
287 | switch (meth) { |
288 | case ranki_kw: return ex_ranki; |
289 | case ranki_kw2: return ex_ranki; |
290 | case ranki_ba2: return ex_rankib; |
291 | case ranki_jz: return ex_rankij; |
292 | case ranki_jz2: return ex_rankij; |
293 | case ranki_ka: return ex_rankik; |
294 | case cond_qr: return ex_condqr; |
295 | case plain_qr: return ex_plainqr; |
296 | default: return "Unknown factorization method."; |
297 | } |
298 | } |
299 | |
300 | /***************************************************************************\ |
301 | internal check routines |
302 | \***************************************************************************/ |
303 | #define OK ((int)439828676) |
304 | #define DESTROYED ((int)839276847) |
305 | #define CHECK_SYSTEM(a) check_system((a),__FILE__,__LINE__) |
306 | static int check_system(linsolqr_system_t sys, char *file, int line) |
307 | /** |
308 | *** Checks the system handle. Return 0 if ok, 1 otherwise. |
309 | **/ |
310 | { |
311 | if( ISNULL(sys) ) { |
312 | FPRINTF(stderr,"ERROR: (linsolqr) check_system\n"); |
313 | FPRINTF(stderr," NULL system handle.\n"); |
314 | return 1; |
315 | } |
316 | |
317 | switch( sys->integrity ) { |
318 | case OK: |
319 | return 0; |
320 | case DESTROYED: |
321 | FPRINTF(stderr,"ERROR: (linsolqr) check_system, line %d\n",line); |
322 | FPRINTF(stderr," System was recently destroyed.\n"); |
323 | break; |
324 | default: |
325 | FPRINTF(stderr,"ERROR: (linsolqr) check_system, line %d\n",line); |
326 | FPRINTF(stderr," System was reused or never created.\n"); |
327 | break; |
328 | } |
329 | return 1; |
330 | } |
331 | |
332 | /***************************************************************************\ |
333 | external calls (and some of their internals) |
334 | \***************************************************************************/ |
335 | static void destroy_rhs_list(struct rhs_list *rl) |
336 | /** |
337 | *** Destroys rhs list. |
338 | **/ |
339 | { |
340 | while( NOTNULL(rl) ) { |
341 | struct rhs_list *p; |
342 | p = rl; |
343 | rl = rl->next; |
344 | if( NOTNULL(p->varvalue) ) |
345 | ascfree( (POINTER)(p->varvalue) ); |
346 | ascfree( (POINTER)p ); |
347 | } |
348 | } |
349 | |
350 | static struct rhs_list *find_rhs(struct rhs_list *rl,real64 *rhs) |
351 | /** |
352 | *** Searches for rhs in rhs list, returning it or NULL if not found. |
353 | **/ |
354 | { |
355 | for( ; NOTNULL(rl) ; rl = rl->next ) |
356 | if( rl->rhs == rhs ) |
357 | break; |
358 | return(rl); |
359 | } |
360 | |
361 | static struct lu_auxdata *create_ludata() |
362 | /** |
363 | *** Creates an empty zeroed ludata struct. |
364 | *** Filled up by insure_lu_capacity. |
365 | **/ |
366 | { |
367 | struct lu_auxdata *d; |
368 | d=(struct lu_auxdata *)asccalloc(1,sizeof(struct lu_auxdata)); |
369 | return d; |
370 | } |
371 | |
372 | static struct qr_auxdata *create_qrdata() |
373 | /** |
374 | *** Creates an empty zeroed qrdata struct. |
375 | *** Filled up by insure_qr_capacity. |
376 | **/ |
377 | { |
378 | struct qr_auxdata *d; |
379 | d=(struct qr_auxdata *)asccalloc(1,sizeof(struct qr_auxdata)); |
380 | return d; |
381 | } |
382 | |
383 | static void destroy_ludata(struct lu_auxdata *d) { |
384 | /** |
385 | *** Conditionally destroys a ludata struct and its parts. |
386 | **/ |
387 | if (NOTNULL(d)) { |
388 | if (NOTNULL(d->pivlist)) ascfree(d->pivlist); |
389 | d->pivlist=NULL; |
390 | if (NOTNULL(d->tmp)) ascfree(d->tmp); |
391 | d->tmp=NULL; |
392 | ascfree(d); |
393 | } |
394 | } |
395 | |
396 | static void destroy_qrdata(struct qr_auxdata *d) { |
397 | /** |
398 | *** Conditionally destroys a qrdata struct and its parts. |
399 | **/ |
400 | if (NOTNULL(d)) { |
401 | if (NOTNULL(d->alpha)) ascfree(d->alpha); |
402 | if (NOTNULL(d->sigma)) ascfree(d->sigma); |
403 | if (NOTNULL(d->tau)) ascfree(d->tau); |
404 | if (NOTNULL(d->hhcol)) ascfree(d->hhcol); |
405 | if (NOTNULL(d->hhrow)) ascfree(d->hhrow); |
406 | if (NOTNULL(d->fill)) ascfree(d->fill); |
407 | |
408 | ascfree(d); |
409 | } |
410 | } |
411 | |
412 | linsolqr_system_t linsolqr_create() |
413 | { |
414 | linsolqr_system_t sys; |
415 | |
416 | sys = (linsolqr_system_t)ascmalloc( sizeof(struct linsolqr_header) ); |
417 | sys->fclass = unknown_c; |
418 | sys->fmethod = unknown_f; |
419 | sys->rmethod = unknown_r; |
420 | sys->integrity = OK; |
421 | sys->capacity = 0; |
422 | sys->coef = NULL; |
423 | sys->rl = NULL; |
424 | sys->rlength = 0; |
425 | sys->pivot_zero = 1e-12; /* default value */ |
426 | sys->ptol = 0.1; /* default value */ |
427 | sys->ctol = 0.1; /* default value */ |
428 | sys->dtol = LINQR_DROP_TOLERANCE; /* default value */ |
429 | sys->factors = NULL; |
430 | sys->inverse = NULL; |
431 | sys->factored = FALSE; |
432 | sys->rowdeps = FALSE; |
433 | sys->coldeps = FALSE; |
434 | sys->rng.low = sys->rng.high = -1; |
435 | sys->reg.row.low = sys->reg.row.high = -1; |
436 | sys->reg.col.low = sys->reg.col.high = -1; |
437 | sys->rank = -1; |
438 | sys->smallest_pivot = MAXDOUBLE; |
439 | sys->qrdata = NULL; |
440 | sys->ludata = NULL; |
441 | return(sys); |
442 | } |
443 | |
444 | void linsolqr_destroy(linsolqr_system_t sys) |
445 | { |
446 | if(CHECK_SYSTEM(sys)) { |
447 | FPRINTF(stderr,"Bad linsolqr_system_t found. Not destroyed.\n"); |
448 | return; |
449 | } |
450 | if( NOTNULL(sys->coef) ) { |
451 | FPRINTF(stderr,"Notice: (linsolqr) non-NULL coefficient matrix\n"); |
452 | FPRINTF(stderr," in linsolqr system not being\n"); |
453 | FPRINTF(stderr," destroyed with linsolqr_system_t.\n"); |
454 | } |
455 | if( NOTNULL(sys->inverse) ) |
456 | mtx_destroy(sys->inverse); |
457 | if( NOTNULL(sys->factors) ) |
458 | mtx_destroy(sys->factors); |
459 | destroy_rhs_list(sys->rl); |
460 | destroy_qrdata(sys->qrdata); |
461 | destroy_ludata(sys->ludata); |
462 | sys->integrity = DESTROYED; |
463 | ascfree( (POINTER)sys ); |
464 | } |
465 | |
466 | void linsolqr_set_matrix(linsolqr_system_t sys,mtx_matrix_t mtx) |
467 | { |
468 | if(CHECK_SYSTEM(sys)) { |
469 | FPRINTF(stderr,"Bad linsolqr_system_t found. coef mtx not set.\n"); |
470 | return; |
471 | } |
472 | sys->coef = mtx; |
473 | } |
474 | |
475 | |
476 | void linsolqr_set_region(linsolqr_system_t sys,mtx_region_t region) |
477 | { |
478 | if(CHECK_SYSTEM(sys)) { |
479 | FPRINTF(stderr,"Bad linsolqr_system_t found. coef mtx not set.\n"); |
480 | return; |
481 | } |
482 | sys->reg = region; |
483 | } |
484 | |
485 | |
486 | mtx_matrix_t linsolqr_get_matrix(linsolqr_system_t sys) |
487 | { |
488 | CHECK_SYSTEM(sys); |
489 | return( sys->coef ); |
490 | } |
491 | |
492 | mtx_region_t *linsolqr_get_region(linsolqr_system_t sys) |
493 | { |
494 | return &(sys->reg); |
495 | } |
496 | |
497 | void linsolqr_add_rhs(linsolqr_system_t sys, |
498 | real64 *rhs, |
499 | boolean transpose) |
500 | { |
501 | struct rhs_list *rl; |
502 | |
503 | if(CHECK_SYSTEM(sys)) { |
504 | FPRINTF(stderr,"Bad linsolqr_system_t found. rhs not added.\n"); |
505 | return; |
506 | } |
507 | rl = find_rhs(sys->rl,rhs); |
508 | if( NOTNULL(rl) ) { |
509 | return; |
510 | } else { |
511 | if( NOTNULL(rhs) ) { |
512 | rl = (struct rhs_list *)ascmalloc( sizeof(struct rhs_list) ); |
513 | rl->rhs = rhs; |
514 | rl->varvalue = NULL; |
515 | rl->solved = FALSE; |
516 | rl->transpose = transpose; |
517 | rl->next = sys->rl; |
518 | sys->rl = rl; |
519 | ++(sys->rlength); |
520 | if (sys->capacity>0) { |
521 | rl->varvalue = |
522 | (real64 *)ascmalloc(sys->capacity*sizeof(real64)); |
523 | } |
524 | } |
525 | } |
526 | } |
527 | |
528 | void linsolqr_remove_rhs(linsolqr_system_t sys,real64 *rhs) |
529 | { |
530 | struct rhs_list **q; |
531 | |
532 | if(CHECK_SYSTEM(sys)) { |
533 | FPRINTF(stderr,"Bad linsolqr_system_t found. rhs not removed.\n"); |
534 | return; |
535 | } |
536 | for( q = &(sys->rl) ; NOTNULL(*q) ; q = &((*q)->next) ) |
537 | if( (*q)->rhs == rhs ) |
538 | break; |
539 | if( NOTNULL(*q) ) { |
540 | struct rhs_list *p; |
541 | p = *q; |
542 | *q = p->next; |
543 | if( NOTNULL(p->varvalue) ) |
544 | ascfree( (POINTER)(p->varvalue) ); |
545 | ascfree( (POINTER)p ); |
546 | --(sys->rlength); |
547 | } else if( NOTNULL(rhs) ) { |
548 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_remove_rhs\n"); |
549 | FPRINTF(stderr," Rhs does not exist.\n"); |
550 | } |
551 | } |
552 | |
553 | int32 linsolqr_number_of_rhs(linsolqr_system_t sys) |
554 | { |
555 | CHECK_SYSTEM(sys); |
556 | return( sys->rlength ); |
557 | } |
558 | |
559 | real64 *linsolqr_get_rhs(linsolqr_system_t sys,int n) |
560 | { |
561 | struct rhs_list *rl; |
562 | int count; |
563 | |
564 | CHECK_SYSTEM(sys); |
565 | |
566 | count = sys->rlength - 1 - n; |
567 | if( count < 0 ) return(NULL); |
568 | for( rl = sys->rl ; count > 0 && NOTNULL(rl) ; rl = rl->next ) |
569 | --count; |
570 | return( ISNULL(rl) ? NULL : rl->rhs ); |
571 | } |
572 | |
573 | void linsolqr_matrix_was_changed(linsolqr_system_t sys) |
574 | { |
575 | if(CHECK_SYSTEM(sys)) { |
576 | FPRINTF(stderr, |
577 | "Bad linsolqr_system_t found. matrix change message ignored.\n"); |
578 | return; |
579 | } |
580 | sys->rowdeps = sys->coldeps = sys->factored = FALSE; |
581 | } |
582 | |
583 | void linsolqr_rhs_was_changed(linsolqr_system_t sys, real64 *rhs) |
584 | { |
585 | struct rhs_list *rl; |
586 | |
587 | if(CHECK_SYSTEM(sys)) { |
588 | FPRINTF(stderr,"Bad linsolqr_system_t found. rhs change ignored.\n"); |
589 | return; |
590 | } |
591 | rl = find_rhs(sys->rl,rhs); |
592 | if( NOTNULL(rl) ) { |
593 | rl->solved = FALSE; |
594 | } else if( NOTNULL(rhs) ) { |
595 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_rhs_was_modified\n"); |
596 | FPRINTF(stderr," Rhs does not exist.\n"); |
597 | } |
598 | } |
599 | |
600 | void linsolqr_set_pivot_zero(linsolqr_system_t sys,real64 pivot_zero) |
601 | { |
602 | if(CHECK_SYSTEM(sys)) { |
603 | FPRINTF(stderr,"Bad linsolqr_system_t found. set_pivot_zero ignored.\n"); |
604 | return; |
605 | } |
606 | if( pivot_zero < D_ZERO ) { |
607 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_pivot_zero\n"); |
608 | FPRINTF(stderr," Invalid pivot zero of %g\n",pivot_zero); |
609 | } else { |
610 | sys->pivot_zero = pivot_zero; |
611 | linsolqr_matrix_was_changed(sys); |
612 | } |
613 | } |
614 | |
615 | real64 linsolqr_pivot_zero(linsolqr_system_t sys) |
616 | { |
617 | CHECK_SYSTEM(sys); |
618 | return( sys->pivot_zero ); |
619 | } |
620 | |
621 | void linsolqr_set_pivot_tolerance(linsolqr_system_t sys, real64 ptol) |
622 | { |
623 | if(CHECK_SYSTEM(sys)) { |
624 | FPRINTF(stderr,"Bad linsolqr_system_t found. set_pivot_tol ignored.\n"); |
625 | return; |
626 | } |
627 | if( ptol < D_ZERO || ptol > D_ONE ) { |
628 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_pivot_tolerance\n"); |
629 | FPRINTF(stderr," Invalid pivot tolerance of %g\n",ptol); |
630 | } else { |
631 | sys->ptol = ptol; |
632 | linsolqr_matrix_was_changed(sys); |
633 | } |
634 | } |
635 | |
636 | void linsolqr_set_condition_tolerance(linsolqr_system_t sys, real64 ctol) |
637 | { |
638 | if(CHECK_SYSTEM(sys)) { |
639 | FPRINTF(stderr, |
640 | "Bad linsolqr_system_t found. set_condition_tolerance ignored.\n"); |
641 | return; |
642 | } |
643 | if( ctol < D_ZERO || ctol > D_ONE ) { |
644 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_condition_tolerance\n"); |
645 | FPRINTF(stderr," Invalid condition tolerance of %g\n",ctol); |
646 | } else { |
647 | sys->ctol = ctol; |
648 | linsolqr_matrix_was_changed(sys); |
649 | } |
650 | } |
651 | |
652 | void linsolqr_set_drop_tolerance(linsolqr_system_t sys, real64 dtol) |
653 | { |
654 | if(CHECK_SYSTEM(sys)) { |
655 | FPRINTF(stderr, |
656 | "Bad linsolqr_system_t found. set_drop_tolerance ignored.\n"); |
657 | return; |
658 | } |
659 | if( dtol < D_ZERO || dtol > D_ONE ) { |
660 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_drop_tolerance\n"); |
661 | FPRINTF(stderr," Invalid drop tolerance of %g\n",dtol); |
662 | } else { |
663 | sys->dtol = dtol; |
664 | linsolqr_matrix_was_changed(sys); |
665 | } |
666 | } |
667 | |
668 | real64 linsolqr_pivot_tolerance(linsolqr_system_t sys) |
669 | { |
670 | CHECK_SYSTEM(sys); |
671 | return( sys->ptol ); |
672 | } |
673 | |
674 | real64 linsolqr_condition_tolerance(linsolqr_system_t sys) |
675 | { |
676 | CHECK_SYSTEM(sys); |
677 | return( sys->ctol ); |
678 | } |
679 | |
680 | real64 linsolqr_drop_tolerance(linsolqr_system_t sys) |
681 | { |
682 | CHECK_SYSTEM(sys); |
683 | return( sys->dtol ); |
684 | } |
685 | |
686 | extern enum factor_class linsolqr_fclass(linsolqr_system_t sys) |
687 | { |
688 | CHECK_SYSTEM(sys); |
689 | return( sys->fclass ); |
690 | } |
691 | extern enum factor_method linsolqr_fmethod(linsolqr_system_t sys) |
692 | { |
693 | CHECK_SYSTEM(sys); |
694 | return( sys->fmethod ); |
695 | } |
696 | extern enum reorder_method linsolqr_rmethod(linsolqr_system_t sys) |
697 | { |
698 | CHECK_SYSTEM(sys); |
699 | return( sys->rmethod ); |
700 | } |
701 | |
702 | int32 linsolqr_rank(linsolqr_system_t sys) |
703 | { |
704 | CHECK_SYSTEM(sys); |
705 | if( !sys->factored ) { |
706 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_rank\n"); |
707 | FPRINTF(stderr," System not factored yet.\n"); |
708 | } |
709 | return(sys->rank); |
710 | } |
711 | |
712 | real64 linsolqr_smallest_pivot(linsolqr_system_t sys) |
713 | { |
714 | CHECK_SYSTEM(sys); |
715 | #if LINSOL_DEBUG |
716 | if( !sys->factored ) { |
717 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_smallest_pivot\n"); |
718 | FPRINTF(stderr," System not factored yet.\n"); |
719 | } |
720 | #endif |
721 | return(sys->smallest_pivot); |
722 | } |
723 | |
724 | /***************************************************************************\ |
725 | Commonly used internal functions for sparse linear solvers based on mtx. |
726 | void insure_capacity(sys) |
727 | void insure_lu_capacity(sys) |
728 | void insure_qr_capacity(sys) |
729 | void forward_substitute(sys,rvec,transpose) |
730 | void backward_substitute(sys,rvec,transpose) |
731 | macro SQR(x) |
732 | int find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi) |
733 | \***************************************************************************/ |
734 | |
735 | static real64 *raise_capacity(real64 *vec, |
736 | int32 oldcap, |
737 | int32 newcap) |
738 | /** |
739 | *** Raises the capacity of the array and returns a new array. |
740 | *** It is assumed that oldcap < newcap. vec is destroyed or |
741 | *** returned as appropriate. |
742 | *** If !NDEBUG, the vector expanded is also set to 0. |
743 | **/ |
744 | { |
745 | real64 *newvec=NULL; |
746 | int i; |
747 | if (newcap < oldcap) { |
748 | #ifndef NDEBUG |
749 | for (i = 0; i < newcap; i++) { |
750 | newvec[i] = 0.0; |
751 | } |
752 | #endif |
753 | return vec; |
754 | } |
755 | if (NOTNULL(vec)) { |
756 | /* don't call realloc on null with newcap 0 or it frees */ |
757 | newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64))); |
758 | } else { |
759 | newvec=(newcap > 0 ? |
760 | (real64 *)ascmalloc( newcap * sizeof(real64) ) : NULL); |
761 | } |
762 | #ifndef NDEBUG |
763 | for (i = 0; i < newcap; i++) { |
764 | newvec[i] = 0.0; |
765 | } |
766 | #endif |
767 | return newvec; |
768 | } |
769 | |
770 | static int32 *raise_qr_int_capacity(int32 *vec, |
771 | int32 oldcap, |
772 | int32 newcap) |
773 | /** |
774 | *** Raises the capacity of the index array and returns a new array. |
775 | *** It is assumed that oldcap < newcap. vec is destroyed or |
776 | *** returned as appropriate. vec returned is zeroed. |
777 | *** calling with newcap=0 does not force deallocation. |
778 | **/ |
779 | { |
780 | int32 *newvec=NULL; |
781 | if (newcap < oldcap) |
782 | return vec; |
783 | if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */ |
784 | newvec=(int32 *)ascrealloc(vec,(newcap * sizeof(int32))); |
785 | mtx_zero_int32(vec,newcap); |
786 | } else { |
787 | newvec= (newcap > 0 ? |
788 | (int32 *)asccalloc( newcap , sizeof(int32) ) : NULL); |
789 | } |
790 | return newvec; |
791 | } |
792 | |
793 | static real64 *raise_qr_real_capacity(real64 *vec, |
794 | int32 oldcap, |
795 | int32 newcap) |
796 | /** |
797 | *** Raises the capacity of the real array and returns a new array. |
798 | *** It is assumed that oldcap < newcap. vec is destroyed or |
799 | *** returned as appropriate. vec returned is zeroed. |
800 | *** calling with newcap=0 does not force deallocation. |
801 | **/ |
802 | { |
803 | real64 *newvec=NULL; |
804 | if (newcap < oldcap) |
805 | return vec; |
806 | if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */ |
807 | newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64))); |
808 | mtx_zero_real64(vec,newcap); |
809 | } else { |
810 | newvec= (newcap > 0 ? |
811 | (real64 *)asccalloc( newcap , sizeof(real64) ) : NULL); |
812 | } |
813 | return newvec; |
814 | } |
815 | |
816 | static struct qr_fill_t *raise_qr_fill_capacity(struct qr_fill_t *vec, |
817 | int32 oldcap, |
818 | int32 newcap) |
819 | /** |
820 | *** Raises the capacity of the fill array and returns a new array. |
821 | *** It is assumed that oldcap < newcap. vec is destroyed or |
822 | *** returned as appropriate. vec returned is zeroed. |
823 | *** calling with newcap=0 does not force deallocation. |
824 | **/ |
825 | { |
826 | struct qr_fill_t *newvec=NULL; |
827 | if (newcap < oldcap) |
828 | return vec; |
829 | if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */ |
830 | newvec=(struct qr_fill_t *) |
831 | ascrealloc(vec,(newcap * sizeof(struct qr_fill_t))); |
832 | mtx_zero_char((char *)vec,newcap*sizeof(struct qr_fill_t)); |
833 | } else { |
834 | newvec= (newcap > 0 ? |
835 | (struct qr_fill_t *)asccalloc( newcap , sizeof(struct qr_fill_t) ) : |
836 | NULL); |
837 | } |
838 | return newvec; |
839 | } |
840 | |
841 | static void insure_capacity(linsolqr_system_t sys) |
842 | /** |
843 | *** Insures that the capacity of all of the solution vectors |
844 | *** for each rhs is large enough. |
845 | *** The above implies a malloc if varvalue is null. |
846 | *** Assumes varvalue are at sys->capacity already. |
847 | **/ |
848 | { |
849 | int32 req_cap; |
850 | req_cap = mtx_capacity(sys->coef); |
851 | |
852 | if( req_cap > sys->capacity ) { |
853 | struct rhs_list *rl; |
854 | |
855 | for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next ) |
856 | rl->varvalue = raise_capacity(rl->varvalue,sys->capacity,req_cap); |
857 | sys->capacity = req_cap; |
858 | } |
859 | } |
860 | |
861 | static void insure_lu_capacity(linsolqr_system_t sys) |
862 | /** |
863 | *** Insures that the capacity of all of the ludata vectors. |
864 | *** The above implies a malloc if vector is null. |
865 | *** If not null, implies an extension if needed. |
866 | **/ |
867 | { |
868 | int32 req_cap; |
869 | if (!sys || !(sys->coef) || !(sys->ludata)) { |
870 | FPRINTF(stderr,"linsolqr: insure_lu_capacity called with NULL pointer"); |
871 | return; |
872 | } |
873 | req_cap = mtx_capacity(sys->coef); |
874 | |
875 | if( req_cap > sys->ludata->cap ) { |
876 | sys->ludata->pivlist = |
877 | raise_capacity(sys->ludata->pivlist,sys->ludata->cap,req_cap); |
878 | sys->ludata->tmp = |
879 | raise_capacity(sys->ludata->tmp,sys->ludata->cap,req_cap); |
880 | sys->ludata->cap = req_cap; |
881 | } |
882 | } |
883 | |
884 | static void insure_qr_capacity(linsolqr_system_t sys) |
885 | /** |
886 | *** Insures that the capacity of all of the qrdata vectors |
887 | *** is large enough. |
888 | *** The above implies a calloc if vector is null. |
889 | *** If not null, implies an extension and zeroing of the vector. |
890 | *** Also zeroes the simple elements of the qrdata. |
891 | **/ |
892 | { |
893 | int32 req_cap; |
894 | if (!sys || !(sys->coef) || !(sys->qrdata)) { |
895 | FPRINTF(stderr,"linsolqr: insure_qr_capacity called with NULL pointer"); |
896 | return; |
897 | } |
898 | req_cap = mtx_capacity(sys->coef); |
899 | |
900 | if( req_cap > sys->qrdata->cap ) { |
901 | sys->qrdata->alpha = |
902 | raise_qr_real_capacity(sys->qrdata->alpha,sys->qrdata->cap,req_cap); |
903 | sys->qrdata->sigma = |
904 | raise_qr_real_capacity(sys->qrdata->sigma,sys->qrdata->cap,req_cap); |
905 | sys->qrdata->tau = |
906 | raise_qr_real_capacity(sys->qrdata->tau,sys->qrdata->cap,req_cap); |
907 | sys->qrdata->hhcol = |
908 | raise_qr_real_capacity(sys->qrdata->hhcol,sys->qrdata->cap,req_cap); |
909 | sys->qrdata->hhrow = |
910 | raise_qr_real_capacity(sys->qrdata->hhrow,sys->qrdata->cap,req_cap); |
911 | |
912 | sys->qrdata->sp.data = |
913 | raise_qr_real_capacity(sys->qrdata->sp.data,sys->qrdata->cap,req_cap); |
914 | sys->qrdata->sp.idata = |
915 | raise_qr_int_capacity(sys->qrdata->sp.idata,sys->qrdata->cap,req_cap); |
916 | |
917 | sys->qrdata->fill = |
918 | raise_qr_fill_capacity(sys->qrdata->fill,sys->qrdata->cap,req_cap); |
919 | |
920 | sys->qrdata->cap = req_cap; |
921 | sys->qrdata->sp.cap = req_cap; |
922 | sys->qrdata->sp.len = 0; |
923 | } |
924 | sys->qrdata->nu=D_ZERO; |
925 | sys->qrdata->anu=D_ZERO; |
926 | sys->qrdata->asubnu=D_ZERO; |
927 | } |
928 | |
929 | static void forward_substitute(linsolqr_system_t sys, |
930 | real64 *arr, |
931 | boolean transpose) |
932 | /** |
933 | *** Forward substitute. It is assumed that the L (or U) part of |
934 | *** sys->factors is computed. This function converts c to x in place. The |
935 | *** values are stored in arr indexed by original row number (or original |
936 | *** column number). |
937 | *** |
938 | *** transpose = FALSE: transpose = TRUE: |
939 | *** T |
940 | *** L x = c U x = c |
941 | *** |
942 | *** The following formula holds: |
943 | *** 0<=k<r ==> x(k) = [c(k) - L(k,(0..k-1)) dot x(0..k-1)] / L(k,k) |
944 | *** or |
945 | *** 0<=k<r ==> x(k) = [c(k) - U((0..k-1),k) dot x(0..k-1)] / U(k,k) |
946 | *** |
947 | *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k] |
948 | *** and in matrix. |
949 | **/ |
950 | { |
951 | mtx_range_t dot_rng; |
952 | mtx_coord_t nz; |
953 | real64 sum, *pivlist; |
954 | mtx_matrix_t mtx; |
955 | int32 dotlim; |
956 | boolean nonzero_found=FALSE; |
957 | |
958 | mtx=sys->factors; |
959 | pivlist=sys->ludata->pivlist; |
960 | dot_rng.low = sys->rng.low; |
961 | dotlim=dot_rng.low+sys->rank; |
962 | if (transpose) { /* arr is indexed by original column number */ |
963 | for( nz.col=dot_rng.low; nz.col < dotlim; ++(nz.col) ) { |
964 | register int32 org_col; |
965 | |
966 | dot_rng.high = nz.col - 1; |
967 | org_col = mtx_col_to_org(mtx,nz.col); |
968 | if (arr[org_col]!=D_ZERO) nonzero_found=TRUE; |
969 | if (nonzero_found) { |
970 | sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE); |
971 | /* arr[org_col] = (arr[org_col] - sum) / D_ONE */; |
972 | arr[org_col] -=sum; |
973 | } |
974 | } |
975 | |
976 | } else { /* arr is indexed by original row number */ |
977 | for( nz.row=dot_rng.low; nz.row < dotlim; ++(nz.row) ) { |
978 | register int32 org_row; |
979 | |
980 | dot_rng.high = nz.row - 1; |
981 | org_row = mtx_row_to_org(mtx,nz.row); |
982 | if (arr[org_row]!=D_ZERO) nonzero_found=TRUE; |
983 | if (nonzero_found) { |
984 | sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE); |
985 | /* |
986 | nz.col = nz.row; |
987 | arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz); |
988 | */ |
989 | arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row]; |
990 | } |
991 | } |
992 | } |
993 | } |
994 | |
995 | static void backward_substitute(linsolqr_system_t sys, |
996 | real64 *arr, |
997 | boolean transpose) |
998 | /** |
999 | *** Backward substitute. It is assumed that the U (or L) part of |
1000 | *** sys->factors is computed. This function converts rhs to c in place. The |
1001 | *** values are stored in arr indexed by original row number (or original |
1002 | *** column number). |
1003 | *** |
1004 | *** transpose = FALSE: transpose = TRUE: |
1005 | *** T |
1006 | *** U c = rhs L c = rhs |
1007 | *** |
1008 | *** The following formula holds: |
1009 | *** transpose=FALSE: (the usual for J.dx=-f where rhs is -f) |
1010 | *** 0<=k<r ==> c(k) = [rhs(k) - U(k,(k+1..r-1)) dot c(k+1..r-1)] / U(k,k) |
1011 | *** working up from the bottom. |
1012 | *** or |
1013 | *** transpose=TRUE: |
1014 | *** 0<=k<r ==> c(k) = [rhs(k) - L((k+1..r-1),k) dot c(k+1..r-1)] / L(k,k) |
1015 | *** working right to left. |
1016 | *** |
1017 | *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k] |
1018 | *** and in matrix. |
1019 | **/ |
1020 | { |
1021 | mtx_range_t dot_rng; |
1022 | mtx_coord_t nz; |
1023 | real64 sum, *pivlist; |
1024 | mtx_matrix_t mtx; |
1025 | int32 dotlim; |
1026 | boolean nonzero_found=FALSE; /* once TRUE, substitution must be done |
1027 | over remaining rows/cols */ |
1028 | |
1029 | dot_rng.high = sys->rng.low + sys->rank - 1; |
1030 | dotlim=sys->rng.low; |
1031 | mtx=sys->factors; |
1032 | pivlist=sys->ludata->pivlist; |
1033 | if (transpose) { /* arr is indexed by original column number */ |
1034 | for( nz.col = dot_rng.high ; nz.col >= dotlim ; --(nz.col) ) { |
1035 | register int32 org_col; |
1036 | |
1037 | dot_rng.low = nz.col + 1; |
1038 | |
1039 | org_col = mtx_col_to_org(mtx,nz.col); |
1040 | if (arr[org_col]!=D_ZERO) nonzero_found=TRUE; |
1041 | if (nonzero_found) { |
1042 | sum= mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE); |
1043 | /* |
1044 | reminders: |
1045 | nz.row = nz.col; |
1046 | arr[org_col] = (arr[org_col] - sum) / mtx_value(mtx,&nz); |
1047 | */ |
1048 | arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col]; |
1049 | } |
1050 | } |
1051 | } else { /* arr is indexed by original row number */ |
1052 | /* we are working from the bottom up */ |
1053 | for( nz.row = dot_rng.high ; nz.row >= dotlim ; --(nz.row) ) { |
1054 | register int32 org_row; |
1055 | |
1056 | dot_rng.low = nz.row + 1; |
1057 | org_row = mtx_row_to_org(mtx,nz.row); |
1058 | |
1059 | if (arr[org_row]!=D_ZERO) nonzero_found=TRUE; |
1060 | if (nonzero_found) { |
1061 | sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE); |
1062 | /* |
1063 | reminders: |
1064 | nz.row = nz.col; |
1065 | arr[org_row] = (arr[org_row] - sum) / D_ONE; |
1066 | */ |
1067 | arr[org_row] -= sum; |
1068 | } |
1069 | } |
1070 | } |
1071 | } |
1072 | |
1073 | #ifdef SQR |
1074 | #undef SQR |
1075 | #endif |
1076 | #define SQR(x) ((x)*(x)) |
1077 | /* |
1078 | * int32 find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi); |
1079 | * real64 *vec,*rvec,tol; |
1080 | * int32 len, *ivec, *maxi; |
1081 | * |
1082 | * Search array vec of positive numbers for the first entry, k, which passes |
1083 | * (vec[k] >= tol*vecmax) where vecmax is the largest value in vec. |
1084 | * vec is an array len long. rvec, ivec are (worst case) len long. |
1085 | * *maxi will be set to the index of the first occurence of the largest |
1086 | * number in vec which is >= eps. |
1087 | * 0.0 <= tol <= 1. |
1088 | * If tol <=0, no search is done (*maxi = 0, k = 0). |
1089 | * Eps is an absolute number which vec values must be >= to before |
1090 | * being eligible for the tol test. |
1091 | * |
1092 | * Best case, highly nonlinear feature: |
1093 | * if on entry *maxi = len, we will do a simplified search for the |
1094 | * first k which satisfies tol and eps tests, based on the value |
1095 | * stored at vec[len-1]. |
1096 | * |
1097 | * We could allocate and deallocate rvec/ivec since they are |
1098 | * temporaries, but it is more efficient for caller to manage that. |
1099 | * If tol>=1.0, rvec and ivec are ignored and may be NULL. |
1100 | * NOTE: we are going on value vec[i], not fabs(vec[i]). |
1101 | * Returns k. |
1102 | * Remember: GIGO. |
1103 | * Failure modes- if all numbers are <= 0.0, k and *maxi will be returned 0. |
1104 | * Failure modes- if all numbers are < eps, k and *maxi will be returned 0. |
1105 | * |
1106 | * find_pivot_index, should we implement, would search an int vector. |
1107 | * |
1108 | * Hint: if you know you have not increased any values in vec and have not |
1109 | * changed the value at vec[maxi] before a subsequent search, then call |
1110 | * with len = previous maxi+1 to reduce the search time. |
1111 | */ |
1112 | static int32 find_pivot_number(const real64 *vec, |
1113 | const int32 len, |
1114 | const real64 tol, |
1115 | const real64 eps, |
1116 | int32 * const ivec, |
1117 | real64 * const rvec, |
1118 | int32 * const maxi) |
1119 | { |
1120 | int32 j,k; |
1121 | if (tol <= D_ZERO ) { |
1122 | *maxi = 0; |
1123 | return 0; |
1124 | } |
1125 | if (tol >= D_ONE) { |
1126 | register real64 biggest = MAX(eps,D_ZERO); |
1127 | /* get the biggest, period */ |
1128 | k = 0; |
1129 | for (j=0; j < len; j++) { |
1130 | if (vec[j] > biggest) { |
1131 | biggest = vec[j]; |
1132 | k = j; |
1133 | } |
1134 | } |
1135 | *maxi = k; |
1136 | } else { |
1137 | int32 bigi; |
1138 | bigi = 0; |
1139 | rvec[0] = eps; |
1140 | ivec[0] = 0; |
1141 | if (*maxi==len) { |
1142 | /* cheap search against eps and tol, no list. */ |
1143 | register real64 thold; |
1144 | thold = tol * vec[len-1]; |
1145 | if (thold >= eps) { |
1146 | for (k = 0; k < len && vec[k] < thold; k++); |
1147 | } else { |
1148 | for (k = 0; k < len && vec[k] < eps; k++); |
1149 | } |
1150 | /* adjust maxi to still point at last location, as indicated by len */ |
1151 | (*maxi)--; |
1152 | } else { |
1153 | real64 rlast; |
1154 | rlast = eps; |
1155 | /* create short list */ |
1156 | for (j=0; j < len; j++) { |
1157 | while (j < len && vec[j] <= rlast) j++; /* skip to next max */ |
1158 | if (j < len) { |
1159 | ivec[bigi] = j; |
1160 | rvec[bigi] = vec[j]; |
1161 | bigi++; /* bigi ends up being len of rvec or, if vec all<= eps, 0 */ |
1162 | rlast = rvec[bigi-1]; |
1163 | } |
1164 | } |
1165 | if (bigi == 0) { |
1166 | *maxi = k = 0; |
1167 | } else { |
1168 | register real64 thold; |
1169 | /* Note: if bigi is enormous, we should do a binary search, |
1170 | not linear. */ |
1171 | *maxi = ivec[bigi-1]; |
1172 | thold = tol * rvec[bigi-1]; |
1173 | /* get pivot from shortlist, searching backward */ |
1174 | if (thold >= eps) { |
1175 | for (k = bigi-1; k >= 0 && rvec[k] >= thold; k--); |
1176 | } else { |
1177 | for (k = bigi-1; k >= 0 && rvec[k] >= eps; k--); |
1178 | } |
1179 | /* translate pivot to vec index */ |
1180 | k = ivec[k+1]; |
1181 | } |
1182 | } |
1183 | } |
1184 | return k; |
1185 | } |
1186 | |
1187 | static void calc_dependent_rows_ranki1(linsolqr_system_t sys) |
1188 | { |
1189 | mtx_coord_t nz; |
1190 | real64 value; |
1191 | mtx_range_t colrange; |
1192 | mtx_range_t rowrange; |
1193 | real64 *lc; |
1194 | mtx_matrix_t mtx; |
1195 | |
1196 | sys->rowdeps = TRUE; |
1197 | if( ( (sys->reg.row.low == sys->rng.low) && |
1198 | ( sys->reg.row.high == sys->rng.low+sys->rank-1 ) |
1199 | ) || sys->rank==0 ) |
1200 | return; |
1201 | |
1202 | lc = sys->ludata->tmp; |
1203 | colrange.low = sys->rng.low; |
1204 | colrange.high = colrange.low + sys->rank - 1; |
1205 | rowrange.low = sys->rng.high; |
1206 | rowrange.high = sys->rng.low+sys->rank; |
1207 | mtx=sys->factors; |
1208 | |
1209 | nz.row = sys->reg.row.low; |
1210 | for( ; nz.row <= sys->reg.row.high; nz.row++ ) { |
1211 | if( nz.row == sys->rng.low ) { |
1212 | nz.row = rowrange.high-1; |
1213 | continue; |
1214 | } |
1215 | mtx_zero_real64(lc,(sys->capacity)); |
1216 | mtx_org_row_vec(mtx,nz.row,lc,&colrange); |
1217 | if( nz.row < rowrange.high || nz.row > rowrange.low ) |
1218 | backward_substitute(sys,lc,TRUE); |
1219 | forward_substitute(sys,lc,TRUE); |
1220 | mtx_clear_row(mtx,nz.row,&colrange); |
1221 | for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) { |
1222 | value = lc[mtx_col_to_org(mtx,nz.col)]; |
1223 | if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value); |
1224 | } |
1225 | } |
1226 | } |
1227 | |
1228 | |
1229 | static void calc_dependent_cols_ranki1(linsolqr_system_t sys) |
1230 | { |
1231 | mtx_coord_t nz; |
1232 | real64 value; |
1233 | mtx_range_t rowrange; |
1234 | mtx_range_t colrange; |
1235 | real64 *lc; |
1236 | mtx_matrix_t mtx; |
1237 | |
1238 | sys->coldeps = TRUE; |
1239 | if( ( (sys->reg.col.low == sys->rng.low) && |
1240 | ( sys->reg.col.high == sys->rng.low+sys->rank-1 ) |
1241 | ) || sys->rank==0 ) |
1242 | return; |
1243 | |
1244 | lc = sys->ludata->tmp; |
1245 | rowrange.low = sys->rng.low; |
1246 | rowrange.high = rowrange.low + sys->rank - 1; |
1247 | colrange.high = sys->rng.low+sys->rank; |
1248 | colrange.low = sys->rng.high; |
1249 | mtx=sys->factors; |
1250 | |
1251 | nz.col = sys->reg.col.low; |
1252 | for( ; nz.col <= sys->reg.col.high; nz.col++ ) { |
1253 | if( nz.col == sys->rng.low ) { |
1254 | nz.col = colrange.high-1; |
1255 | continue; |
1256 | } |
1257 | mtx_zero_real64(lc,sys->capacity); |
1258 | mtx_org_col_vec(mtx,nz.col,lc,&rowrange); |
1259 | if( nz.col < colrange.high || nz.col > colrange.low ) |
1260 | backward_substitute(sys,lc,FALSE); |
1261 | forward_substitute(sys,lc,FALSE); |
1262 | mtx_clear_col(mtx,nz.col,&rowrange); |
1263 | for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) { |
1264 | value = lc[mtx_row_to_org(mtx,nz.row)]; |
1265 | if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value); |
1266 | } |
1267 | } |
1268 | } |
1269 | |
1270 | /** |
1271 | *** Given a matrix and a diagonal range, sets the diagonal elements to 0 |
1272 | *** but does not delete them in the range low to high inclusive. |
1273 | *** worst cost= k*(nonzeros in rows of range treated). |
1274 | *** likely cost= k*(nonzeros in column pivoted rows of range treated) |
1275 | *** since you can smartly put in the diagonal last and find |
1276 | *** it first on many occasions. |
1277 | *** This is a good bit cheaper than deleting the elements unless one |
1278 | *** expects thousands of solves. |
1279 | **/ |
1280 | static void zero_diagonal_elements(mtx_matrix_t mtx, |
1281 | int32 low, int32 high) |
1282 | { |
1283 | mtx_coord_t nz; |
1284 | for (nz.row = low; nz.row <= high; nz.row++) { |
1285 | nz.col = nz.row; |
1286 | mtx_set_value(mtx,&nz,0.0); |
1287 | } |
1288 | } |
1289 | |
1290 | static void zero_unpivoted_vars(linsolqr_system_t sys, |
1291 | real64 *varvalues, |
1292 | boolean transpose) |
1293 | /** |
1294 | *** Sets the values of unpivoted variables to zero. |
1295 | **/ |
1296 | { |
1297 | int32 ndx,order; |
1298 | mtx_matrix_t mtx; |
1299 | |
1300 | mtx = sys->factors; |
1301 | order = mtx_order(mtx); |
1302 | for( ndx = 0 ; ndx < sys->rng.low ; ++ndx ) |
1303 | if (transpose) |
1304 | varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO; |
1305 | else |
1306 | varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO; |
1307 | |
1308 | for( ndx = sys->rng.low + sys->rank ; ndx < order ; ++ndx ) |
1309 | if (transpose) |
1310 | varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO; |
1311 | else |
1312 | varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO; |
1313 | } |
1314 | |
1315 | #if LINSOL_DEBUG |
1316 | static void debug_out_factors(FILE *fp,linsolqr_system_t sys) |
1317 | /** |
1318 | *** Outputs permutation and values of the nonzero elements in the |
1319 | *** factor matrix square region given by sys->rng. |
1320 | **/ |
1321 | { |
1322 | mtx_region_t reg; |
1323 | reg.row.low=reg.col.low=sys->rng.low; |
1324 | reg.row.high=reg.col.high=sys->rng.high; |
1325 | mtx_write_region(fp,sys->factors,®); |
1326 | } |
1327 | #endif /* LINSOL_DEBUG */ |
1328 | |
1329 | /***************************************************************************\ |
1330 | Reordering functions for SPK1, and possibly for other schemes to be |
1331 | implemented later. |
1332 | The stuff here is almost, but not quite, black magic. Don't tinker with it. |
1333 | \***************************************************************************/ |
1334 | |
1335 | /********************************* |
1336 | begin of spk1 stuff |
1337 | *********************************/ |
1338 | struct reorder_list { /* List of rows/columns and their counts. */ |
1339 | int32 ndx; |
1340 | int32 count; |
1341 | struct reorder_list *next; |
1342 | }; |
1343 | |
1344 | struct reorder_vars { |
1345 | mtx_matrix_t mtx; |
1346 | mtx_region_t reg; /* Current active region */ |
1347 | int32 colhigh; /* Original value of reg.col.high */ |
1348 | struct reorder_list *tlist; /* Array of (enough) list elements */ |
1349 | int32 *rowcount; /* rowcount[reg.row.low .. reg.row.high] */ |
1350 | }; |
1351 | |
1352 | static void adjust_row_count(struct reorder_vars *vars,int32 removed_col) |
1353 | /** |
1354 | *** Adjusts the row counts to account for the (to be) removed column. |
1355 | **/ |
1356 | { |
1357 | mtx_coord_t nz; |
1358 | real64 value; |
1359 | nz.col = removed_col; |
1360 | nz.row = mtx_FIRST; |
1361 | while( value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row)), |
1362 | nz.row != mtx_LAST ) |
1363 | --(vars->rowcount[nz.row]); |
1364 | } |
1365 | |
1366 | static void assign_row_and_col(struct reorder_vars *vars, |
1367 | int32 row, |
1368 | int32 col) |
1369 | /** |
1370 | *** Assigns the given row to the given column, moving the row and column |
1371 | *** to the beginning of the active region and removing them (readjusting |
1372 | *** the region). The row counts are NOT adjusted. If col == mtx_NONE, |
1373 | *** then no column is assigned and the row is moved to the end of the |
1374 | *** active block instead. Otherwise, it is assumed that the column |
1375 | *** is active. |
1376 | **/ |
1377 | { |
1378 | if( col == mtx_NONE ) { |
1379 | mtx_swap_rows(vars->mtx,row,vars->reg.row.high); |
1380 | vars->rowcount[row] = vars->rowcount[vars->reg.row.high]; |
1381 | --(vars->reg.row.high); |
1382 | } else { |
1383 | mtx_swap_rows(vars->mtx,row,vars->reg.row.low); |
1384 | vars->rowcount[row] = vars->rowcount[vars->reg.row.low]; |
1385 | mtx_swap_cols(vars->mtx,col,vars->reg.col.low); |
1386 | ++(vars->reg.row.low); |
1387 | ++(vars->reg.col.low); |
1388 | } |
1389 | } |
1390 | |
1391 | static void push_column_on_stack(struct reorder_vars *vars,int32 col) |
1392 | /** |
1393 | *** Pushes the given column onto the stack. It is assumed that the |
1394 | *** column is active. Row counts are adjusted. |
1395 | **/ |
1396 | { |
1397 | adjust_row_count(vars,col); |
1398 | mtx_swap_cols(vars->mtx,col,vars->reg.col.high); |
1399 | --(vars->reg.col.high); |
1400 | } |
1401 | |
1402 | static int32 pop_column_from_stack(struct reorder_vars *vars) |
1403 | /** |
1404 | *** Pops the column on the "top" of the stack off of the stack and |
1405 | *** returns the column index, where it now lies in the active region. |
1406 | *** If the stack is empty, mtx_NONE is returned. Row counts are NOT |
1407 | *** adjusted (this together with a subsequent assignment of this column |
1408 | *** ==> no row count adjustment necessary). |
1409 | **/ |
1410 | { |
1411 | if( vars->reg.col.high < vars->colhigh ) |
1412 | return(++(vars->reg.col.high)); |
1413 | else |
1414 | return( mtx_NONE ); |
1415 | } |
1416 | |
1417 | static void assign_null_rows(struct reorder_vars *vars) |
1418 | /** |
1419 | *** Assigns empty rows, moving them to the assigned region. It is |
1420 | *** assumed that row counts are correct. Columns are assigned off the |
1421 | *** stack. |
1422 | **/ |
1423 | { |
1424 | int32 row; |
1425 | |
1426 | for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row ) |
1427 | if( vars->rowcount[row] == 0 ) |
1428 | assign_row_and_col(vars , row , pop_column_from_stack(vars)); |
1429 | } |
1430 | |
1431 | static void forward_triangularize(struct reorder_vars *vars) |
1432 | /** |
1433 | *** Forward triangularizes the region, assigning singleton rows with their |
1434 | *** one and only incident column until there are no more. The row counts |
1435 | *** must be correct, and they are updated. |
1436 | **/ |
1437 | { |
1438 | boolean change; |
1439 | mtx_coord_t nz; |
1440 | real64 value; |
1441 | |
1442 | do { |
1443 | change = FALSE; |
1444 | for( nz.row = vars->reg.row.low ; |
1445 | nz.row <= vars->reg.row.high && vars->rowcount[nz.row] != 1; |
1446 | ++nz.row ) ; |
1447 | if( nz.row <= vars->reg.row.high ) { |
1448 | /* found singleton row */ |
1449 | nz.col = mtx_FIRST; /* this is somehow coming back with nz.col -1 */ |
1450 | value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col)); |
1451 | adjust_row_count(vars,nz.col); |
1452 | assign_row_and_col(vars,nz.row,nz.col); |
1453 | change = TRUE; |
1454 | } |
1455 | } while( change ); |
1456 | } |
1457 | |
1458 | static int32 select_row(struct reorder_vars *vars) |
1459 | /** |
1460 | *** Selects a row and returns its index. It is assumed that there is a |
1461 | *** row. Row counts must be correct. vars->tlist will be used. |
1462 | **/ |
1463 | { |
1464 | int32 min_row_count; |
1465 | int32 max_col_count; |
1466 | int32 row; |
1467 | int32 i, nties=-2; /* # elements currently defined in vars->tlist */ |
1468 | int32 sum; |
1469 | mtx_coord_t nz; |
1470 | real64 value; |
1471 | mtx_matrix_t mtx; |
1472 | mtx_range_t *colrng, *rowrng; |
1473 | |
1474 | /* Set to something > any possible value */ |
1475 | min_row_count = vars->reg.col.high-vars->reg.col.low+2; |
1476 | for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row ) |
1477 | if( vars->rowcount[row] <= min_row_count ) { |
1478 | if( vars->rowcount[row] < min_row_count ) { |
1479 | min_row_count = vars->rowcount[row]; |
1480 | nties = 0; |
1481 | } |
1482 | vars->tlist[nties++].ndx = row; |
1483 | } |
1484 | /** |
1485 | *** vars->tlist[0..nties-1] is a list of row numbers which tie for |
1486 | *** minimum row count. |
1487 | **/ |
1488 | |
1489 | max_col_count = -1; /* < any possible value */ |
1490 | i = 0; |
1491 | mtx = vars->mtx; |
1492 | colrng=&(vars->reg.col); |
1493 | rowrng=&(vars->reg.row); |
1494 | while( i < nties ) { |
1495 | |
1496 | sum = 0; |
1497 | nz.row = vars->tlist[i].ndx; |
1498 | nz.col = mtx_FIRST; |
1499 | while( value = mtx_next_in_row(mtx,&nz,colrng), |
1500 | nz.col != mtx_LAST ) |
1501 | sum += mtx_nonzeros_in_col(mtx,nz.col,rowrng); |
1502 | if( sum > max_col_count ) { |
1503 | max_col_count = sum; |
1504 | row = nz.row; |
1505 | } |
1506 | i++; |
1507 | } |
1508 | /** |
1509 | *** Now row contains the row with the minimum row count, which has the |
1510 | *** greatest total column count of incident columns among all rows with |
1511 | *** the same (minimum) row count. Select it. |
1512 | **/ |
1513 | return(row); |
1514 | } |
1515 | |
1516 | static void spk1_reorder(struct reorder_vars *vars) |
1517 | /** |
1518 | *** Reorders the assigned matrix vars->mtx within the specified bounding |
1519 | *** block region vars->reg. The region is split into 6 subregions during |
1520 | *** reordering: the rows are divided in two, and the columns divided in |
1521 | *** three. Initially everything is in the active subregion. Ultimately, |
1522 | *** everything will be assigned. |
1523 | *** |
1524 | *** <-- assigned -->|<-- active-->|<-- on stack -->| |
1525 | *** ----+----------------+-------------+----------------+ |
1526 | *** a | | | | |
1527 | *** s | | | | |
1528 | *** s | | | | |
1529 | *** i | (SQUARE) | | | |
1530 | *** g | | | | |
1531 | *** n | | | | |
1532 | *** e | | | | |
1533 | *** d | | | | |
1534 | *** ----+----------------+-------------+----------------+ |
1535 | *** a | | | | |
1536 | *** c | | ACTIVE | | |
1537 | *** t | | REGION | | |
1538 | *** i | | | | |
1539 | *** v | | | | |
1540 | *** e | | | | |
1541 | *** ----+----------------+-------------+----------------+ |
1542 | *** |
1543 | *** The algorithm is roughly as follows: |
1544 | *** (1) select a row (by some criterion). |
1545 | *** (2) push columns incident on that row onto the stack in decreasing |
1546 | *** order of their length. |
1547 | *** (3) pop first column off the stack and assign it to the selected |
1548 | *** row. |
1549 | *** (4) forward-triangularize (assign singleton rows with their one |
1550 | *** and only incident column, until no longer possible). |
1551 | *** |
1552 | *** (1)-(4) should be repeated until the active subregion becomes empty. |
1553 | *** |
1554 | *** Everything above was written as though the entire matrix is |
1555 | *** involved. In reality, only the relevant square region is involved. |
1556 | **/ |
1557 | { |
1558 | int32 row, size; |
1559 | int32 *rowcount_array_origin; |
1560 | mtx_matrix_t mtx; |
1561 | |
1562 | size = MAX(vars->reg.row.high,vars->reg.col.high) + 1; |
1563 | vars->tlist = size > 0 ? (struct reorder_list *) |
1564 | ascmalloc( size*sizeof(struct reorder_list) ) : NULL; |
1565 | vars->rowcount = rowcount_array_origin = size > 0 ? (int32 *) |
1566 | ascmalloc( size*sizeof(int32) ) : NULL; |
1567 | mtx = vars->mtx; |
1568 | |
1569 | vars->colhigh = vars->reg.col.high; |
1570 | /* Establish row counts */ |
1571 | for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row ) |
1572 | vars->rowcount[row] = |
1573 | mtx_nonzeros_in_row(mtx,row,&(vars->reg.col)); |
1574 | |
1575 | while(TRUE) { |
1576 | struct reorder_list *head; |
1577 | int32 nelts; /* # elements "allocated" from vars->tlist */ |
1578 | mtx_coord_t nz; |
1579 | real64 value; |
1580 | |
1581 | forward_triangularize(vars); |
1582 | assign_null_rows(vars); |
1583 | if( vars->reg.row.low>vars->reg.row.high || |
1584 | vars->reg.col.low>vars->reg.col.high ) { |
1585 | /* Active region is now empty, done */ |
1586 | if( NOTNULL(vars->tlist) ) |
1587 | ascfree( vars->tlist ); |
1588 | if( NOTNULL(rowcount_array_origin) ) |
1589 | ascfree( rowcount_array_origin ); |
1590 | return; |
1591 | } |
1592 | |
1593 | head = NULL; |
1594 | nelts = 0; |
1595 | nz.row = select_row(vars); |
1596 | nz.col = mtx_FIRST; |
1597 | while( value = mtx_next_in_row(mtx,&nz,&(vars->reg.col)), |
1598 | nz.col != mtx_LAST ) { |
1599 | struct reorder_list **q,*p; |
1600 | |
1601 | p = &(vars->tlist[nelts++]); |
1602 | p->ndx = mtx_col_to_org(mtx,nz.col); |
1603 | p->count = mtx_nonzeros_in_col(mtx,nz.col,&(vars->reg.row)); |
1604 | for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) ); |
1605 | p->next = *q; |
1606 | *q = p; |
1607 | } |
1608 | /** |
1609 | *** We now have a list of columns which intersect the selected row. |
1610 | *** The list is in order of decreasing column count. |
1611 | **/ |
1612 | |
1613 | /* Push incident columns on stack */ |
1614 | for( ; NOTNULL(head) ; head = head->next ) |
1615 | push_column_on_stack(vars,mtx_org_to_col(mtx,head->ndx)); |
1616 | |
1617 | /* Pop column off stack and assign to selected row */ |
1618 | assign_row_and_col(vars , nz.row , pop_column_from_stack(vars)); |
1619 | } |
1620 | /* Not reached. */ |
1621 | } |
1622 | |
1623 | /********************************* |
1624 | end of spk1 stuff |
1625 | *********************************/ |
1626 | /********************************* |
1627 | begin of tspk1 stuff |
1628 | *********************************/ |
1629 | struct creorder_list { /* List of columns/rows and their counts. */ |
1630 | int32 ndx; |
1631 | int32 count; |
1632 | struct creorder_list *next; |
1633 | }; |
1634 | |
1635 | struct creorder_vars { |
1636 | mtx_matrix_t mtx; |
1637 | mtx_region_t reg; /* Current active region */ |
1638 | int32 rowhigh; /* Original value of reg.row.high */ |
1639 | struct creorder_list *tlist; /* Array of (enough) list elements */ |
1640 | int32 *colcount; /* colcount[reg.col.low .. reg.col.high] */ |
1641 | }; |
1642 | |
1643 | static void adjust_col_count(struct creorder_vars *vars,int32 removed_row) |
1644 | /** |
1645 | *** Adjusts the column counts to account for the (to be) removed row. |
1646 | **/ |
1647 | { |
1648 | mtx_coord_t nz; |
1649 | real64 value; |
1650 | nz.row = removed_row; |
1651 | nz.col = mtx_FIRST; |
1652 | while( value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col)), |
1653 | nz.col != mtx_LAST ) |
1654 | --(vars->colcount[nz.col]); |
1655 | } |
1656 | |
1657 | static void assign_col_and_row(struct creorder_vars *vars, |
1658 | int32 col, |
1659 | int32 row) |
1660 | /** |
1661 | *** Assigns the given row to the given column, moving the row and column |
1662 | *** to the beginning of the active region and removing them (readjusting |
1663 | *** the region). The col counts are NOT adjusted. If col == mtx_NONE, |
1664 | *** then no column is assigned and the col is moved to the end of the |
1665 | *** active block instead. Otherwise, it is assumed that the row |
1666 | *** is active. |
1667 | **/ |
1668 | { |
1669 | if( row == mtx_NONE ) { |
1670 | mtx_swap_cols(vars->mtx,col,vars->reg.col.high); |
1671 | vars->colcount[col] = vars->colcount[vars->reg.col.high]; |
1672 | --(vars->reg.col.high); |
1673 | } else { |
1674 | mtx_swap_cols(vars->mtx,col,vars->reg.col.low); |
1675 | vars->colcount[col] = vars->colcount[vars->reg.col.low]; |
1676 | mtx_swap_rows(vars->mtx,row,vars->reg.row.low); |
1677 | ++(vars->reg.col.low); |
1678 | ++(vars->reg.row.low); |
1679 | } |
1680 | } |
1681 | |
1682 | static void push_row_on_stack(struct creorder_vars *vars,int32 row) |
1683 | /** |
1684 | *** Pushes the given row onto the stack. It is assumed that the |
1685 | *** row is active. Col counts are adjusted. |
1686 | **/ |
1687 | { |
1688 | adjust_col_count(vars,row); |
1689 | mtx_swap_rows(vars->mtx,row,vars->reg.row.high); |
1690 | --(vars->reg.row.high); |
1691 | } |
1692 | |
1693 | static int32 pop_row_from_stack(struct creorder_vars *vars) |
1694 | /** |
1695 | *** Pops the row on the "top" of the stack off of the stack and |
1696 | *** returns the row index, where it now lies in the active region. |
1697 | *** If the stack is empty, mtx_NONE is returned. Col counts are NOT |
1698 | *** adjusted (this together with a subsequent assignment of this row |
1699 | *** ==> no col count adjustment necessary). |
1700 | **/ |
1701 | { |
1702 | if( vars->reg.row.high < vars->rowhigh ) |
1703 | return(++(vars->reg.row.high)); |
1704 | else |
1705 | return( mtx_NONE ); |
1706 | } |
1707 | |
1708 | static void assign_null_cols(struct creorder_vars *vars) |
1709 | /** |
1710 | *** Assigns empty cols, moving them to the assigned region. It is |
1711 | *** assumed that col counts are correct. Rows are assigned off the |
1712 | *** stack. |
1713 | **/ |
1714 | { |
1715 | int32 col; |
1716 | |
1717 | for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col ) |
1718 | if( vars->colcount[col] == 0 ) |
1719 | assign_col_and_row(vars , col , pop_row_from_stack(vars)); |
1720 | } |
1721 | |
1722 | static void cforward_triangularize(struct creorder_vars *vars) |
1723 | /** |
1724 | *** Forward triangularizes the region, assigning singleton columns with their |
1725 | *** one and only incident row until there are no more. The column counts |
1726 | *** must be correct, and they are updated. |
1727 | **/ |
1728 | { |
1729 | boolean change; |
1730 | |
1731 | do { |
1732 | mtx_coord_t nz; |
1733 | real64 value; |
1734 | change = FALSE; |
1735 | for( nz.col = vars->reg.col.low ; |
1736 | nz.col <= vars->reg.col.high && vars->colcount[nz.col] != 1; |
1737 | ++nz.col ) ; |
1738 | if( nz.col <= vars->reg.col.high ) { |
1739 | /* found singleton col */ |
1740 | nz.row = mtx_FIRST; |
1741 | value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row)); |
1742 | adjust_col_count(vars,nz.row); |
1743 | assign_col_and_row(vars,nz.col,nz.row); |
1744 | change = TRUE; |
1745 | } |
1746 | } while( change ); |
1747 | } |
1748 | |
1749 | static int32 select_col(struct creorder_vars *vars) |
1750 | /** |
1751 | *** Selects a col and returns its index. It is assumed that there is a |
1752 | *** col. Col counts must be correct. vars->tlist will be used. |
1753 | **/ |
1754 | { |
1755 | int32 min_col_count; |
1756 | int32 max_row_count; |
1757 | int32 col; |
1758 | int32 i, nties=-2; /* # elements currently defined in vars->tlist */ |
1759 | int32 sum; |
1760 | mtx_coord_t nz; |
1761 | real64 value; |
1762 | mtx_matrix_t mtx; |
1763 | mtx_range_t *colrng, *rowrng; |
1764 | |
1765 | /* Set to something > any possible value */ |
1766 | min_col_count = vars->reg.row.high-vars->reg.row.low+2; |
1767 | for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col ) |
1768 | if( vars->colcount[col] <= min_col_count ) { |
1769 | if( vars->colcount[col] < min_col_count ) { |
1770 | min_col_count = vars->colcount[col]; |
1771 | nties = 0; |
1772 | } |
1773 | vars->tlist[nties++].ndx = col; |
1774 | } |
1775 | /** |
1776 | *** vars->tlist[0..nties-1] is a list of row numbers which tie for |
1777 | *** minimum col count. |
1778 | **/ |
1779 | |
1780 | max_row_count = -1; /* < any possible value */ |
1781 | i = 0; |
1782 | mtx = vars->mtx; |
1783 | rowrng=&(vars->reg.row); |
1784 | colrng=&(vars->reg.col); |
1785 | while( i < nties ) { |
1786 | |
1787 | sum = 0; |
1788 | nz.row = mtx_FIRST; |
1789 | nz.col = vars->tlist[i].ndx; |
1790 | while( value = mtx_next_in_col(mtx,&nz,rowrng), |
1791 | nz.row != mtx_LAST ) |
1792 | sum += mtx_nonzeros_in_row(mtx,nz.row,colrng); |
1793 | if( sum > max_row_count ) { |
1794 | max_row_count = sum; |
1795 | col = nz.col; |
1796 | } |
1797 | i++; |
1798 | } |
1799 | /** |
1800 | *** Now col contains the col with the minimum col count, which has the |
1801 | *** greatest total row count of incident rows among all cols with |
1802 | *** the same (minimum) col count. Select it. |
1803 | **/ |
1804 | return(col); |
1805 | } |
1806 | |
1807 | static void tspk1_reorder(struct creorder_vars *vars) |
1808 | /** |
1809 | *** Transpose the picture and explanation that follows: |
1810 | *** Reorders the assigned matrix vars->mtx within the specified bounding |
1811 | *** block region vars->reg. The region is split into 6 subregions during |
1812 | *** reordering: the rows are divided in two, and the columns divided in |
1813 | *** three. Initially everything is in the active subregion. Ultimately, |
1814 | *** everything will be assigned. |
1815 | *** |
1816 | *** <-- assigned -->|<-- active-->|<-- on stack -->| |
1817 | *** ----+----------------+-------------+----------------+ |
1818 | *** a | | | | |
1819 | *** s | | | | |
1820 | *** s | | | | |
1821 | *** i | (SQUARE) | | | |
1822 | *** g | | | | |
1823 | *** n | | | | |
1824 | *** e | | | | |
1825 | *** d | | | | |
1826 | *** ----+----------------+-------------+----------------+ |
1827 | *** a | | | | |
1828 | *** c | | ACTIVE | | |
1829 | *** t | | REGION | | |
1830 | *** i | | | | |
1831 | *** v | | | | |
1832 | *** e | | | | |
1833 | *** ----+----------------+-------------+----------------+ |
1834 | *** |
1835 | *** The algorithm is roughly as follows: |
1836 | *** (1) select a row (by some criterion). |
1837 | *** (2) push columns incident on that row onto the stack in decreasing |
1838 | *** order of their length. |
1839 | *** (3) pop first column off the stack and assign it to the selected |
1840 | *** row. |
1841 | *** (4) forward-triangularize (assign singleton rows with their one |
1842 | *** and only incident column, until no longer possible). |
1843 | *** |
1844 | *** (1)-(4) should be repeated until the active subregion becomes empty. |
1845 | *** |
1846 | *** Everything above was written as though the entire matrix is |
1847 | *** involved. In reality, only the relevant square region is involved. |
1848 | **/ |
1849 | { |
1850 | int32 col, size; |
1851 | int32 *colcount_array_origin; |
1852 | mtx_matrix_t mtx; |
1853 | |
1854 | size = vars->reg.col.high - vars->reg.col.low + 1; |
1855 | size = MAX(size,vars->reg.row.high - vars->reg.row.low + 1); |
1856 | vars->tlist = size > 0 ? (struct creorder_list *) |
1857 | ascmalloc( size*sizeof(struct creorder_list) ) : NULL; |
1858 | colcount_array_origin = size > 0 ? (int32 *) |
1859 | ascmalloc( size*sizeof(int32) ) : NULL; |
1860 | vars->colcount = |
1861 | NOTNULL(colcount_array_origin) ? |
1862 | colcount_array_origin - vars->reg.col.low : NULL; |
1863 | mtx = vars->mtx; |
1864 | |
1865 | vars->rowhigh = vars->reg.row.high; |
1866 | /* Establish col counts */ |
1867 | for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col ) |
1868 | vars->colcount[col] = |
1869 | mtx_nonzeros_in_col(mtx,col,&(vars->reg.row)); |
1870 | |
1871 | while(TRUE) { |
1872 | struct creorder_list *head; |
1873 | int32 nelts; /* # elements "allocated" from vars->tlist */ |
1874 | mtx_coord_t nz; |
1875 | real64 value; |
1876 | |
1877 | cforward_triangularize(vars); |
1878 | assign_null_cols(vars); |
1879 | if( vars->reg.col.low > vars->reg.col.high || |
1880 | vars->reg.row.low > vars->reg.row.high ) { |
1881 | /* Active region is now empty, done */ |
1882 | if( NOTNULL(vars->tlist) ) |
1883 | ascfree( vars->tlist ); |
1884 | if( NOTNULL(colcount_array_origin) ) |
1885 | ascfree( colcount_array_origin ); |
1886 | return; |
1887 | } |
1888 | |
1889 | head = NULL; |
1890 | nelts = 0; |
1891 | nz.row = mtx_FIRST; |
1892 | nz.col = select_col(vars); |
1893 | while( value = mtx_next_in_col(mtx,&nz,&(vars->reg.row)), |
1894 | nz.row != mtx_LAST ) { |
1895 | struct creorder_list **q,*p; |
1896 | |
1897 | p = &(vars->tlist[nelts++]); |
1898 | p->ndx = mtx_row_to_org(mtx,nz.row); |
1899 | p->count = mtx_nonzeros_in_row(mtx,nz.row,&(vars->reg.col)); |
1900 | for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) ); |
1901 | p->next = *q; |
1902 | *q = p; |
1903 | } |
1904 | /** |
1905 | *** We now have a list of columns which intersect the selected row. |
1906 | *** The list is in order of decreasing column count. |
1907 | **/ |
1908 | |
1909 | /* Push incident rows on stack */ |
1910 | for( ; NOTNULL(head) ; head = head->next ) |
1911 | push_row_on_stack(vars,mtx_org_to_row(mtx,head->ndx)); |
1912 | |
1913 | /* Pop row off stack and assign to selected col */ |
1914 | assign_col_and_row(vars , nz.col , pop_row_from_stack(vars)); |
1915 | } |
1916 | /* Not reached. */ |
1917 | } |
1918 | |
1919 | /********************************* |
1920 | end of tspk1 stuff |
1921 | *********************************/ |
1922 | |
1923 | static boolean nonempty_row(mtx_matrix_t mtx,int32 row) |
1924 | /** |
1925 | *** ? row not empty in mtx. |
1926 | **/ |
1927 | { |
1928 | mtx_coord_t nz; |
1929 | real64 value; |
1930 | value = mtx_next_in_row(mtx, mtx_coord(&nz,row,mtx_FIRST), mtx_ALL_COLS); |
1931 | return( nz.col != mtx_LAST ); |
1932 | } |
1933 | |
1934 | static boolean nonempty_col(mtx_matrix_t mtx,int32 col) |
1935 | /** |
1936 | *** ? column not empty in mtx. |
1937 | **/ |
1938 | { |
1939 | mtx_coord_t nz; |
1940 | real64 value; |
1941 | value = mtx_next_in_col(mtx, mtx_coord(&nz,mtx_FIRST,col), mtx_ALL_ROWS); |
1942 | return( nz.row != mtx_LAST ); |
1943 | } |
1944 | |
1945 | static void determine_pivot_range(linsolqr_system_t sys) |
1946 | /** |
1947 | *** Calculates sys->rng from sys->coef. |
1948 | **/ |
1949 | { |
1950 | sys->reg.row.low = sys->reg.row.high = -1; |
1951 | sys->reg.col.low = sys->reg.col.high = -1; |
1952 | |
1953 | for( sys->rng.high=mtx_order(sys->coef) ; --(sys->rng.high) >= 0 ; ) { |
1954 | if( nonempty_row(sys->coef,sys->rng.high) && |
1955 | sys->reg.row.high < 0 ) |
1956 | sys->reg.row.high = sys->rng.high; |
1957 | if( nonempty_col(sys->coef,sys->rng.high) && |
1958 | sys->reg.col.high < 0 ) |
1959 | sys->reg.col.high = sys->rng.high; |
1960 | if( nonempty_row(sys->coef,sys->rng.high) && |
1961 | nonempty_col(sys->coef,sys->rng.high) ) |
1962 | break; |
1963 | } |
1964 | |
1965 | for( sys->rng.low=0 ; sys->rng.low <= sys->rng.high ; ++(sys->rng.low) ) { |
1966 | if( nonempty_row(sys->coef,sys->rng.low) && |
1967 | sys->reg.row.low < 0 ) |
1968 | sys->reg.row.low = sys->rng.low; |
1969 | if( nonempty_col(sys->coef,sys->rng.low) && |
1970 | sys->reg.col.low < 0 ) |
1971 | sys->reg.col.low = sys->rng.low; |
1972 | if( nonempty_row(sys->coef,sys->rng.low) && |
1973 | nonempty_col(sys->coef,sys->rng.low) ) |
1974 | break; |
1975 | } |
1976 | } |
1977 | |
1978 | static void square_region(linsolqr_system_t sys,mtx_region_t *region) |
1979 | /** |
1980 | *** Get the largest square confined to the diagonal within the region given |
1981 | *** and set sys->rng accordingly. |
1982 | **/ |
1983 | { |
1984 | sys->reg = *region; |
1985 | sys->rng.low = MAX(region->row.low,region->col.low); |
1986 | sys->rng.high = MIN(region->row.high,region->col.high); |
1987 | } |
1988 | |
1989 | static int ranki_reorder(linsolqr_system_t sys,mtx_region_t *region) |
1990 | /** |
1991 | *** The region to reorder is first isolated by truncating the region |
1992 | *** provided to the largest square region confined to the matrix diagonal. |
1993 | *** It is presumed it will contain no empty rows or columns and will |
1994 | *** provide the basis of candidate pivots when factoring. |
1995 | **/ |
1996 | { |
1997 | struct reorder_vars vars; |
1998 | CHECK_SYSTEM(sys); |
1999 | if (sys->fclass != ranki) { |
2000 | FPRINTF(stderr,"linsolqr_reorder: spk1 reorder called on system\n"); |
2001 | FPRINTF(stderr," with inappropriate factor class\n"); |
2002 | return 1; |
2003 | } |
2004 | if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys); |
2005 | else square_region(sys,region); |
2006 | |
2007 | vars.mtx = sys->coef; |
2008 | vars.reg.row.low = vars.reg.col.low = sys->rng.low; |
2009 | vars.reg.row.high = vars.reg.col.high = sys->rng.high; |
2010 | spk1_reorder(&vars); |
2011 | return 0; |
2012 | } |
2013 | |
2014 | static int tranki_reorder(linsolqr_system_t sys,mtx_region_t *region) |
2015 | /** |
2016 | *** The region to reorder is first isolated by truncating the region |
2017 | *** provided to the largest square region confined to the matrix diagonal. |
2018 | *** It is presumed it will contain no empty rows or columns and will |
2019 | *** provide the basis of candidate pivots when factoring. |
2020 | **/ |
2021 | { |
2022 | struct creorder_vars vars; |
2023 | CHECK_SYSTEM(sys); |
2024 | if ( !(sys->fclass==ranki || sys->fclass==s_qr) ) { |
2025 | FPRINTF(stderr,"linsolqr_reorder: tspk1 reorder called on system\n"); |
2026 | FPRINTF(stderr," with inappropriate factor method\n"); |
2027 | return 1; |
2028 | } |
2029 | if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys); |
2030 | else square_region(sys,region); |
2031 | |
2032 | vars.mtx = sys->coef; |
2033 | vars.reg.row.low = vars.reg.col.low = sys->rng.low; |
2034 | vars.reg.row.high = vars.reg.col.high = sys->rng.high; |
2035 | tspk1_reorder(&vars); |
2036 | return 0; |
2037 | } |
2038 | |
2039 | /***************************************************************************\ |
2040 | End of reordering functions for SPK1. |
2041 | \***************************************************************************/ |
2042 | |
2043 | /***************************************************************************\ |
2044 | RANKI implementation functions. |
2045 | \***************************************************************************/ |
2046 | |
2047 | static void eliminate_row(mtx_matrix_t mtx, |
2048 | mtx_range_t *rng, |
2049 | int32 row, /* row to eliminate */ |
2050 | real64 *tmp, /* temporary array */ |
2051 | real64 *pivots) /* prior pivots array */ |
2052 | /** |
2053 | *** Eliminates the given row to the left of the diagonal element, assuming |
2054 | *** valid pivots for all of the diagonal elements above it (the elements |
2055 | *** above those diagonal elements, if any exist, are assumed to be U |
2056 | *** elements and therefore ignored). The temporary array is used by this |
2057 | *** function to do its job. tmp[k], where rng->low <= k <= rng->high must |
2058 | *** be defined (allocated) but need not be initialized. |
2059 | *** pivots[k] where rng->low <= k <=rng->high must be allocated and |
2060 | *** pivots[rng->low] to pivots[row-1] must contain the previous pivots. |
2061 | **/ |
2062 | { |
2063 | mtx_coord_t nz; |
2064 | mtx_range_t left_to_eliminate,high_cols,tmp_cols; |
2065 | boolean do_series=FALSE; |
2066 | |
2067 | high_cols.low = row; |
2068 | high_cols.high = rng->high; |
2069 | left_to_eliminate.low = rng->low; |
2070 | left_to_eliminate.high = row - 1; |
2071 | |
2072 | /* Move non-zeros left of pivot from matrix to full array */ |
2073 | mtx_zero_real64(tmp+rng->low,(row-rng->low)); |
2074 | nz.row = row; |
2075 | mtx_cur_row_vec(mtx,row,tmp,&left_to_eliminate); |
2076 | mtx_clear_row(mtx,row,&left_to_eliminate); |
2077 | |
2078 | /* eliminates nzs from pivot, one by one, filling tmp with multipliers */ |
2079 | do_series= ( (row-1) >= (rng->low) ); |
2080 | if (do_series) { |
2081 | mtx_add_row_series_init(mtx,row,FALSE); |
2082 | /* do nothing between here and the END call to _init which removes |
2083 | elements from row */ |
2084 | } |
2085 | for( nz.row = row-1 ; nz.row >= rng->low ; --(nz.row) ) { |
2086 | if( tmp[nz.row] == D_ZERO ) |
2087 | continue; /* Nothing to do for this row */ |
2088 | |
2089 | nz.col = nz.row; |
2090 | tmp[nz.row] /= pivots[nz.row]; |
2091 | /* tmp[nz.row] now equals multiplier */ |
2092 | |
2093 | /* Perform "mtx_add_row" for full array part of the row */ |
2094 | left_to_eliminate.high = nz.row - 1; |
2095 | mtx_cur_vec_add_row(mtx,tmp,nz.row,-tmp[nz.row], |
2096 | &left_to_eliminate,FALSE); |
2097 | |
2098 | /* Perform "mtx_add_row" on remaining part of row */ |
2099 | mtx_add_row_series(nz.row,-tmp[nz.row],&high_cols); |
2100 | } |
2101 | if (do_series) { |
2102 | mtx_add_row_series_init(mtx,mtx_NONE,TRUE); |
2103 | } |
2104 | |
2105 | mtx_fill_cur_row_vec(mtx,row,tmp,mtx_range(&tmp_cols,rng->low,row-1)); |
2106 | |
2107 | } |
2108 | |
2109 | |
2110 | /* not in use it seems */ |
2111 | #if BUILD_DEAD_CODE /* yes, baa */ |
2112 | static int32 top_of_spike(linsolqr_system_t sys,int32 col) |
2113 | /** |
2114 | *** Determines the top row (row of lowest index) in a possible spike |
2115 | *** above the diagonal element in the given column. If there is no spike, |
2116 | *** then (row = ) col is returned. |
2117 | **/ |
2118 | { |
2119 | mtx_range_t above_diagonal; |
2120 | mtx_coord_t nz; |
2121 | real64 value; |
2122 | int32 top_row; |
2123 | |
2124 | above_diagonal.low = sys->rng.low; |
2125 | above_diagonal.high = col-1; |
2126 | top_row = nz.col = col; |
2127 | nz.row = mtx_FIRST; |
2128 | while( value = mtx_next_in_col(sys->factors,&nz,&above_diagonal), |
2129 | nz.row != mtx_LAST ) |
2130 | if( nz.row < top_row ) |
2131 | top_row = nz.row; |
2132 | return( top_row ); |
2133 | } |
2134 | #endif /* BUILD_DEAD_CODE */ |
2135 | |
2136 | static boolean col_is_a_spike(linsolqr_system_t sys,int32 col) |
2137 | /** |
2138 | *** Determines if the col is a spike, characterized by having any |
2139 | *** nonzeros above the diagonal. By construction there shouldn't |
2140 | *** be any numeric 0 nonzeros above the diagonal, so we don't prune |
2141 | *** them out here. |
2142 | **/ |
2143 | { |
2144 | mtx_range_t above_diagonal; |
2145 | mtx_coord_t nz; |
2146 | |
2147 | above_diagonal.low = sys->rng.low; |
2148 | above_diagonal.high = col-1; |
2149 | nz.col = col; |
2150 | nz.row = mtx_FIRST; |
2151 | /* this loop is cheaper than it looks */ |
2152 | while( (void)mtx_next_in_col(sys->factors,&nz,&above_diagonal), |
2153 | nz.row != mtx_LAST ) { |
2154 | if( nz.row < col ) return TRUE; |
2155 | } |
2156 | return( FALSE ); |
2157 | } |
2158 | |
2159 | /* see implementation notes before modifying this function! */ |
2160 | static void number_drag(real64 *vec, int32 rfrom, int32 rto) |
2161 | { |
2162 | int32 i; |
2163 | real64 tmp; |
2164 | if (rto <rfrom) { |
2165 | tmp=vec[rfrom]; |
2166 | for (i=rfrom; i> rto; i--) { |
2167 | vec[i]=vec[i-1]; /* rotate right */ |
2168 | } |
2169 | vec[rto]=tmp; |
2170 | return; |
2171 | } |
2172 | if (rto > rfrom) { |
2173 | tmp=vec[rfrom]; |
2174 | for (i=rfrom; i< rto; i++) { |
2175 | vec[i]=vec[i+1]; /* rotate left */ |
2176 | } |
2177 | vec[rto]=tmp; |
2178 | } |
2179 | } |
2180 | |
2181 | static void rankikw_factor(linsolqr_system_t sys) |
2182 | /** |
2183 | *** (the following description also applies to rankijz_factor which is |
2184 | *** different only in pivot selection strategy.) |
2185 | *** This is the heart of the linear equation solver. This function |
2186 | *** factorizes the matrix into a lower (L) and upper (U) triangular |
2187 | *** matrix. sys->smallest_pivot and sys->rank are calculated. The |
2188 | *** RANKI method is utilized. At the end of elimination, the matrix A |
2189 | *** is factored into A = U L, where U and L are stored as follows: |
2190 | *** |
2191 | *** <----- r ----> <- n-r -> |
2192 | *** +--------------+---------+ |
2193 | *** | | | |
2194 | *** | U | | |
2195 | *** | | | |
2196 | *** | L | | r |
2197 | *** | | | |
2198 | *** +--------------+---------+ |
2199 | *** | | | |
2200 | *** | | 0 | n-r |
2201 | *** | | | |
2202 | *** +--------------+---------+ |
2203 | *** |
2204 | *** The rows and columns have been permuted so that all of the pivoted |
2205 | *** original rows and columns are found in the first r rows and columns |
2206 | *** of the region. The last n-r rows and columns are unpivoted. U has |
2207 | *** 1's on its diagonal, whereas L's diagonal is stored in the matrix. |
2208 | *** |
2209 | *** Everything above was written as though the entire matrix is |
2210 | *** involved. In reality, only the relevant square region is involved. |
2211 | **/ |
2212 | { |
2213 | mtx_coord_t nz; |
2214 | int32 last_row; |
2215 | mtx_range_t pivot_candidates; |
2216 | real64 *tmp; |
2217 | real64 pivot, *pivots; |
2218 | int32 length; |
2219 | mtx_matrix_t mtx; |
2220 | |
2221 | length = sys->rng.high - sys->rng.low + 1; |
2222 | tmp = sys->ludata->tmp; |
2223 | /* eliminate row takes care of zeroing the relevant region and won't |
2224 | change values outside of it. */ |
2225 | pivots=sys->ludata->pivlist; |
2226 | mtx=sys->factors; |
2227 | |
2228 | sys->smallest_pivot = MAXDOUBLE; |
2229 | last_row = pivot_candidates.high = sys->rng.high; |
2230 | for( nz.row = sys->rng.low ; nz.row <= last_row ; ) { |
2231 | |
2232 | pivot_candidates.low = nz.col = nz.row; |
2233 | pivots[nz.row]=pivot = mtx_value(mtx,&nz); |
2234 | pivot = fabs(pivot); |
2235 | if( pivot > sys->pivot_zero && |
2236 | pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) && |
2237 | !col_is_a_spike(sys,nz.row) ) { |
2238 | /* Good pivot and not a spike: continue with next row */ |
2239 | if( pivot < sys->smallest_pivot ) |
2240 | sys->smallest_pivot = pivot; |
2241 | ++(nz.row); |
2242 | continue; |
2243 | } |
2244 | /* pivots for rows nz.row back to sys->rng->low are stored in pivots */ |
2245 | /** |
2246 | *** Row is a spike row or will |
2247 | *** be when a necessary column |
2248 | *** exchange occurs. |
2249 | **/ |
2250 | eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots); |
2251 | /* pivot will be largest of those available. get size and value */ |
2252 | pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row])); |
2253 | if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */ |
2254 | /* Dependent row, drag to the end */ |
2255 | mtx_drag(mtx,nz.row,last_row); |
2256 | number_drag(pivots,nz.row,last_row); |
2257 | --last_row; |
2258 | #undef KAA_DEBUG |
2259 | #ifdef KAA_DEBUG |
2260 | FPRINTF(stderr,"Warning: Row %d is dependent with pivot %20.8g\n", |
2261 | nz.row,pivot); |
2262 | #endif /* KAA_DEBUG */ |
2263 | } else { |
2264 | /* Independent row: nz contains best pivot */ |
2265 | /* Move pivot to diagonal */ |
2266 | mtx_swap_cols(mtx,nz.row,nz.col); |
2267 | mtx_drag( mtx , nz.row , sys->rng.low ); |
2268 | number_drag(pivots,nz.row,sys->rng.low); |
2269 | if( pivot < sys->smallest_pivot ) |
2270 | sys->smallest_pivot = pivot; |
2271 | ++(nz.row); |
2272 | } |
2273 | } |
2274 | |
2275 | sys->rank = last_row - sys->rng.low + 1; |
2276 | } |
2277 | |
2278 | |
2279 | static void rankijz_factor(linsolqr_system_t sys) |
2280 | /** |
2281 | *** This is an alternate pivoting strategy introduced by Joe Zaher. |
2282 | *** it looks down and across for good pivots rather than just across. |
2283 | *** |
2284 | **/ |
2285 | { |
2286 | real64 biggest; |
2287 | mtx_coord_t nz, best; |
2288 | mtx_region_t candidates; |
2289 | real64 *tmp; |
2290 | real64 pivot, *pivots; |
2291 | int32 length; |
2292 | mtx_matrix_t mtx; |
2293 | |
2294 | length = sys->rng.high - sys->rng.low + 1; |
2295 | tmp = sys->ludata->tmp; |
2296 | /* eliminate row takes care of zeroing the relevant region and won't |
2297 | change values outside of it. */ |
2298 | pivots=sys->ludata->pivlist; |
2299 | mtx=sys->factors; |
2300 | |
2301 | sys->smallest_pivot = MAXDOUBLE; |
2302 | candidates.row.high = sys->rng.high; |
2303 | candidates.col.high = sys->rng.high; |
2304 | for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) { |
2305 | nz.col = nz.row; |
2306 | pivots[nz.row] = pivot = mtx_value(mtx,&nz); |
2307 | pivot = fabs(pivot); |
2308 | candidates.row.low = nz.row; |
2309 | candidates.col.low = nz.row; |
2310 | if( !col_is_a_spike(sys,nz.row) ) { |
2311 | best.col = nz.row; |
2312 | mtx_col_max(mtx,&best,&(candidates.row),&biggest); |
2313 | if( fabs(biggest) >= sys->pivot_zero ) { |
2314 | if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) { |
2315 | mtx_swap_rows(mtx,nz.row,best.row); |
2316 | pivots[nz.row] = biggest; |
2317 | pivot = fabs(biggest); |
2318 | } |
2319 | if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot; |
2320 | ++(nz.row); |
2321 | continue; |
2322 | } |
2323 | } |
2324 | /* pivots for rows nz.row back to sys->rng->low are stored in pivots */ |
2325 | /** |
2326 | *** Row is a spike row or will |
2327 | *** be when a necessary column |
2328 | *** exchange occurs. |
2329 | **/ |
2330 | eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots); |
2331 | /* pivot will be largest of those available. get size and value */ |
2332 | pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row])); |
2333 | /* Move pivot to diagonal */ |
2334 | if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */ |
2335 | /* Dependent row, drag nz to lower right */ |
2336 | mtx_drag(mtx,nz.row,candidates.row.high); |
2337 | number_drag(pivots,nz.row,candidates.row.high); |
2338 | --(candidates.row.high); |
2339 | } else { |
2340 | /* Independent row, drag nz to upper left */ |
2341 | mtx_swap_cols(mtx,nz.row,nz.col); |
2342 | mtx_drag( mtx , nz.row , sys->rng.low ); |
2343 | number_drag(pivots,nz.row,sys->rng.low); |
2344 | if( pivot < sys->smallest_pivot ) |
2345 | sys->smallest_pivot = pivot; |
2346 | ++(nz.row); |
2347 | } |
2348 | } |
2349 | |
2350 | sys->rank = candidates.row.high - sys->rng.low + 1; |
2351 | } |
2352 | |
2353 | |
2354 | #define KAA_DEBUG 0 |
2355 | #if KAA_DEBUG |
2356 | int ranki_entry(linsolqr_system_t sys,mtx_region_t *region) |
2357 | #else |
2358 | static int ranki_entry(linsolqr_system_t sys,mtx_region_t *region) |
2359 | #endif |
2360 | /** |
2361 | *** The region to factor is first isolated by truncating the region |
2362 | *** provided to the largest square region confined to the matrix diagonal. |
2363 | *** It is presumed it will contain no empty rows or columns and that it has |
2364 | *** been previously reordered using linsolqr_reorder(sys,region,spk1) |
2365 | *** or a sane variant of spk1. |
2366 | *** This is the entry point for all ranki based strategies, regardless of |
2367 | *** pivot selection subtleties. |
2368 | **/ |
2369 | { |
2370 | struct rhs_list *rl; |
2371 | double time; |
2372 | |
2373 | CHECK_SYSTEM(sys); |
2374 | if( sys->factored ) |
2375 | return 0; |
2376 | switch(sys->fmethod) { |
2377 | case ranki_kw: |
2378 | case ranki_jz: |
2379 | case ranki_ka: /* add new methods here */ |
2380 | break; |
2381 | default: |
2382 | return 1; |
2383 | } |
2384 | |
2385 | if(ISNULL(sys->ludata)) |
2386 | return 1; |
2387 | |
2388 | if( NOTNULL(sys->inverse) ) |
2389 | mtx_destroy(sys->inverse); |
2390 | sys->inverse=NULL; |
2391 | if( NOTNULL(sys->factors) ) |
2392 | mtx_destroy(sys->factors); |
2393 | if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys); |
2394 | else square_region(sys,region); |
2395 | |
2396 | sys->factors = mtx_copy_region(sys->coef,region); |
2397 | sys->rank = -1; |
2398 | sys->smallest_pivot = MAXDOUBLE; |
2399 | for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next ) |
2400 | rl->solved = FALSE; |
2401 | insure_capacity(sys); |
2402 | insure_lu_capacity(sys); |
2403 | |
2404 | time = tm_cpu_time(); |
2405 | switch(sys->fmethod) { |
2406 | case ranki_ka: |
2407 | case ranki_kw: |
2408 | rankikw_factor(sys); |
2409 | break; |
2410 | case ranki_jz: |
2411 | default: |
2412 | rankijz_factor(sys); |
2413 | } |
2414 | /* no longer done automatically as noone usually cares. |
2415 | calc_dependent_rows_ranki1(sys); |
2416 | calc_dependent_cols_ranki1(sys); |
2417 | */ |
2418 | sys->factored = TRUE; |
2419 | |
2420 | #undef KAA_DEBUG |
2421 | #if KAA_DEBUG |
2422 | time = tm_cpu_time() - time; |
2423 | FPRINTF(stderr,"Time for Inversion = %f\n",time); |
2424 | FPRINTF(stderr,"Non-zeros in Inverse = %d\n", |
2425 | mtx_nonzeros_in_region(sys->factors,region)); |
2426 | #endif |
2427 | return 0; |
2428 | } |
2429 | |
2430 | static int ranki_solve(linsolqr_system_t sys, struct rhs_list *rl) |
2431 | { |
2432 | backward_substitute(sys,rl->varvalue,rl->transpose); |
2433 | forward_substitute(sys,rl->varvalue,rl->transpose); |
2434 | zero_unpivoted_vars(sys,rl->varvalue,rl->transpose); |
2435 | return 0; |
2436 | } |
2437 | |
2438 | |
2439 | /* |
2440 | ********************************************************************* |
2441 | * Start of the 2 bodied factorization schemes. |
2442 | * |
2443 | * ranki2_entry will be used to access these routines. |
2444 | * there is now code called: |
2445 | * rankikw2_factor |
2446 | * rankijz2_factor |
2447 | * eliminate_row2 |
2448 | * ranki2_entry |
2449 | * forward_substitute2 |
2450 | * backward_substitute2 |
2451 | * ranki2_solve |
2452 | * |
2453 | * We have fixed the calc_col_dependent and calc_row_dependent |
2454 | * routines. |
2455 | ********************************************************************* |
2456 | */ |
2457 | |
2458 | extern int32 mtx_number_ops; |
2459 | |
2460 | /* this function does no permutation of any sort */ |
2461 | static |
2462 | void eliminate_row2(mtx_matrix_t mtx, |
2463 | mtx_matrix_t upper_mtx, |
2464 | mtx_range_t *rng, |
2465 | int32 row, /* row to eliminate */ |
2466 | real64 *tmp, /* temporary array */ |
2467 | real64 *pivots, /* prior pivots array */ |
2468 | real64 dtol /* drop tolerance */ |
2469 | ) |
2470 | { |
2471 | mtx_coord_t nz; |
2472 | int j,low,high; |
2473 | double tmpval; |
2474 | |
2475 | /* |
2476 | * Move all non-zeros from current row to full array. |
2477 | * The full array would have been initialized before, |
2478 | * we must put it back in the clean state when we leave. |
2479 | * All operations are done over mtx_ALL_COLS. |
2480 | * |
2481 | * Note: because of an addrow over mtx_ALL_COLS, entries |
2482 | * of tmp outside rng may have nonzeros put in them if |
2483 | * sys->factors has nonzeros in the outlying columns. |
2484 | * If enough of these pile up out there during elimination |
2485 | * of a block and the compiler treats double overflow as |
2486 | * a signalable error, we will have a floating point |
2487 | * exception. |
2488 | * Currently sys->factors is a copy of a region from |
2489 | * the sys->coef matrix, so we should not have anything |
2490 | * out there to pile up. If we make a variant which |
2491 | * does not copy the coef matrix but uses it directly, |
2492 | * this code needs to be revisited. |
2493 | */ |
2494 | mtx_steal_cur_row_vec(mtx,row,tmp,mtx_ALL_COLS); |
2495 | |
2496 | /* |
2497 | * Eliminates nzs from pivot, one by one, filling tmp with multipliers |
2498 | * We now operate on the entire row, since we are not writing the |
2499 | * multipliers back to the matrix. |
2500 | */ |
2501 | |
2502 | low = rng->low; |
2503 | high = rng->high; |
2504 | for (j = row-1 ; j >= low ; --(j) ) { |
2505 | if (tmp[j] == D_ZERO) |
2506 | continue; /* Nothing to do for this row */ |
2507 | tmpval = tmp[j]/pivots[j]; /* Compute multiplier */ |
2508 | |
2509 | /* |
2510 | * tmpval is now the multiplier. We use it to eliminate the row, |
2511 | * but backpatch it to tmp, *after* we do the elimination, as |
2512 | * mtx_cur_vec_add_row over all columns will stomp on tmp[j] |
2513 | */ |
2514 | mtx_cur_vec_add_row(mtx,tmp,j,-tmpval,mtx_ALL_COLS,FALSE); |
2515 | tmp[j] = tmpval; /* patch the diagonal */ |
2516 | } |
2517 | |
2518 | /* |
2519 | * Fill up the upper triangular matrix. |
2520 | * refill the range [low .. row-1]. Remember that we have |
2521 | * to zero all nnz's in tmp. |
2522 | */ |
2523 | nz.row = row; |
2524 | for (j=low;j<row;j++) { |
2525 | if (tmp[j] != D_ZERO) { |
2526 | nz.col = j; |
2527 | mtx_fill_value(upper_mtx,&nz,tmp[j]); |
2528 | #if RBADEBUG |
2529 | FPRINTF(gscr,"fillingU: or %d oc %d (cc %d) %24.18g\n", |
2530 | mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), nz.col, tmp[j]); |
2531 | #endif |
2532 | tmp[j] = D_ZERO; |
2533 | } |
2534 | } |
2535 | /* |
2536 | * refill the range [row .. high]. We *wont* keep |
2537 | * the diagonal as this is sorted out by the pivot |
2538 | * array, but here we must keep it, sigh. |
2539 | * At this stage we don't know that the diagonal is the |
2540 | * pivot, as that is determined after elimination done. |
2541 | * Outer loop must delete the ultimate pivot element. |
2542 | * Odds are good, however, that it is the current diagonal, |
2543 | * so put in the diagonal last. |
2544 | */ |
2545 | low = row; |
2546 | for (j=high;j>=low;j--) { |
2547 | if (tmp[j] != D_ZERO) { |
2548 | if (fabs(tmp[j]) > dtol) { |
2549 | nz.col = j; |
2550 | mtx_fill_value(mtx,&nz,tmp[j]); |
2551 | #if RBADEBUG |
2552 | FPRINTF(gscr,"fillingL: or %d oc %d %24.18g\n", |
2553 | mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), tmp[j]); |
2554 | #endif |
2555 | } |
2556 | tmp[j] = D_ZERO; /* zero element regardless */ |
2557 | } |
2558 | } |
2559 | tmp[row] = D_ZERO; |
2560 | } |
2561 | |
2562 | /* |
2563 | * This function is the same as rankikw_factor except |
2564 | * that it uses 2 matrices; one to store L and one for U. |
2565 | * As such it uses eliminate_row2 rather than eliminate_row. |
2566 | * Note there has been a change in the way ptol is used, |
2567 | * now selecting nearest passing, rather than taking max. |
2568 | * We assume there is NO incidence outside the region to be factored |
2569 | * in sys->factors. |
2570 | */ |
2571 | static void rankikw2_factor(linsolqr_system_t sys) |
2572 | { |
2573 | mtx_coord_t nz; |
2574 | int32 last_row, defect; |
2575 | mtx_range_t pivot_candidates; |
2576 | real64 *tmp; |
2577 | real64 pivot, *pivots; |
2578 | mtx_matrix_t mtx, upper_mtx; |
2579 | |
2580 | |
2581 | /** |
2582 | ** tmp and pivots are cur indexed, tmp by cols, pivots by rows. |
2583 | ** rather than allocating tmp every time, which can get expensive, |
2584 | ** we allocate it once *elsewhere* and zero it all here. |
2585 | ** Because we don't know the state of it on entry (due to exceptions, |
2586 | ** etc, the rezeroing is unavoidable. |
2587 | ** Eliminate_row2 is responsible for returning the working region |
2588 | ** zeroed. |
2589 | **/ |
2590 | |
2591 | tmp = sys->ludata->tmp; |
2592 | mtx_zero_real64(tmp,sys->capacity); |
2593 | pivots = sys->ludata->pivlist; |
2594 | mtx = sys->factors; |
2595 | upper_mtx = sys->inverse; |
2596 | defect = 0; |
2597 | #if RBADEBUG |
2598 | gscr = fopen("/usr1/ballan/tmp/lu/rkw","w+"); |
2599 | #endif |
2600 | |
2601 | sys->smallest_pivot = MAXDOUBLE; |
2602 | last_row = pivot_candidates.high = sys->rng.high; |
2603 | for( nz.row = sys->rng.low ; nz.row <= last_row ; ) { |
2604 | |
2605 | #if (RBADEBUG && 0) |
2606 | /* remove the && 0 and you better have a lot for free |
2607 | * disk space |
2608 | */ |
2609 | mtx_write_region_human_orgrows(gscr,mtx,mtx_ENTIRE_MATRIX); |
2610 | #endif |
2611 | |
2612 | pivot_candidates.low = nz.col = nz.row; |
2613 | pivots[nz.row]=pivot = mtx_value(mtx,&nz); |
2614 | pivot = fabs(pivot); |
2615 | if( pivot > sys->pivot_zero && |
2616 | pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) && |
2617 | !col_is_a_spike(sys,nz.row) ) { |
2618 | /* Good pivot and not a spike: continue with next row */ |
2619 | if( pivot < sys->smallest_pivot ) { |
2620 | sys->smallest_pivot = pivot; |
2621 | } |
2622 | #if RBADEBUG |
2623 | FPRINTF(gscr,"Cheap pivot col %d (org %d)\n", |
2624 | nz.row, mtx_col_to_org(mtx,nz.row)); |
2625 | if (nz.row ==179) { |
2626 | FPRINTF(stderr,"ben, stop here\n"); |
2627 | } |
2628 | #endif |
2629 | ++(nz.row); |
2630 | continue; |
2631 | } |
2632 | /* pivots for rows nz.row back to sys->rng->low are stored in pivots */ |
2633 | /** |
2634 | *** Row is a spike row or will |
2635 | *** be when a necessary column |
2636 | *** exchange occurs. |
2637 | **/ |
2638 | eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol); |
2639 | /* pivot will be leftmost of those that pass ptol and eps, or 0.0. */ |
2640 | if (defect) { |
2641 | /* unfortunately, can't use mtx_ALL_COLS in search now because we must |
2642 | ignore columns that were dragged out with previous singular rows. */ |
2643 | pivot = mtx_get_pivot_col(mtx,&nz,&pivot_candidates,&(pivots[nz.row]), |
2644 | sys->ptol,sys->pivot_zero); |
2645 | } else { |
2646 | /* no singular cols, and eliminate moved all left of diag crap out, so */ |
2647 | pivot = mtx_get_pivot_col(mtx,&nz,mtx_ALL_COLS,&(pivots[nz.row]), |
2648 | sys->ptol,sys->pivot_zero); |
2649 | } |
2650 | #if RBADEBUG |
2651 | FPRINTF(gscr,"Pivot column found %d (org %d)\n", |
2652 | nz.col, mtx_col_to_org(mtx,nz.col)); |
2653 | #endif |
2654 | if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */ |
2655 | /* |
2656 | * Dependent row, drag to the end. The upper_mtx is a slave |
2657 | * of mtx, and will be dragged automatically. |
2658 | */ |
2659 | mtx_drag(mtx, nz.row, last_row); |
2660 | number_drag(pivots, nz.row, last_row); |
2661 | --last_row; |
2662 | defect = 1; |
2663 | } else { |
2664 | /* Independent row: nz contains selected pivot */ |
2665 | mtx_swap_cols(mtx, nz.row, nz.col); /* this Fixes U as well */ |
2666 | /* Move pivot to diagonal */ |
2667 | mtx_drag(mtx, nz.row, sys->rng.low ); /* this Fix U as well */ |
2668 | number_drag(pivots, nz.row, sys->rng.low); |
2669 | if( pivot < sys->smallest_pivot ) |
2670 | sys->smallest_pivot = pivot; |
2671 | ++(nz.row); |
2672 | } |
2673 | } |
2674 | #if RBADEBUG |
2675 | /* get rid of this asap*/ |
2676 | { |
2677 | FILE *fp; |
2678 | int32 cc; |
2679 | |
2680 | fp = fopen("/usr1/ballan/tmp/lu/kw2p","w+"); |
2681 | FPRINTF(fp,"kw2 final pivot sequence:\n"); |
2682 | for (cc= sys->rng.low; cc <= last_row; cc++) { |
2683 | FPRINTF(fp,"orgrow,orgcol = %d,%d,%24.18g\n", |
2684 | mtx_row_to_org(mtx,cc), |
2685 | mtx_col_to_org(mtx,cc), pivots[cc]); |
2686 | } |
2687 | fclose(fp); |
2688 | } |
2689 | |
2690 | #endif |
2691 | |
2692 | zero_diagonal_elements(sys->factors,sys->rng.low,sys->rng.high); |
2693 | sys->rank = last_row - sys->rng.low + 1; |
2694 | |
2695 | #if RBADEBUG |
2696 | { |
2697 | FILE *fp; |
2698 | fp = fopen("/usr1/ballan/tmp/lu/kw2m","w+"); |
2699 | mtx_write_region_human_rows(fp,mtx,mtx_ENTIRE_MATRIX); |
2700 | fclose(fp); |
2701 | fp = fopen("/usr1/ballan/tmp/lu/kw2um","w+"); |
2702 | mtx_write_region_human_rows(fp,upper_mtx,mtx_ENTIRE_MATRIX); |
2703 | fclose(fp); |
2704 | } |
2705 | #endif |
2706 | #if RBADEBUG |
2707 | fclose(gscr); |
2708 | #endif |
2709 | } |
2710 | |
2711 | /* |
2712 | * This function is the same as rankijz_factor except |
2713 | * that it uses 2 matrices; one to store L and one for U. |
2714 | * As such it uses eliminate_row2 rather than eliminate_row. |
2715 | * |
2716 | * We are now using the slave matrix feature of the code, and |
2717 | * thus we all drag/swaps automatically affect the slave (upper_mtx) |
2718 | * as well. |
2719 | */ |
2720 | static void rankijz2_factor(linsolqr_system_t sys) |
2721 | { |
2722 | real64 biggest; |
2723 | mtx_coord_t nz, best; |
2724 | mtx_region_t candidates; |
2725 | real64 *tmp; |
2726 | real64 pivot, *pivots; |
2727 | mtx_matrix_t mtx, upper_mtx; |
2728 | /** |
2729 | ** tmp and pivots are cur indexed, tmp by cols, pivots by rows. |
2730 | ** rather than allocating tmp every time, which can get expensive, |
2731 | ** we allocate it once elsewhere and zero it all here. |
2732 | ** Because we don't know the state of it on entry (due to exceptions, |
2733 | ** etc), the rezeroing is unavoidable. |
2734 | ** Eliminate_row2 is responsible for returning the working region |
2735 | ** zeroed. |
2736 | **/ |
2737 | tmp = sys->ludata->tmp; |
2738 | mtx_zero_real64(tmp,sys->capacity); |
2739 | pivots=sys->ludata->pivlist; |
2740 | mtx = sys->factors; |
2741 | upper_mtx = sys->inverse; |
2742 | |
2743 | sys->smallest_pivot = MAXDOUBLE; |
2744 | candidates.row.high = sys->rng.high; |
2745 | candidates.col.high = sys->rng.high; |
2746 | for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) { |
2747 | nz.col = nz.row; |
2748 | pivots[nz.row] = pivot = mtx_value(mtx,&nz); |
2749 | pivot = fabs(pivot); |
2750 | candidates.row.low = nz.row; |
2751 | candidates.col.low = nz.row; |
2752 | if( !col_is_a_spike(sys,nz.row) ) { |
2753 | best.col = nz.row; |
2754 | mtx_col_max(mtx,&best,&(candidates.row),&biggest); |
2755 | /* make sure we aren't missing someone really nice in the column */ |
2756 | if( fabs(biggest) >= sys->pivot_zero ) { |
2757 | if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) { |
2758 | mtx_swap_rows(mtx,nz.row,best.row); /* Fixes U as well */ |
2759 | pivots[nz.row] = biggest; |
2760 | pivot = fabs(biggest); |
2761 | } |
2762 | if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot; |
2763 | ++(nz.row); |
2764 | continue; |
2765 | } |
2766 | } |
2767 | /* pivots for rows nz.row back to sys->rng->low are stored in pivots */ |
2768 | /** |
2769 | *** Row is a spike row or will |
2770 | *** be when a necessary column |
2771 | *** exchange occurs. |
2772 | **/ |
2773 | eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol); |
2774 | /* pivot will be largest of those available. get size and value */ |
2775 | pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row])); |
2776 | /* Move pivot to diagonal */ |
2777 | if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */ |
2778 | /* Dependent row, drag nz to lower right */ |
2779 | mtx_drag(mtx,nz.row,candidates.row.high); |
2780 | number_drag(pivots,nz.row,candidates.row.high); |
2781 | --(candidates.row.high); |
2782 | } else { |
2783 | /* Independent row, drag nz to upper left */ |
2784 | mtx_swap_cols(mtx,nz.row,nz.col); /* this Fixes U as well */ |
2785 | mtx_drag(mtx ,nz.row ,sys->rng.low); |
2786 | |
2787 | number_drag(pivots,nz.row,sys->rng.low); |
2788 | if( pivot < sys->smallest_pivot ) |
2789 | sys->smallest_pivot = pivot; |
2790 | ++(nz.row); |
2791 | } |
2792 | } |
2793 | zero_diagonal_elements(sys->factors,sys->rng.low,sys->rng.high); |
2794 | sys->rank = candidates.row.high - sys->rng.low + 1; |
2795 | } |
2796 | |
2797 | /* |
2798 | * See the comments attached to forward_substitute. |
2799 | * This code otherwise is the same. It uses a 2 bodied |
2800 | * matrix as well as making use of mtx_ALL_COLS and |
2801 | * mtx_ALL_ROWS whereever possible. |
2802 | * We make the following additional assumptions here: |
2803 | * (if they do not hold, do NOT use this function) |
2804 | * - sys->inverse (U) has no incidence that is not multipliers |
2805 | * (and the diagonal of 1s is NOT in sys->inverse.) |
2806 | * As of 10/95, this is how ranki2 U is constructed. |
2807 | * - sys->factors (L) has no incidence on the upper triangle, |
2808 | * including the diagonal, or outside the factored region. |
2809 | * relaxation: incidence anywhere allowed if value = 0.0 |
2810 | * since 0 doesn't contribute to a dot product |
2811 | * and the only thing we do with triangles is dot them. |
2812 | * - There may be singular rows and columns in the factorization, |
2813 | * but any additions coming from these rows/columns during |
2814 | * mtx_ALL_*O*S operations will not contribute to sums because the |
2815 | * user zeroed the arr entries corresponding to these before |
2816 | * calling this function. |
2817 | */ |
2818 | static |
2819 | void forward_substitute2(linsolqr_system_t sys, |
2820 | real64 *arr, |
2821 | boolean transpose) |
2822 | { |
2823 | mtx_coord_t nz; |
2824 | real64 sum, *pivlist; |
2825 | mtx_matrix_t mtx; |
2826 | int32 dotlim; |
2827 | boolean nonzero_found=FALSE; |
2828 | |
2829 | pivlist=sys->ludata->pivlist; |
2830 | dotlim = sys->rng.low+sys->rank; |
2831 | if (transpose) { /* arr is indexed by original column number */ |
2832 | mtx=sys->inverse; |
2833 | for( nz.col=sys->rng.low; nz.col < dotlim; ++(nz.col) ) { |
2834 | register int32 org_col; |
2835 | |
2836 | org_col = mtx_col_to_org(mtx,nz.col); |
2837 | if (arr[org_col]!=D_ZERO) nonzero_found=TRUE; |
2838 | if (nonzero_found) { |
2839 | sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,mtx_ALL_ROWS,TRUE); |
2840 | /* arr[org_col] = (arr[org_col] - sum) / D_ONE */; |
2841 | arr[org_col] -= sum; |
2842 | } |
2843 | } |
2844 | } else { /* arr is indexed by original row number */ |
2845 | mtx=sys->factors; |
2846 | for( nz.row=sys->rng.low; nz.row < dotlim; ++(nz.row) ) { |
2847 | register int32 org_row; |
2848 | |
2849 | org_row = mtx_row_to_org(mtx,nz.row); |
2850 | if (arr[org_row]!=D_ZERO) nonzero_found=TRUE; |
2851 | if (nonzero_found) { |
2852 | sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,mtx_ALL_COLS,TRUE); |
2853 | /* |
2854 | nz.col = nz.row; |
2855 | arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz); |
2856 | */ |
2857 | arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row]; |
2858 | } |
2859 | } |
2860 | } |
2861 | } |
2862 | |
2863 | /* |
2864 | * See the comments attached to backward_substitute and |
2865 | * forward_substitute2. |
2866 | * When solving for the transpose, then we are actually |
2867 | * running of the lower triangle, hence we use sys->factors. |
2868 | * Otherwise we use sys->inverse which stores U. |
2869 | */ |
2870 | static |
2871 | void backward_substitute2(linsolqr_system_t sys, |
2872 | real64 *arr, |
2873 | boolean transpose) |
2874 | { |
2875 | mtx_coord_t nz; |
2876 | real64 sum, *pivlist; |
2877 | mtx_matrix_t mtx; |
2878 | int32 dotlim; |
2879 | boolean nonzero_found=FALSE; /* once TRUE, substitution must be done |
2880 | over remaining rows/cols */ |
2881 | |
2882 | dotlim=sys->rng.low; |
2883 | pivlist=sys->ludata->pivlist; |
2884 | if (transpose) { /* arr is indexed by original column number */ |
2885 | mtx = sys->factors; |
2886 | for( nz.col = sys->rng.low+sys->rank-1; nz.col >= dotlim ; --(nz.col) ) { |
2887 | register int32 org_col; |
2888 | |
2889 | org_col = mtx_col_to_org(mtx,nz.col); |
2890 | if (arr[org_col] != D_ZERO) nonzero_found=TRUE; |
2891 | if (nonzero_found) { |
2892 | sum = mtx_col_dot_full_org_vec(mtx,nz.col,arr,mtx_ALL_ROWS,TRUE); |
2893 | arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col]; |
2894 | } |
2895 | } |
2896 | } else { /* arr is indexed by original row number */ |
2897 | /* we are working from the bottom up */ |
2898 | mtx = sys->inverse; |
2899 | for( nz.row = sys->rng.low+sys->rank-1; nz.row >= dotlim ; --(nz.row) ) { |
2900 | register int32 org_row; |
2901 | |
2902 | org_row = mtx_row_to_org(mtx,nz.row); |
2903 | if (arr[org_row]!=D_ZERO) nonzero_found=TRUE; |
2904 | if (nonzero_found) { |
2905 | sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,mtx_ALL_COLS,TRUE); |
2906 | arr[org_row] -= sum; |
2907 | } |
2908 | } |
2909 | } |
2910 | } |
2911 | |
2912 | static void calc_dependent_rows_ranki2(linsolqr_system_t sys) |
2913 | { |
2914 | mtx_coord_t nz; |
2915 | real64 value; |
2916 | mtx_range_t colrange; |
2917 | mtx_range_t rowrange; |
2918 | real64 *lc; |
2919 | mtx_matrix_t mtx; |
2920 | |
2921 | sys->rowdeps = TRUE; |
2922 | if( ( (sys->reg.row.low == sys->rng.low) && |
2923 | ( sys->reg.row.high == sys->rng.low+sys->rank-1 ) |
2924 | ) || sys->rank==0 ) |
2925 | return; |
2926 | |
2927 | lc = sys->ludata->tmp; |
2928 | colrange.low = sys->rng.low; |
2929 | colrange.high = colrange.low + sys->rank - 1; |
2930 | rowrange.low = sys->rng.high; |
2931 | rowrange.high = sys->rng.low+sys->rank; |
2932 | mtx=sys->factors; |
2933 | |
2934 | nz.row = sys->reg.row.low; |
2935 | for( ; nz.row <= sys->reg.row.high; nz.row++ ) { |
2936 | if( nz.row == sys->rng.low ) { |
2937 | nz.row = rowrange.high-1; |
2938 | continue; |
2939 | } |
2940 | mtx_zero_real64(lc,(sys->capacity)); |
2941 | /* must zero the whole thing to use the backward_substitute2 right */ |
2942 | mtx_org_row_vec(mtx,nz.row,lc,&colrange); |
2943 | if( nz.row < rowrange.high || nz.row > rowrange.low ) |
2944 | backward_substitute2(sys,lc,TRUE); |
2945 | forward_substitute2(sys,lc,TRUE); |
2946 | mtx_clear_row(mtx,nz.row,&colrange); |
2947 | for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) { |
2948 | value = lc[mtx_col_to_org(mtx,nz.col)]; |
2949 | if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value); |
2950 | } |
2951 | } |
2952 | } |
2953 | |
2954 | static void calc_dependent_cols_ranki2(linsolqr_system_t sys) |
2955 | { |
2956 | mtx_coord_t nz; |
2957 | real64 value; |
2958 | mtx_range_t rowrange; |
2959 | mtx_range_t colrange; |
2960 | real64 *lc; |
2961 | mtx_matrix_t mtx; |
2962 | |
2963 | sys->coldeps = TRUE; |
2964 | if( ( (sys->reg.col.low == sys->rng.low) && |
2965 | ( sys->reg.col.high == sys->rng.low+sys->rank-1 ) |
2966 | ) || sys->rank==0 ) |
2967 | return; |
2968 | |
2969 | lc = sys->ludata->tmp; |
2970 | rowrange.low = sys->rng.low; |
2971 | rowrange.high = rowrange.low + sys->rank - 1; |
2972 | colrange.high = sys->rng.low+sys->rank; |
2973 | colrange.low = sys->rng.high; |
2974 | mtx=sys->factors; |
2975 | |
2976 | nz.col = sys->reg.col.low; |
2977 | for( ; nz.col <= sys->reg.col.high; nz.col++ ) { |
2978 | if( nz.col == sys->rng.low ) { |
2979 | nz.col = colrange.high-1; |
2980 | continue; |
2981 | } |
2982 | mtx_zero_real64(lc,sys->capacity); |
2983 | mtx_org_col_vec(mtx,nz.col,lc,&rowrange); |
2984 | if( nz.col < colrange.high || nz.col > colrange.low ) |
2985 | backward_substitute2(sys,lc,FALSE); |
2986 | forward_substitute2(sys,lc,FALSE); |
2987 | mtx_clear_col(mtx,nz.col,&rowrange); |
2988 | for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) { |
2989 | value = lc[mtx_row_to_org(mtx,nz.row)]; |
2990 | if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value); |
2991 | } |
2992 | } |
2993 | } |
2994 | |
2995 | static int ranki2_solve(linsolqr_system_t sys, struct rhs_list *rl) |
2996 | { |
2997 | /* zero any unsolved for vars first so they don't contaminate |
2998 | mtx_ALL_*O*S dot products. |
2999 | */ |
3000 | zero_unpivoted_vars(sys,rl->varvalue,rl->transpose); |
3001 | backward_substitute2(sys,rl->varvalue,rl->transpose); |
3002 | forward_substitute2(sys,rl->varvalue,rl->transpose); |
3003 | return 0; |
3004 | } |
3005 | |
3006 | /* include rankiba2 implementation. Note that this is VERY VERY bad style |
3007 | and needs to be fixed immediately by splitting up the linsolqr file. |
3008 | This is our fastest (far and away) ranki implementation -- drag free. |
3009 | */ |
3010 | #if 1 |
3011 | #include "rankiba2.c" |
3012 | #endif |
3013 | |
3014 | #ifdef BUILD_KIRK_CODE |
3015 | /* |
3016 | * this forward declaration is necessary to keept the |
3017 | * system happy until we can organize this file |
3018 | */ |
3019 | |
3020 | static |
3021 | int kirk1_factor(linsolqr_system_t sys, |
3022 | mtx_region_t *A11, |
3023 | int kirk_method); |
3024 | |
3025 | #endif /* BUILD_KIRK_CODE */ |
3026 | |
3027 | /* |
3028 | * This is the entry point for all the ranki2_* schemes |
3029 | * which make use of a 2 bodied matrix. |
3030 | * it does not respond to ranki_kw and ranki_jz as if they |
3031 | * were calls for ranki_kw2 and ranki_jz2. with the new |
3032 | * factorization classes , there's really no call to be |
3033 | * sneaking around the header functions. |
3034 | * ranki_ka goes here as well. check sys->fmethod to see whos |
3035 | * expected to be in use. |
3036 | */ |
3037 | int ranki2_entry(linsolqr_system_t sys, mtx_region_t *region) |
3038 | { |
3039 | struct rhs_list *rl; |
3040 | double time; |
3041 | |
3042 | CHECK_SYSTEM(sys); |
3043 | if (sys->factored) return 0; |
3044 | switch(sys->fmethod) { |
3045 | case ranki_ba2: |
3046 | case ranki_kw2: |
3047 | case ranki_jz2: /* add new methods here */ |
3048 | #ifdef BUILD_KIRK_CODE |
3049 | case ranki_ka: |
3050 | #endif /* BUILD_KIRK_CODE */ |
3051 | break; |
3052 | default: |
3053 | return 1; |
3054 | } |
3055 | |
3056 | if (ISNULL(sys->ludata)) return 1; |
3057 | if (NOTNULL(sys->inverse)) mtx_destroy(sys->inverse); |
3058 | sys->inverse = NULL; |
3059 | if (NOTNULL(sys->factors)) mtx_destroy(sys->factors); |
3060 | if (region == mtx_ENTIRE_MATRIX) determine_pivot_range(sys); |
3061 | else square_region(sys,region); |
3062 | |
3063 | sys->factors = mtx_copy_region(sys->coef,region); |
3064 | sys->inverse = mtx_create_slave(sys->factors); |
3065 | sys->rank = -1; |
3066 | sys->smallest_pivot = MAXDOUBLE; |
3067 | for (rl = sys->rl ; NOTNULL(rl) ; rl = rl->next) |
3068 | rl->solved = FALSE; |
3069 | insure_capacity(sys); |
3070 | insure_lu_capacity(sys); |
3071 | |
3072 | time = tm_cpu_time(); |
3073 | switch(sys->fmethod) { |
3074 | case ranki_ba2: |
3075 | rankiba2_factor(sys); |
3076 | break; |
3077 | case ranki_kw2: |
3078 | rankikw2_factor(sys); |
3079 | break; |
3080 | case ranki_jz2: |
3081 | rankijz2_factor(sys); |
3082 | break; |
3083 | |
3084 | #ifdef BUILD_KIRK_CODE |
3085 | case ranki_ka: |
3086 | kirk1_factor(sys,region,2); |
3087 | break; |
3088 | #endif /* BUILD_KIRK_CODE */ |
3089 | |
3090 | default: |
3091 | return 1; |
3092 | } |
3093 | sys->factored = TRUE; |
3094 | |
3095 | #define KAA_DEBUG 1 |
3096 | #if KAA_DEBUG |
3097 | if (g_linsolqr_timing) { |
3098 | int anz; |
3099 | int fnz; |
3100 | anz = mtx_nonzeros_in_region(sys->coef,region); |
3101 | fnz = mtx_nonzeros_in_region(sys->factors,region) + |
3102 | mtx_nonzeros_in_region(sys->inverse,0); |
3103 | time = tm_cpu_time() - time; |
3104 | FPRINTF(stderr,"A-NNZ: %d Factor time: %f Fill %g\n", |
3105 | anz,time,( anz>0 ? (double)fnz/(double)anz : 0)); |
3106 | } |
3107 | #endif /* KAA_DEBUG */ |
3108 | #undef KAA_DEBUG |
3109 | return 0; |
3110 | } |
3111 | |
3112 | /***************************************************************************\ |
3113 | End of RANKI implementation functions. |
3114 | \***************************************************************************/ |
3115 | |
3116 | |
3117 | #ifdef BUILD_KIRK_CODE |
3118 | /* |
3119 | *************************************************************************** |
3120 | * Start of kirk_* routines |
3121 | * |
3122 | * kirk1_factor: |
3123 | * This routine is based on rankikw2_factor, except that it takes |
3124 | * an additional region, so as to do some *global* pivot restriction. |
3125 | * This is for the case of a single border for the entire matrix. |
3126 | * |
3127 | *************************************************************************** |
3128 | */ |
3129 | |
3130 | /* |
3131 | * This is just a dummy structure so as to get the size |
3132 | * correct. It is *exactly the same size as mtx_linklist, |
3133 | * until we can sort out the software engineering issues. |
3134 | */ |
3135 | struct dlinklist { |
3136 | int index; |
3137 | struct dlinklist *prev; |
3138 | }; |
3139 | |
3140 | static |
3141 | int kirk1_factor(linsolqr_system_t sys, |
3142 | mtx_region_t *A11, |
3143 | int kirk_method) |
3144 | { |
3145 | mtx_coord_t nz; |
3146 | int32 last_row; |
3147 | mtx_range_t pivot_candidates; |
3148 | real64 *tmp,*tmp_array_origin; |
3149 | real64 pivot, *pivots; |
3150 | int32 length; |
3151 | mtx_matrix_t mtx, upper_mtx; |
3152 | real64 maxa; |
3153 | |
3154 | int32 *inserted = NULL; /* stuff for the link list */ |
3155 | mem_store_t eltbuffer = NULL; |
3156 | |
3157 | #ifdef NOP_DEBUG |
3158 | mtx_number_ops = 0; |
3159 | #endif /* NOP_DEBUG */ |
3160 | |
3161 | length = sys->rng.high + 1; |
3162 | tmp_array_origin = length > 0 ? |
3163 | (real64 *)asccalloc( length,sizeof(real64) ) : NULL; |
3164 | tmp = tmp_array_origin; |
3165 | pivots=sys->ludata->pivlist; |
3166 | mtx = sys->factors; |
3167 | upper_mtx = sys->inverse; |
3168 | inserted = (int32 *)asccalloc(length,sizeof(int32)); |
3169 | eltbuffer = mem_create_store(2,256,sizeof(struct dlinklist), |
3170 | 2,256); |
3171 | |
3172 | sys->smallest_pivot = MAXDOUBLE; |
3173 | last_row = pivot_candidates.high = sys->rng.high; |
3174 | for( nz.row = sys->rng.low ; nz.row <= last_row ; ) { |
3175 | |
3176 | pivot_candidates.low = nz.col = nz.row; |
3177 | pivots[nz.row]=pivot = mtx_value(mtx,&nz); |
3178 | pivot = fabs(pivot); |
3179 | maxa = mtx_row_max(mtx,&nz,&pivot_candidates,NULL); |
3180 | if ((pivot > sys->pivot_zero) && (pivot >= sys->ptol*maxa) && |
3181 | !col_is_a_spike(sys,nz.row)) { |
3182 | if (pivot < sys->smallest_pivot) |
3183 | sys->smallest_pivot = pivot; |
3184 | ++(nz.row); |
3185 | continue; |
3186 | } |
3187 | /* pivots for rows nz.row back to sys->rng->low are stored in pivots */ |
3188 | /** |
3189 | *** Row is a spike row or will |
3190 | *** be when a necessary column |
3191 | *** exchange occurs. |
3192 | **/ |
3193 | if (kirk_method==1) |
3194 | eliminate_row2(mtx,upper_mtx,&(sys->rng),nz.row,tmp,pivots,sys->dtol); |
3195 | else{ |
3196 | mtx_eliminate_row2(mtx,upper_mtx,&(sys->rng), |
3197 | nz.row,tmp,pivots,inserted,eltbuffer); |
3198 | mem_clear_store(eltbuffer); |
3199 | } |
3200 | /* pivot will be largest of those available. get size and value */ |
3201 | pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row])); |
3202 | if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */ |
3203 | /* |
3204 | * Dependent row, drag to the end. The upper_mtx is a slave |
3205 | * of mtx, and will be dragged automatically. |
3206 | */ |
3207 | mtx_drag(mtx,nz.row,last_row); |
3208 | number_drag(pivots,nz.row,last_row); |
3209 | --last_row; |
3210 | #ifdef KAA_DEBUG |
3211 | FPRINTF(stderr,"Warning: Row %d is dependent with pivot %20.8g\n", |
3212 | nz.row,pivot); |
3213 | #endif /* KAA_DEBUG */ |
3214 | } else { |
3215 | /* Independent row: nz contains best pivot */ |
3216 | mtx_swap_cols(mtx,nz.row,nz.col); /* this Fixes U as well */ |