Parent Directory
|
Revision Log
Setting up web subdirectory in repository
1 | aw0a | 1 | /* |
2 | * linsol II: Ascend Sparse Linear Solvers | ||
3 | * by Benjamin Allan | ||
4 | * Created: 2/6/90 | ||
5 | * Version: $Revision: 1.26 $ | ||
6 | * Version control file: $RCSfile: linsolqr.c,v $ | ||
7 | * Date last modified: $Date: 2000/01/25 02:26:58 $ | ||
8 | * Last modified by: $Author: ballan $ | ||
9 | * | ||
10 | * This file is part of the SLV solver. | ||
11 | * | ||
12 | * Copyright (C) 1994 Benjamin Andrew Allan | ||
13 | * | ||
14 | * Based on linsol: | ||
15 | * Copyright (C) 1990 Karl Michael Westerberg | ||
16 | * Copyright (C) 1993 Joseph Zaher | ||
17 | * Copyright (C) 1994 Joseph Zaher, Benjamin Andrew Allan | ||
18 | * and on sqr.pas v1.5: | ||
19 | * Copyright (C) 1994 Boyd Safrit | ||
20 | * | ||
21 | * The SLV solver is free software; you can redistribute | ||
22 | * it and/or modify it under the terms of the GNU General Public License as | ||
23 | * published by the Free Software Foundation; either version 2 of the | ||
24 | * License, or (at your option) any later version. | ||
25 | * | ||
26 | * The SLV solver is distributed in hope that it will be | ||
27 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
29 | * General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with the program; if not, write to the Free Software Foundation, | ||
33 | * Inc., 675 Mass Ave, Cambridge, MA 02139 USA. Check the file named | ||
34 | * COPYING. COPYING is found in ../compiler. | ||
35 | */ | ||
36 | |||
37 | /* | ||
38 | * Implementation Notes: | ||
39 | * 9/95: there are many places in linsolqr.c where lower level matrix | ||
40 | * operations have been replaced by higher level ones. These used to | ||
41 | * be flagged as such so that people who wanted to construct newer | ||
42 | * operations from the lowest level mtx operators could see how to | ||
43 | * use the lowlevel operators. Since it has been decided that the old | ||
44 | * linsol will NOT be discarded completely (we like having easy | ||
45 | * benchmarks to look better than), the user wishing to code new | ||
46 | * operations is directed back to linsol to see how lowlevel operations | ||
47 | * can be expensively done. | ||
48 | * | ||
49 | * 10/95: (BAA) I attempted to optimize number_drag through use of | ||
50 | * pointer arithmetic. Testing the resulting code without optimization | ||
51 | * turned on gave about 20% improvement over the original on an alpha | ||
52 | * using the native compiler. However, with optimization on the array | ||
53 | * subscripted version which is left here won by ~40%. | ||
54 | * The results for gcc were similar: pointer 10% better unoptimized, | ||
55 | * subscripted 30% better when -O2. | ||
56 | * For both compilers -O3 was worse! gcc was ~1.8x slower than the | ||
57 | * native cc both optimized and not. | ||
58 | * I have not tested a loop unrolled drag, but I expect unrolling is | ||
59 | * exactly what the optimizer is doing, so why bother? | ||
60 | */ | ||
61 | |||
62 | #include <math.h> | ||
63 | #include <stdarg.h> | ||
64 | #include "utilities/ascConfig.h" | ||
65 | #include "utilities/ascMalloc.h" | ||
66 | #include "utilities/ascPanic.h" | ||
67 | #include "utilities/mem.h" | ||
68 | #include "utilities/set.h" | ||
69 | #include "general/tm_time.h" | ||
70 | #include "solver/mtx.h" | ||
71 | #include "solver/linsolqr.h" | ||
72 | |||
73 | #ifdef __WIN32__ | ||
74 | #define copysign _copysign | ||
75 | #endif /* __WIN32__ */ | ||
76 | |||
77 | #define LINSOLQR_DEBUG FALSE | ||
78 | /* qr code debugging flag */ | ||
79 | #define LINSOL_DEBUG FALSE | ||
80 | /* linsol debugging flag */ | ||
81 | |||
82 | #define D_ZERO (real64)0.0 | ||
83 | #define ZERO (int32)0 | ||
84 | #define D_ONE (real64)1.0 | ||
85 | |||
86 | /* housekeeping flags */ | ||
87 | #define BUILD_DEAD_CODE 0 | ||
88 | #undef BUILD_KIRK_CODE | ||
89 | |||
90 | |||
91 | #define RBADEBUG 0 | ||
92 | /* turns on spew that will generate output files | ||
93 | * comparable to and in the same location as for rankiba2 | ||
94 | * user specific hardcoded pathnames are involved. | ||
95 | * don't try this at home. | ||
96 | */ | ||
97 | #if RBADEBUG | ||
98 | extern FILE *gscr; | ||
99 | #endif | ||
100 | |||
101 | /* | ||
102 | * timing messages disable flag. | ||
103 | */ | ||
104 | int g_linsolqr_timing = 1; | ||
105 | |||
106 | /***************************************************************************\ | ||
107 | Data structures for linsol | ||
108 | Any time you modify these, please verify linsolqr_size is still correct. | ||
109 | \***************************************************************************/ | ||
110 | struct rhs_list { | ||
111 | real64 *rhs; /* Vector of rhs values */ | ||
112 | real64 *varvalue; /* Solution of the linear system */ | ||
113 | boolean solved; /* ? has the rhs been solved */ | ||
114 | boolean transpose; /* ? transpose the coef matrix */ | ||
115 | struct rhs_list *next; | ||
116 | }; | ||
117 | |||
118 | struct qr_fill_t { | ||
119 | int32 cnt; /* number of real nonzeros in a column */ | ||
120 | int32 col; /* column number prior to being sorted */ | ||
121 | int32 fill; /* fill if this (or if col?) were the pivot column */ | ||
122 | int32 overlap; /* scratch for fillable calculation. */ | ||
123 | }; | ||
124 | /* fill counting structure for sparse qr */ | ||
125 | |||
126 | struct lu_auxdata { /* all indexed by cur row or cur col number */ | ||
127 | real64 *pivlist; /* vector of pivots (diagonal of L for ranki) */ | ||
128 | real64 *tmp; /* row elimination, dependency buffer */ | ||
129 | int32 cap; /* current capacity of the vectors if not null */ | ||
130 | }; | ||
131 | /* structure for lu algorithms */ | ||
132 | |||
133 | struct qr_auxdata { /* all indexed by cur row or cur col number */ | ||
134 | mtx_matrix_t hhvects; /* slave matrix of sys->factors for holding u's */ | ||
135 | mtx_sparse_t sp; /* sparse data space */ | ||
136 | real64 *alpha; /* column norms for the Q\R factor matrix */ | ||
137 | real64 *sigma; /* column norms for the S (R inverse) matrix */ | ||
138 | real64 *tau; /* column tau of the Householder transforms */ | ||
139 | real64 *hhcol; /* Householder transform scratch vector */ | ||
140 | real64 *hhrow; /* Householder transform scratch vector */ | ||
141 | real64 nu; /* current condition number of inverse */ | ||
142 | real64 anu; /* condition number of coef */ | ||
143 | real64 asubnu; /* condition number of active region in coef */ | ||
144 | struct qr_fill_t *fill; /* fill counting structure */ | ||
145 | mtx_region_t facreg; /* region over which qr is done if nonsquare */ | ||
146 | int32 cap; /* current capacity of these vectors if not null */ | ||
147 | }; | ||
148 | /* structure for CondQR algorithms */ | ||
149 | |||
150 | struct linsolqr_header { | ||
151 | int integrity; | ||
152 | enum factor_class fclass; /* Type of factoring expected */ | ||
153 | enum factor_method fmethod; /* Method factoring expected */ | ||
154 | enum reorder_method rmethod; /* Type of most recent reordering */ | ||
155 | int32 capacity; /* Capacity of arrays */ | ||
156 | mtx_matrix_t coef; /* Coefficient matrix */ | ||
157 | struct rhs_list *rl; /* List of rhs vectors */ | ||
158 | int rlength; /* Length of rhs list */ | ||
159 | real64 pivot_zero; /* Smallest acceptable pivot */ | ||
160 | real64 ptol; /* Pivot selection tolerance */ | ||
161 | real64 ctol; /* Condition selection tolerance */ | ||
162 | real64 dtol; /* Matrix entry drop tolerance */ | ||
163 | mtx_matrix_t factors; /* Matrix with UL and dependence info (ranki), | ||
164 | or L and dependence info (ranki2), | ||
165 | or R and dependence info (qr methods) */ | ||
166 | mtx_matrix_t inverse; /* Matrix with U multipliers (ranki2) or R^-1 | ||
167 | (condqr), in both cases slave of factors */ | ||
168 | boolean factored; /* ? has the matrix been factored */ | ||
169 | boolean rowdeps; /* ? have row dependencies been calc'ed */ | ||
170 | boolean coldeps; /* ? have col dependencies been calc'ed */ | ||
171 | mtx_range_t rng; /* Pivot range */ | ||
172 | mtx_region_t reg; /* Bounding region given */ | ||
173 | int32 rank; /* Rank of the matrix */ | ||
174 | real64 smallest_pivot; /* Smallest pivot accepted */ | ||
175 | struct qr_auxdata *qrdata; /* Data vectors for qr methods */ | ||
176 | struct lu_auxdata *ludata; /* Data vectors for lu methods */ | ||
177 | }; | ||
178 | /* linsol main structure */ | ||
179 | |||
180 | /***************************************************************************\ | ||
181 | string functions for linsol io | ||
182 | \***************************************************************************/ | ||
183 | |||
184 | char *linsolqr_rmethods() { | ||
185 | static char names[]="Natural, SPK1, TSPK1"; | ||
186 | return names; | ||
187 | } | ||
188 | |||
189 | char *linsolqr_fmethods() { | ||
190 | static char names[] = | ||
191 | "SPK1/RANKI,SPK1/RANKI+ROW,Fast-SPK1/RANKI,Fast-SPK1/RANKI+ROW,Fastest-SPK1/MR-RANKI,CondQR,CPQR"; | ||
192 | return names; | ||
193 | } | ||
194 | |||
195 | /** **/ | ||
196 | enum reorder_method linsolqr_rmethod_to_enum(char *name) { | ||
197 | if (strcmp(name,"SPK1")==0) return spk1; | ||
198 | if (strcmp(name,"TSPK1")==0) return tspk1; | ||
199 | if (strcmp(name,"Natural")==0) return natural; | ||
200 | return unknown_r; | ||
201 | } | ||
202 | |||
203 | enum factor_method linsolqr_fmethod_to_enum(char *name) { | ||
204 | if (strcmp(name,"KIRK-STUFF")==0) return ranki_ka; | ||
205 | if (strcmp(name,"SPK1/RANKI")==0) return ranki_kw; | ||
206 | if (strcmp(name,"SPK1/RANKI+ROW")==0) return ranki_jz; | ||
207 | if (strcmp(name,"Fast-SPK1/RANKI")==0) return ranki_kw2; | ||
208 | if (strcmp(name,"Fast-SPK1/RANKI+ROW")==0) return ranki_jz2; | ||
209 | if (strcmp(name,"Fastest-SPK1/MR-RANKI")==0) return ranki_ba2; | ||
210 | if (strcmp(name,"CondQR")==0) return cond_qr; | ||
211 | if (strcmp(name,"CPQR")==0) return plain_qr; | ||
212 | return unknown_f; | ||
213 | } | ||
214 | |||
215 | enum factor_class linsolqr_fmethod_to_fclass(enum factor_method fm) | ||
216 | { | ||
217 | switch (fm) { | ||
218 | /* ranki-like things */ | ||
219 | case ranki_kw: | ||
220 | case ranki_jz: | ||
221 | case ranki_ba2: | ||
222 | case ranki_kw2: | ||
223 | case ranki_jz2: | ||
224 | case ranki_ka: | ||
225 | return ranki; | ||
226 | /* implemented qr things */ | ||
227 | case plain_qr: | ||
228 | return s_qr; | ||
229 | /* other stuff unimplemented. all fall through */ | ||
230 | case cond_qr: | ||
231 | case opt_qr: | ||
232 | case ls_qr: | ||
233 | case gauss_ba2: | ||
234 | case symmetric_lu: | ||
235 | case unknown_f: | ||
236 | default: | ||
237 | return unknown_c; | ||
238 | } | ||
239 | } | ||
240 | |||
241 | /** **/ | ||
242 | char *linsolqr_enum_to_rmethod(enum reorder_method meth) { | ||
243 | switch (meth) { | ||
244 | case spk1: return "SPK1"; | ||
245 | case tspk1: return "TSPK1"; | ||
246 | case natural: return "Natural"; | ||
247 | default: return "Unknown reordering method."; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | char *linsolqr_enum_to_fmethod(enum factor_method meth) { | ||
252 | switch (meth) { | ||
253 | case ranki_kw: return "SPK1/RANKI"; | ||
254 | case ranki_jz: return "SPK1/RANKI+ROW"; | ||
255 | case ranki_ba2: return "Fastest-SPK1/MR-RANKI"; | ||
256 | case ranki_kw2: return "Fast-SPK1/RANKI"; | ||
257 | case ranki_jz2: return "Fast-SPK1/RANKI+ROW"; | ||
258 | case ranki_ka: return "KIRK-STUFF"; | ||
259 | case cond_qr: return "CondQR"; | ||
260 | case plain_qr: return "CPQR"; | ||
261 | default: return "Unknown factorization method."; | ||
262 | } | ||
263 | } | ||
264 | |||
265 | /** **/ | ||
266 | char *linsolqr_rmethod_description(enum reorder_method meth) { | ||
267 | static char sspk1[]="SPK1 reordering ala Stadtherr."; | ||
268 | static char stspk1[]="SPK1 reordering column wise ala Stadtherr."; | ||
269 | static char nat[]="Ordering as received from user"; | ||
270 | switch (meth) { | ||
271 | case spk1: return sspk1; | ||
272 | case tspk1: return stspk1; | ||
273 | case natural: return nat; | ||
274 | default: return "Unknown reordering method."; | ||
275 | } | ||
276 | } | ||
277 | |||
278 | char *linsolqr_fmethod_description(enum factor_method meth) { | ||
279 | static char ex_ranki[] = | ||
280 | "SPK1 reordering with RANKI LU factoring ala Stadtherr."; | ||
281 | static char ex_rankib[] = | ||
282 | "SPK1 reordering with RANKI LU factoring ala Stadtherr, but fast."; | ||
283 | static char ex_rankij[]="SPK1/RANKI LU with pseudo-complete pivoting"; | ||
284 | static char ex_rankik[]="KIRK-STUFF/RANKI LU with pseudo-complete pivoting"; | ||
285 | static char ex_condqr[]="Sparse QR with condition controlled pivoting."; | ||
286 | static char ex_plainqr[]="Sparse QR with column pivoting."; | ||
287 | switch (meth) { | ||
288 | case ranki_kw: return ex_ranki; | ||
289 | case ranki_kw2: return ex_ranki; | ||
290 | case ranki_ba2: return ex_rankib; | ||
291 | case ranki_jz: return ex_rankij; | ||
292 | case ranki_jz2: return ex_rankij; | ||
293 | case ranki_ka: return ex_rankik; | ||
294 | case cond_qr: return ex_condqr; | ||
295 | case plain_qr: return ex_plainqr; | ||
296 | default: return "Unknown factorization method."; | ||
297 | } | ||
298 | } | ||
299 | |||
300 | /***************************************************************************\ | ||
301 | internal check routines | ||
302 | \***************************************************************************/ | ||
303 | #define OK ((int)439828676) | ||
304 | #define DESTROYED ((int)839276847) | ||
305 | #define CHECK_SYSTEM(a) check_system((a),__FILE__,__LINE__) | ||
306 | static int check_system(linsolqr_system_t sys, char *file, int line) | ||
307 | /** | ||
308 | *** Checks the system handle. Return 0 if ok, 1 otherwise. | ||
309 | **/ | ||
310 | { | ||
311 | if( ISNULL(sys) ) { | ||
312 | FPRINTF(stderr,"ERROR: (linsolqr) check_system\n"); | ||
313 | FPRINTF(stderr," NULL system handle.\n"); | ||
314 | return 1; | ||
315 | } | ||
316 | |||
317 | switch( sys->integrity ) { | ||
318 | case OK: | ||
319 | return 0; | ||
320 | case DESTROYED: | ||
321 | FPRINTF(stderr,"ERROR: (linsolqr) check_system, line %d\n",line); | ||
322 | FPRINTF(stderr," System was recently destroyed.\n"); | ||
323 | break; | ||
324 | default: | ||
325 | FPRINTF(stderr,"ERROR: (linsolqr) check_system, line %d\n",line); | ||
326 | FPRINTF(stderr," System was reused or never created.\n"); | ||
327 | break; | ||
328 | } | ||
329 | return 1; | ||
330 | } | ||
331 | |||
332 | /***************************************************************************\ | ||
333 | external calls (and some of their internals) | ||
334 | \***************************************************************************/ | ||
335 | static void destroy_rhs_list(struct rhs_list *rl) | ||
336 | /** | ||
337 | *** Destroys rhs list. | ||
338 | **/ | ||
339 | { | ||
340 | while( NOTNULL(rl) ) { | ||
341 | struct rhs_list *p; | ||
342 | p = rl; | ||
343 | rl = rl->next; | ||
344 | if( NOTNULL(p->varvalue) ) | ||
345 | ascfree( (POINTER)(p->varvalue) ); | ||
346 | ascfree( (POINTER)p ); | ||
347 | } | ||
348 | } | ||
349 | |||
350 | static struct rhs_list *find_rhs(struct rhs_list *rl,real64 *rhs) | ||
351 | /** | ||
352 | *** Searches for rhs in rhs list, returning it or NULL if not found. | ||
353 | **/ | ||
354 | { | ||
355 | for( ; NOTNULL(rl) ; rl = rl->next ) | ||
356 | if( rl->rhs == rhs ) | ||
357 | break; | ||
358 | return(rl); | ||
359 | } | ||
360 | |||
361 | static struct lu_auxdata *create_ludata() | ||
362 | /** | ||
363 | *** Creates an empty zeroed ludata struct. | ||
364 | *** Filled up by insure_lu_capacity. | ||
365 | **/ | ||
366 | { | ||
367 | struct lu_auxdata *d; | ||
368 | d=(struct lu_auxdata *)asccalloc(1,sizeof(struct lu_auxdata)); | ||
369 | return d; | ||
370 | } | ||
371 | |||
372 | static struct qr_auxdata *create_qrdata() | ||
373 | /** | ||
374 | *** Creates an empty zeroed qrdata struct. | ||
375 | *** Filled up by insure_qr_capacity. | ||
376 | **/ | ||
377 | { | ||
378 | struct qr_auxdata *d; | ||
379 | d=(struct qr_auxdata *)asccalloc(1,sizeof(struct qr_auxdata)); | ||
380 | return d; | ||
381 | } | ||
382 | |||
383 | static void destroy_ludata(struct lu_auxdata *d) { | ||
384 | /** | ||
385 | *** Conditionally destroys a ludata struct and its parts. | ||
386 | **/ | ||
387 | if (NOTNULL(d)) { | ||
388 | if (NOTNULL(d->pivlist)) ascfree(d->pivlist); | ||
389 | d->pivlist=NULL; | ||
390 | if (NOTNULL(d->tmp)) ascfree(d->tmp); | ||
391 | d->tmp=NULL; | ||
392 | ascfree(d); | ||
393 | } | ||
394 | } | ||
395 | |||
396 | static void destroy_qrdata(struct qr_auxdata *d) { | ||
397 | /** | ||
398 | *** Conditionally destroys a qrdata struct and its parts. | ||
399 | **/ | ||
400 | if (NOTNULL(d)) { | ||
401 | if (NOTNULL(d->alpha)) ascfree(d->alpha); | ||
402 | if (NOTNULL(d->sigma)) ascfree(d->sigma); | ||
403 | if (NOTNULL(d->tau)) ascfree(d->tau); | ||
404 | if (NOTNULL(d->hhcol)) ascfree(d->hhcol); | ||
405 | if (NOTNULL(d->hhrow)) ascfree(d->hhrow); | ||
406 | if (NOTNULL(d->fill)) ascfree(d->fill); | ||
407 | |||
408 | ascfree(d); | ||
409 | } | ||
410 | } | ||
411 | |||
412 | linsolqr_system_t linsolqr_create() | ||
413 | { | ||
414 | linsolqr_system_t sys; | ||
415 | |||
416 | sys = (linsolqr_system_t)ascmalloc( sizeof(struct linsolqr_header) ); | ||
417 | sys->fclass = unknown_c; | ||
418 | sys->fmethod = unknown_f; | ||
419 | sys->rmethod = unknown_r; | ||
420 | sys->integrity = OK; | ||
421 | sys->capacity = 0; | ||
422 | sys->coef = NULL; | ||
423 | sys->rl = NULL; | ||
424 | sys->rlength = 0; | ||
425 | sys->pivot_zero = 1e-12; /* default value */ | ||
426 | sys->ptol = 0.1; /* default value */ | ||
427 | sys->ctol = 0.1; /* default value */ | ||
428 | sys->dtol = LINQR_DROP_TOLERANCE; /* default value */ | ||
429 | sys->factors = NULL; | ||
430 | sys->inverse = NULL; | ||
431 | sys->factored = FALSE; | ||
432 | sys->rowdeps = FALSE; | ||
433 | sys->coldeps = FALSE; | ||
434 | sys->rng.low = sys->rng.high = -1; | ||
435 | sys->reg.row.low = sys->reg.row.high = -1; | ||
436 | sys->reg.col.low = sys->reg.col.high = -1; | ||
437 | sys->rank = -1; | ||
438 | sys->smallest_pivot = MAXDOUBLE; | ||
439 | sys->qrdata = NULL; | ||
440 | sys->ludata = NULL; | ||
441 | return(sys); | ||
442 | } | ||
443 | |||
444 | void linsolqr_destroy(linsolqr_system_t sys) | ||
445 | { | ||
446 | if(CHECK_SYSTEM(sys)) { | ||
447 | FPRINTF(stderr,"Bad linsolqr_system_t found. Not destroyed.\n"); | ||
448 | return; | ||
449 | } | ||
450 | if( NOTNULL(sys->coef) ) { | ||
451 | FPRINTF(stderr,"Notice: (linsolqr) non-NULL coefficient matrix\n"); | ||
452 | FPRINTF(stderr," in linsolqr system not being\n"); | ||
453 | FPRINTF(stderr," destroyed with linsolqr_system_t.\n"); | ||
454 | } | ||
455 | if( NOTNULL(sys->inverse) ) | ||
456 | mtx_destroy(sys->inverse); | ||
457 | if( NOTNULL(sys->factors) ) | ||
458 | mtx_destroy(sys->factors); | ||
459 | destroy_rhs_list(sys->rl); | ||
460 | destroy_qrdata(sys->qrdata); | ||
461 | destroy_ludata(sys->ludata); | ||
462 | sys->integrity = DESTROYED; | ||
463 | ascfree( (POINTER)sys ); | ||
464 | } | ||
465 | |||
466 | void linsolqr_set_matrix(linsolqr_system_t sys,mtx_matrix_t mtx) | ||
467 | { | ||
468 | if(CHECK_SYSTEM(sys)) { | ||
469 | FPRINTF(stderr,"Bad linsolqr_system_t found. coef mtx not set.\n"); | ||
470 | return; | ||
471 | } | ||
472 | sys->coef = mtx; | ||
473 | } | ||
474 | |||
475 | |||
476 | void linsolqr_set_region(linsolqr_system_t sys,mtx_region_t region) | ||
477 | { | ||
478 | if(CHECK_SYSTEM(sys)) { | ||
479 | FPRINTF(stderr,"Bad linsolqr_system_t found. coef mtx not set.\n"); | ||
480 | return; | ||
481 | } | ||
482 | sys->reg = region; | ||
483 | } | ||
484 | |||
485 | |||
486 | mtx_matrix_t linsolqr_get_matrix(linsolqr_system_t sys) | ||
487 | { | ||
488 | CHECK_SYSTEM(sys); | ||
489 | return( sys->coef ); | ||
490 | } | ||
491 | |||
492 | mtx_region_t *linsolqr_get_region(linsolqr_system_t sys) | ||
493 | { | ||
494 | return &(sys->reg); | ||
495 | } | ||
496 | |||
497 | void linsolqr_add_rhs(linsolqr_system_t sys, | ||
498 | real64 *rhs, | ||
499 | boolean transpose) | ||
500 | { | ||
501 | struct rhs_list *rl; | ||
502 | |||
503 | if(CHECK_SYSTEM(sys)) { | ||
504 | FPRINTF(stderr,"Bad linsolqr_system_t found. rhs not added.\n"); | ||
505 | return; | ||
506 | } | ||
507 | rl = find_rhs(sys->rl,rhs); | ||
508 | if( NOTNULL(rl) ) { | ||
509 | return; | ||
510 | } else { | ||
511 | if( NOTNULL(rhs) ) { | ||
512 | rl = (struct rhs_list *)ascmalloc( sizeof(struct rhs_list) ); | ||
513 | rl->rhs = rhs; | ||
514 | rl->varvalue = NULL; | ||
515 | rl->solved = FALSE; | ||
516 | rl->transpose = transpose; | ||
517 | rl->next = sys->rl; | ||
518 | sys->rl = rl; | ||
519 | ++(sys->rlength); | ||
520 | if (sys->capacity>0) { | ||
521 | rl->varvalue = | ||
522 | (real64 *)ascmalloc(sys->capacity*sizeof(real64)); | ||
523 | } | ||
524 | } | ||
525 | } | ||
526 | } | ||
527 | |||
528 | void linsolqr_remove_rhs(linsolqr_system_t sys,real64 *rhs) | ||
529 | { | ||
530 | struct rhs_list **q; | ||
531 | |||
532 | if(CHECK_SYSTEM(sys)) { | ||
533 | FPRINTF(stderr,"Bad linsolqr_system_t found. rhs not removed.\n"); | ||
534 | return; | ||
535 | } | ||
536 | for( q = &(sys->rl) ; NOTNULL(*q) ; q = &((*q)->next) ) | ||
537 | if( (*q)->rhs == rhs ) | ||
538 | break; | ||
539 | if( NOTNULL(*q) ) { | ||
540 | struct rhs_list *p; | ||
541 | p = *q; | ||
542 | *q = p->next; | ||
543 | if( NOTNULL(p->varvalue) ) | ||
544 | ascfree( (POINTER)(p->varvalue) ); | ||
545 | ascfree( (POINTER)p ); | ||
546 | --(sys->rlength); | ||
547 | } else if( NOTNULL(rhs) ) { | ||
548 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_remove_rhs\n"); | ||
549 | FPRINTF(stderr," Rhs does not exist.\n"); | ||
550 | } | ||
551 | } | ||
552 | |||
553 | int32 linsolqr_number_of_rhs(linsolqr_system_t sys) | ||
554 | { | ||
555 | CHECK_SYSTEM(sys); | ||
556 | return( sys->rlength ); | ||
557 | } | ||
558 | |||
559 | real64 *linsolqr_get_rhs(linsolqr_system_t sys,int n) | ||
560 | { | ||
561 | struct rhs_list *rl; | ||
562 | int count; | ||
563 | |||
564 | CHECK_SYSTEM(sys); | ||
565 | |||
566 | count = sys->rlength - 1 - n; | ||
567 | if( count < 0 ) return(NULL); | ||
568 | for( rl = sys->rl ; count > 0 && NOTNULL(rl) ; rl = rl->next ) | ||
569 | --count; | ||
570 | return( ISNULL(rl) ? NULL : rl->rhs ); | ||
571 | } | ||
572 | |||
573 | void linsolqr_matrix_was_changed(linsolqr_system_t sys) | ||
574 | { | ||
575 | if(CHECK_SYSTEM(sys)) { | ||
576 | FPRINTF(stderr, | ||
577 | "Bad linsolqr_system_t found. matrix change message ignored.\n"); | ||
578 | return; | ||
579 | } | ||
580 | sys->rowdeps = sys->coldeps = sys->factored = FALSE; | ||
581 | } | ||
582 | |||
583 | void linsolqr_rhs_was_changed(linsolqr_system_t sys, real64 *rhs) | ||
584 | { | ||
585 | struct rhs_list *rl; | ||
586 | |||
587 | if(CHECK_SYSTEM(sys)) { | ||
588 | FPRINTF(stderr,"Bad linsolqr_system_t found. rhs change ignored.\n"); | ||
589 | return; | ||
590 | } | ||
591 | rl = find_rhs(sys->rl,rhs); | ||
592 | if( NOTNULL(rl) ) { | ||
593 | rl->solved = FALSE; | ||
594 | } else if( NOTNULL(rhs) ) { | ||
595 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_rhs_was_modified\n"); | ||
596 | FPRINTF(stderr," Rhs does not exist.\n"); | ||
597 | } | ||
598 | } | ||
599 | |||
600 | void linsolqr_set_pivot_zero(linsolqr_system_t sys,real64 pivot_zero) | ||
601 | { | ||
602 | if(CHECK_SYSTEM(sys)) { | ||
603 | FPRINTF(stderr,"Bad linsolqr_system_t found. set_pivot_zero ignored.\n"); | ||
604 | return; | ||
605 | } | ||
606 | if( pivot_zero < D_ZERO ) { | ||
607 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_pivot_zero\n"); | ||
608 | FPRINTF(stderr," Invalid pivot zero of %g\n",pivot_zero); | ||
609 | } else { | ||
610 | sys->pivot_zero = pivot_zero; | ||
611 | linsolqr_matrix_was_changed(sys); | ||
612 | } | ||
613 | } | ||
614 | |||
615 | real64 linsolqr_pivot_zero(linsolqr_system_t sys) | ||
616 | { | ||
617 | CHECK_SYSTEM(sys); | ||
618 | return( sys->pivot_zero ); | ||
619 | } | ||
620 | |||
621 | void linsolqr_set_pivot_tolerance(linsolqr_system_t sys, real64 ptol) | ||
622 | { | ||
623 | if(CHECK_SYSTEM(sys)) { | ||
624 | FPRINTF(stderr,"Bad linsolqr_system_t found. set_pivot_tol ignored.\n"); | ||
625 | return; | ||
626 | } | ||
627 | if( ptol < D_ZERO || ptol > D_ONE ) { | ||
628 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_pivot_tolerance\n"); | ||
629 | FPRINTF(stderr," Invalid pivot tolerance of %g\n",ptol); | ||
630 | } else { | ||
631 | sys->ptol = ptol; | ||
632 | linsolqr_matrix_was_changed(sys); | ||
633 | } | ||
634 | } | ||
635 | |||
636 | void linsolqr_set_condition_tolerance(linsolqr_system_t sys, real64 ctol) | ||
637 | { | ||
638 | if(CHECK_SYSTEM(sys)) { | ||
639 | FPRINTF(stderr, | ||
640 | "Bad linsolqr_system_t found. set_condition_tolerance ignored.\n"); | ||
641 | return; | ||
642 | } | ||
643 | if( ctol < D_ZERO || ctol > D_ONE ) { | ||
644 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_condition_tolerance\n"); | ||
645 | FPRINTF(stderr," Invalid condition tolerance of %g\n",ctol); | ||
646 | } else { | ||
647 | sys->ctol = ctol; | ||
648 | linsolqr_matrix_was_changed(sys); | ||
649 | } | ||
650 | } | ||
651 | |||
652 | void linsolqr_set_drop_tolerance(linsolqr_system_t sys, real64 dtol) | ||
653 | { | ||
654 | if(CHECK_SYSTEM(sys)) { | ||
655 | FPRINTF(stderr, | ||
656 | "Bad linsolqr_system_t found. set_drop_tolerance ignored.\n"); | ||
657 | return; | ||
658 | } | ||
659 | if( dtol < D_ZERO || dtol > D_ONE ) { | ||
660 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_set_drop_tolerance\n"); | ||
661 | FPRINTF(stderr," Invalid drop tolerance of %g\n",dtol); | ||
662 | } else { | ||
663 | sys->dtol = dtol; | ||
664 | linsolqr_matrix_was_changed(sys); | ||
665 | } | ||
666 | } | ||
667 | |||
668 | real64 linsolqr_pivot_tolerance(linsolqr_system_t sys) | ||
669 | { | ||
670 | CHECK_SYSTEM(sys); | ||
671 | return( sys->ptol ); | ||
672 | } | ||
673 | |||
674 | real64 linsolqr_condition_tolerance(linsolqr_system_t sys) | ||
675 | { | ||
676 | CHECK_SYSTEM(sys); | ||
677 | return( sys->ctol ); | ||
678 | } | ||
679 | |||
680 | real64 linsolqr_drop_tolerance(linsolqr_system_t sys) | ||
681 | { | ||
682 | CHECK_SYSTEM(sys); | ||
683 | return( sys->dtol ); | ||
684 | } | ||
685 | |||
686 | extern enum factor_class linsolqr_fclass(linsolqr_system_t sys) | ||
687 | { | ||
688 | CHECK_SYSTEM(sys); | ||
689 | return( sys->fclass ); | ||
690 | } | ||
691 | extern enum factor_method linsolqr_fmethod(linsolqr_system_t sys) | ||
692 | { | ||
693 | CHECK_SYSTEM(sys); | ||
694 | return( sys->fmethod ); | ||
695 | } | ||
696 | extern enum reorder_method linsolqr_rmethod(linsolqr_system_t sys) | ||
697 | { | ||
698 | CHECK_SYSTEM(sys); | ||
699 | return( sys->rmethod ); | ||
700 | } | ||
701 | |||
702 | int32 linsolqr_rank(linsolqr_system_t sys) | ||
703 | { | ||
704 | CHECK_SYSTEM(sys); | ||
705 | if( !sys->factored ) { | ||
706 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_rank\n"); | ||
707 | FPRINTF(stderr," System not factored yet.\n"); | ||
708 | } | ||
709 | return(sys->rank); | ||
710 | } | ||
711 | |||
712 | real64 linsolqr_smallest_pivot(linsolqr_system_t sys) | ||
713 | { | ||
714 | CHECK_SYSTEM(sys); | ||
715 | #if LINSOL_DEBUG | ||
716 | if( !sys->factored ) { | ||
717 | FPRINTF(stderr,"ERROR: (linsolqr) linsolqr_smallest_pivot\n"); | ||
718 | FPRINTF(stderr," System not factored yet.\n"); | ||
719 | } | ||
720 | #endif | ||
721 | return(sys->smallest_pivot); | ||
722 | } | ||
723 | |||
724 | /***************************************************************************\ | ||
725 | Commonly used internal functions for sparse linear solvers based on mtx. | ||
726 | void insure_capacity(sys) | ||
727 | void insure_lu_capacity(sys) | ||
728 | void insure_qr_capacity(sys) | ||
729 | void forward_substitute(sys,rvec,transpose) | ||
730 | void backward_substitute(sys,rvec,transpose) | ||
731 | macro SQR(x) | ||
732 | int find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi) | ||
733 | \***************************************************************************/ | ||
734 | |||
735 | static real64 *raise_capacity(real64 *vec, | ||
736 | int32 oldcap, | ||
737 | int32 newcap) | ||
738 | /** | ||
739 | *** Raises the capacity of the array and returns a new array. | ||
740 | *** It is assumed that oldcap < newcap. vec is destroyed or | ||
741 | *** returned as appropriate. | ||
742 | *** If !NDEBUG, the vector expanded is also set to 0. | ||
743 | **/ | ||
744 | { | ||
745 | real64 *newvec=NULL; | ||
746 | int i; | ||
747 | if (newcap < oldcap) { | ||
748 | #ifndef NDEBUG | ||
749 | for (i = 0; i < newcap; i++) { | ||
750 | newvec[i] = 0.0; | ||
751 | } | ||
752 | #endif | ||
753 | return vec; | ||
754 | } | ||
755 | if (NOTNULL(vec)) { | ||
756 | /* don't call realloc on null with newcap 0 or it frees */ | ||
757 | newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64))); | ||
758 | } else { | ||
759 | newvec=(newcap > 0 ? | ||
760 | (real64 *)ascmalloc( newcap * sizeof(real64) ) : NULL); | ||
761 | } | ||
762 | #ifndef NDEBUG | ||
763 | for (i = 0; i < newcap; i++) { | ||
764 | newvec[i] = 0.0; | ||
765 | } | ||
766 | #endif | ||
767 | return newvec; | ||
768 | } | ||
769 | |||
770 | static int32 *raise_qr_int_capacity(int32 *vec, | ||
771 | int32 oldcap, | ||
772 | int32 newcap) | ||
773 | /** | ||
774 | *** Raises the capacity of the index array and returns a new array. | ||
775 | *** It is assumed that oldcap < newcap. vec is destroyed or | ||
776 | *** returned as appropriate. vec returned is zeroed. | ||
777 | *** calling with newcap=0 does not force deallocation. | ||
778 | **/ | ||
779 | { | ||
780 | int32 *newvec=NULL; | ||
781 | if (newcap < oldcap) | ||
782 | return vec; | ||
783 | if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */ | ||
784 | newvec=(int32 *)ascrealloc(vec,(newcap * sizeof(int32))); | ||
785 | mtx_zero_int32(vec,newcap); | ||
786 | } else { | ||
787 | newvec= (newcap > 0 ? | ||
788 | (int32 *)asccalloc( newcap , sizeof(int32) ) : NULL); | ||
789 | } | ||
790 | return newvec; | ||
791 | } | ||
792 | |||
793 | static real64 *raise_qr_real_capacity(real64 *vec, | ||
794 | int32 oldcap, | ||
795 | int32 newcap) | ||
796 | /** | ||
797 | *** Raises the capacity of the real array and returns a new array. | ||
798 | *** It is assumed that oldcap < newcap. vec is destroyed or | ||
799 | *** returned as appropriate. vec returned is zeroed. | ||
800 | *** calling with newcap=0 does not force deallocation. | ||
801 | **/ | ||
802 | { | ||
803 | real64 *newvec=NULL; | ||
804 | if (newcap < oldcap) | ||
805 | return vec; | ||
806 | if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */ | ||
807 | newvec=(real64 *)ascrealloc(vec,(newcap * sizeof(real64))); | ||
808 | mtx_zero_real64(vec,newcap); | ||
809 | } else { | ||
810 | newvec= (newcap > 0 ? | ||
811 | (real64 *)asccalloc( newcap , sizeof(real64) ) : NULL); | ||
812 | } | ||
813 | return newvec; | ||
814 | } | ||
815 | |||
816 | static struct qr_fill_t *raise_qr_fill_capacity(struct qr_fill_t *vec, | ||
817 | int32 oldcap, | ||
818 | int32 newcap) | ||
819 | /** | ||
820 | *** Raises the capacity of the fill array and returns a new array. | ||
821 | *** It is assumed that oldcap < newcap. vec is destroyed or | ||
822 | *** returned as appropriate. vec returned is zeroed. | ||
823 | *** calling with newcap=0 does not force deallocation. | ||
824 | **/ | ||
825 | { | ||
826 | struct qr_fill_t *newvec=NULL; | ||
827 | if (newcap < oldcap) | ||
828 | return vec; | ||
829 | if (NOTNULL(vec)) {/* don't call realloc on null with newcap 0 or it frees */ | ||
830 | newvec=(struct qr_fill_t *) | ||
831 | ascrealloc(vec,(newcap * sizeof(struct qr_fill_t))); | ||
832 | mtx_zero_char((char *)vec,newcap*sizeof(struct qr_fill_t)); | ||
833 | } else { | ||
834 | newvec= (newcap > 0 ? | ||
835 | (struct qr_fill_t *)asccalloc( newcap , sizeof(struct qr_fill_t) ) : | ||
836 | NULL); | ||
837 | } | ||
838 | return newvec; | ||
839 | } | ||
840 | |||
841 | static void insure_capacity(linsolqr_system_t sys) | ||
842 | /** | ||
843 | *** Insures that the capacity of all of the solution vectors | ||
844 | *** for each rhs is large enough. | ||
845 | *** The above implies a malloc if varvalue is null. | ||
846 | *** Assumes varvalue are at sys->capacity already. | ||
847 | **/ | ||
848 | { | ||
849 | int32 req_cap; | ||
850 | req_cap = mtx_capacity(sys->coef); | ||
851 | |||
852 | if( req_cap > sys->capacity ) { | ||
853 | struct rhs_list *rl; | ||
854 | |||
855 | for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next ) | ||
856 | rl->varvalue = raise_capacity(rl->varvalue,sys->capacity,req_cap); | ||
857 | sys->capacity = req_cap; | ||
858 | } | ||
859 | } | ||
860 | |||
861 | static void insure_lu_capacity(linsolqr_system_t sys) | ||
862 | /** | ||
863 | *** Insures that the capacity of all of the ludata vectors. | ||
864 | *** The above implies a malloc if vector is null. | ||
865 | *** If not null, implies an extension if needed. | ||
866 | **/ | ||
867 | { | ||
868 | int32 req_cap; | ||
869 | if (!sys || !(sys->coef) || !(sys->ludata)) { | ||
870 | FPRINTF(stderr,"linsolqr: insure_lu_capacity called with NULL pointer"); | ||
871 | return; | ||
872 | } | ||
873 | req_cap = mtx_capacity(sys->coef); | ||
874 | |||
875 | if( req_cap > sys->ludata->cap ) { | ||
876 | sys->ludata->pivlist = | ||
877 | raise_capacity(sys->ludata->pivlist,sys->ludata->cap,req_cap); | ||
878 | sys->ludata->tmp = | ||
879 | raise_capacity(sys->ludata->tmp,sys->ludata->cap,req_cap); | ||
880 | sys->ludata->cap = req_cap; | ||
881 | } | ||
882 | } | ||
883 | |||
884 | static void insure_qr_capacity(linsolqr_system_t sys) | ||
885 | /** | ||
886 | *** Insures that the capacity of all of the qrdata vectors | ||
887 | *** is large enough. | ||
888 | *** The above implies a calloc if vector is null. | ||
889 | *** If not null, implies an extension and zeroing of the vector. | ||
890 | *** Also zeroes the simple elements of the qrdata. | ||
891 | **/ | ||
892 | { | ||
893 | int32 req_cap; | ||
894 | if (!sys || !(sys->coef) || !(sys->qrdata)) { | ||
895 | FPRINTF(stderr,"linsolqr: insure_qr_capacity called with NULL pointer"); | ||
896 | return; | ||
897 | } | ||
898 | req_cap = mtx_capacity(sys->coef); | ||
899 | |||
900 | if( req_cap > sys->qrdata->cap ) { | ||
901 | sys->qrdata->alpha = | ||
902 | raise_qr_real_capacity(sys->qrdata->alpha,sys->qrdata->cap,req_cap); | ||
903 | sys->qrdata->sigma = | ||
904 | raise_qr_real_capacity(sys->qrdata->sigma,sys->qrdata->cap,req_cap); | ||
905 | sys->qrdata->tau = | ||
906 | raise_qr_real_capacity(sys->qrdata->tau,sys->qrdata->cap,req_cap); | ||
907 | sys->qrdata->hhcol = | ||
908 | raise_qr_real_capacity(sys->qrdata->hhcol,sys->qrdata->cap,req_cap); | ||
909 | sys->qrdata->hhrow = | ||
910 | raise_qr_real_capacity(sys->qrdata->hhrow,sys->qrdata->cap,req_cap); | ||
911 | |||
912 | sys->qrdata->sp.data = | ||
913 | raise_qr_real_capacity(sys->qrdata->sp.data,sys->qrdata->cap,req_cap); | ||
914 | sys->qrdata->sp.idata = | ||
915 | raise_qr_int_capacity(sys->qrdata->sp.idata,sys->qrdata->cap,req_cap); | ||
916 | |||
917 | sys->qrdata->fill = | ||
918 | raise_qr_fill_capacity(sys->qrdata->fill,sys->qrdata->cap,req_cap); | ||
919 | |||
920 | sys->qrdata->cap = req_cap; | ||
921 | sys->qrdata->sp.cap = req_cap; | ||
922 | sys->qrdata->sp.len = 0; | ||
923 | } | ||
924 | sys->qrdata->nu=D_ZERO; | ||
925 | sys->qrdata->anu=D_ZERO; | ||
926 | sys->qrdata->asubnu=D_ZERO; | ||
927 | } | ||
928 | |||
929 | static void forward_substitute(linsolqr_system_t sys, | ||
930 | real64 *arr, | ||
931 | boolean transpose) | ||
932 | /** | ||
933 | *** Forward substitute. It is assumed that the L (or U) part of | ||
934 | *** sys->factors is computed. This function converts c to x in place. The | ||
935 | *** values are stored in arr indexed by original row number (or original | ||
936 | *** column number). | ||
937 | *** | ||
938 | *** transpose = FALSE: transpose = TRUE: | ||
939 | *** T | ||
940 | *** L x = c U x = c | ||
941 | *** | ||
942 | *** The following formula holds: | ||
943 | *** 0<=k<r ==> x(k) = [c(k) - L(k,(0..k-1)) dot x(0..k-1)] / L(k,k) | ||
944 | *** or | ||
945 | *** 0<=k<r ==> x(k) = [c(k) - U((0..k-1),k) dot x(0..k-1)] / U(k,k) | ||
946 | *** | ||
947 | *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k] | ||
948 | *** and in matrix. | ||
949 | **/ | ||
950 | { | ||
951 | mtx_range_t dot_rng; | ||
952 | mtx_coord_t nz; | ||
953 | real64 sum, *pivlist; | ||
954 | mtx_matrix_t mtx; | ||
955 | int32 dotlim; | ||
956 | boolean nonzero_found=FALSE; | ||
957 | |||
958 | mtx=sys->factors; | ||
959 | pivlist=sys->ludata->pivlist; | ||
960 | dot_rng.low = sys->rng.low; | ||
961 | dotlim=dot_rng.low+sys->rank; | ||
962 | if (transpose) { /* arr is indexed by original column number */ | ||
963 | for( nz.col=dot_rng.low; nz.col < dotlim; ++(nz.col) ) { | ||
964 | register int32 org_col; | ||
965 | |||
966 | dot_rng.high = nz.col - 1; | ||
967 | org_col = mtx_col_to_org(mtx,nz.col); | ||
968 | if (arr[org_col]!=D_ZERO) nonzero_found=TRUE; | ||
969 | if (nonzero_found) { | ||
970 | sum=mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE); | ||
971 | /* arr[org_col] = (arr[org_col] - sum) / D_ONE */; | ||
972 | arr[org_col] -=sum; | ||
973 | } | ||
974 | } | ||
975 | |||
976 | } else { /* arr is indexed by original row number */ | ||
977 | for( nz.row=dot_rng.low; nz.row < dotlim; ++(nz.row) ) { | ||
978 | register int32 org_row; | ||
979 | |||
980 | dot_rng.high = nz.row - 1; | ||
981 | org_row = mtx_row_to_org(mtx,nz.row); | ||
982 | if (arr[org_row]!=D_ZERO) nonzero_found=TRUE; | ||
983 | if (nonzero_found) { | ||
984 | sum = mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE); | ||
985 | /* | ||
986 | nz.col = nz.row; | ||
987 | arr[org_row] = (arr[org_row] - sum) / mtx_value(mtx,&nz); | ||
988 | */ | ||
989 | arr[org_row] = (arr[org_row] - sum) / pivlist[nz.row]; | ||
990 | } | ||
991 | } | ||
992 | } | ||
993 | } | ||
994 | |||
995 | static void backward_substitute(linsolqr_system_t sys, | ||
996 | real64 *arr, | ||
997 | boolean transpose) | ||
998 | /** | ||
999 | *** Backward substitute. It is assumed that the U (or L) part of | ||
1000 | *** sys->factors is computed. This function converts rhs to c in place. The | ||
1001 | *** values are stored in arr indexed by original row number (or original | ||
1002 | *** column number). | ||
1003 | *** | ||
1004 | *** transpose = FALSE: transpose = TRUE: | ||
1005 | *** T | ||
1006 | *** U c = rhs L c = rhs | ||
1007 | *** | ||
1008 | *** The following formula holds: | ||
1009 | *** transpose=FALSE: (the usual for J.dx=-f where rhs is -f) | ||
1010 | *** 0<=k<r ==> c(k) = [rhs(k) - U(k,(k+1..r-1)) dot c(k+1..r-1)] / U(k,k) | ||
1011 | *** working up from the bottom. | ||
1012 | *** or | ||
1013 | *** transpose=TRUE: | ||
1014 | *** 0<=k<r ==> c(k) = [rhs(k) - L((k+1..r-1),k) dot c(k+1..r-1)] / L(k,k) | ||
1015 | *** working right to left. | ||
1016 | *** | ||
1017 | *** U(k,k) is 1 by construction. L(k,k) is stored in sys->ludata->pivlist[k] | ||
1018 | *** and in matrix. | ||
1019 | **/ | ||
1020 | { | ||
1021 | mtx_range_t dot_rng; | ||
1022 | mtx_coord_t nz; | ||
1023 | real64 sum, *pivlist; | ||
1024 | mtx_matrix_t mtx; | ||
1025 | int32 dotlim; | ||
1026 | boolean nonzero_found=FALSE; /* once TRUE, substitution must be done | ||
1027 | over remaining rows/cols */ | ||
1028 | |||
1029 | dot_rng.high = sys->rng.low + sys->rank - 1; | ||
1030 | dotlim=sys->rng.low; | ||
1031 | mtx=sys->factors; | ||
1032 | pivlist=sys->ludata->pivlist; | ||
1033 | if (transpose) { /* arr is indexed by original column number */ | ||
1034 | for( nz.col = dot_rng.high ; nz.col >= dotlim ; --(nz.col) ) { | ||
1035 | register int32 org_col; | ||
1036 | |||
1037 | dot_rng.low = nz.col + 1; | ||
1038 | |||
1039 | org_col = mtx_col_to_org(mtx,nz.col); | ||
1040 | if (arr[org_col]!=D_ZERO) nonzero_found=TRUE; | ||
1041 | if (nonzero_found) { | ||
1042 | sum= mtx_col_dot_full_org_vec(mtx,nz.col,arr,&dot_rng,TRUE); | ||
1043 | /* | ||
1044 | reminders: | ||
1045 | nz.row = nz.col; | ||
1046 | arr[org_col] = (arr[org_col] - sum) / mtx_value(mtx,&nz); | ||
1047 | */ | ||
1048 | arr[org_col] = (arr[org_col] - sum) / pivlist[nz.col]; | ||
1049 | } | ||
1050 | } | ||
1051 | } else { /* arr is indexed by original row number */ | ||
1052 | /* we are working from the bottom up */ | ||
1053 | for( nz.row = dot_rng.high ; nz.row >= dotlim ; --(nz.row) ) { | ||
1054 | register int32 org_row; | ||
1055 | |||
1056 | dot_rng.low = nz.row + 1; | ||
1057 | org_row = mtx_row_to_org(mtx,nz.row); | ||
1058 | |||
1059 | if (arr[org_row]!=D_ZERO) nonzero_found=TRUE; | ||
1060 | if (nonzero_found) { | ||
1061 | sum= mtx_row_dot_full_org_vec(mtx,nz.row,arr,&dot_rng,TRUE); | ||
1062 | /* | ||
1063 | reminders: | ||
1064 | nz.row = nz.col; | ||
1065 | arr[org_row] = (arr[org_row] - sum) / D_ONE; | ||
1066 | */ | ||
1067 | arr[org_row] -= sum; | ||
1068 | } | ||
1069 | } | ||
1070 | } | ||
1071 | } | ||
1072 | |||
1073 | #ifdef SQR | ||
1074 | #undef SQR | ||
1075 | #endif | ||
1076 | #define SQR(x) ((x)*(x)) | ||
1077 | /* | ||
1078 | * int32 find_pivot_number(vec,len,tol,eps,ivec,rvec,maxi); | ||
1079 | * real64 *vec,*rvec,tol; | ||
1080 | * int32 len, *ivec, *maxi; | ||
1081 | * | ||
1082 | * Search array vec of positive numbers for the first entry, k, which passes | ||
1083 | * (vec[k] >= tol*vecmax) where vecmax is the largest value in vec. | ||
1084 | * vec is an array len long. rvec, ivec are (worst case) len long. | ||
1085 | * *maxi will be set to the index of the first occurence of the largest | ||
1086 | * number in vec which is >= eps. | ||
1087 | * 0.0 <= tol <= 1. | ||
1088 | * If tol <=0, no search is done (*maxi = 0, k = 0). | ||
1089 | * Eps is an absolute number which vec values must be >= to before | ||
1090 | * being eligible for the tol test. | ||
1091 | * | ||
1092 | * Best case, highly nonlinear feature: | ||
1093 | * if on entry *maxi = len, we will do a simplified search for the | ||
1094 | * first k which satisfies tol and eps tests, based on the value | ||
1095 | * stored at vec[len-1]. | ||
1096 | * | ||
1097 | * We could allocate and deallocate rvec/ivec since they are | ||
1098 | * temporaries, but it is more efficient for caller to manage that. | ||
1099 | * If tol>=1.0, rvec and ivec are ignored and may be NULL. | ||
1100 | * NOTE: we are going on value vec[i], not fabs(vec[i]). | ||
1101 | * Returns k. | ||
1102 | * Remember: GIGO. | ||
1103 | * Failure modes- if all numbers are <= 0.0, k and *maxi will be returned 0. | ||
1104 | * Failure modes- if all numbers are < eps, k and *maxi will be returned 0. | ||
1105 | * | ||
1106 | * find_pivot_index, should we implement, would search an int vector. | ||
1107 | * | ||
1108 | * Hint: if you know you have not increased any values in vec and have not | ||
1109 | * changed the value at vec[maxi] before a subsequent search, then call | ||
1110 | * with len = previous maxi+1 to reduce the search time. | ||
1111 | */ | ||
1112 | static int32 find_pivot_number(const real64 *vec, | ||
1113 | const int32 len, | ||
1114 | const real64 tol, | ||
1115 | const real64 eps, | ||
1116 | int32 * const ivec, | ||
1117 | real64 * const rvec, | ||
1118 | int32 * const maxi) | ||
1119 | { | ||
1120 | int32 j,k; | ||
1121 | if (tol <= D_ZERO ) { | ||
1122 | *maxi = 0; | ||
1123 | return 0; | ||
1124 | } | ||
1125 | if (tol >= D_ONE) { | ||
1126 | register real64 biggest = MAX(eps,D_ZERO); | ||
1127 | /* get the biggest, period */ | ||
1128 | k = 0; | ||
1129 | for (j=0; j < len; j++) { | ||
1130 | if (vec[j] > biggest) { | ||
1131 | biggest = vec[j]; | ||
1132 | k = j; | ||
1133 | } | ||
1134 | } | ||
1135 | *maxi = k; | ||
1136 | } else { | ||
1137 | int32 bigi; | ||
1138 | bigi = 0; | ||
1139 | rvec[0] = eps; | ||
1140 | ivec[0] = 0; | ||
1141 | if (*maxi==len) { | ||
1142 | /* cheap search against eps and tol, no list. */ | ||
1143 | register real64 thold; | ||
1144 | thold = tol * vec[len-1]; | ||
1145 | if (thold >= eps) { | ||
1146 | for (k = 0; k < len && vec[k] < thold; k++); | ||
1147 | } else { | ||
1148 | for (k = 0; k < len && vec[k] < eps; k++); | ||
1149 | } | ||
1150 | /* adjust maxi to still point at last location, as indicated by len */ | ||
1151 | (*maxi)--; | ||
1152 | } else { | ||
1153 | real64 rlast; | ||
1154 | rlast = eps; | ||
1155 | /* create short list */ | ||
1156 | for (j=0; j < len; j++) { | ||
1157 | while (j < len && vec[j] <= rlast) j++; /* skip to next max */ | ||
1158 | if (j < len) { | ||
1159 | ivec[bigi] = j; | ||
1160 | rvec[bigi] = vec[j]; | ||
1161 | bigi++; /* bigi ends up being len of rvec or, if vec all<= eps, 0 */ | ||
1162 | rlast = rvec[bigi-1]; | ||
1163 | } | ||
1164 | } | ||
1165 | if (bigi == 0) { | ||
1166 | *maxi = k = 0; | ||
1167 | } else { | ||
1168 | register real64 thold; | ||
1169 | /* Note: if bigi is enormous, we should do a binary search, | ||
1170 | not linear. */ | ||
1171 | *maxi = ivec[bigi-1]; | ||
1172 | thold = tol * rvec[bigi-1]; | ||
1173 | /* get pivot from shortlist, searching backward */ | ||
1174 | if (thold >= eps) { | ||
1175 | for (k = bigi-1; k >= 0 && rvec[k] >= thold; k--); | ||
1176 | } else { | ||
1177 | for (k = bigi-1; k >= 0 && rvec[k] >= eps; k--); | ||
1178 | } | ||
1179 | /* translate pivot to vec index */ | ||
1180 | k = ivec[k+1]; | ||
1181 | } | ||
1182 | } | ||
1183 | } | ||
1184 | return k; | ||
1185 | } | ||
1186 | |||
1187 | static void calc_dependent_rows_ranki1(linsolqr_system_t sys) | ||
1188 | { | ||
1189 | mtx_coord_t nz; | ||
1190 | real64 value; | ||
1191 | mtx_range_t colrange; | ||
1192 | mtx_range_t rowrange; | ||
1193 | real64 *lc; | ||
1194 | mtx_matrix_t mtx; | ||
1195 | |||
1196 | sys->rowdeps = TRUE; | ||
1197 | if( ( (sys->reg.row.low == sys->rng.low) && | ||
1198 | ( sys->reg.row.high == sys->rng.low+sys->rank-1 ) | ||
1199 | ) || sys->rank==0 ) | ||
1200 | return; | ||
1201 | |||
1202 | lc = sys->ludata->tmp; | ||
1203 | colrange.low = sys->rng.low; | ||
1204 | colrange.high = colrange.low + sys->rank - 1; | ||
1205 | rowrange.low = sys->rng.high; | ||
1206 | rowrange.high = sys->rng.low+sys->rank; | ||
1207 | mtx=sys->factors; | ||
1208 | |||
1209 | nz.row = sys->reg.row.low; | ||
1210 | for( ; nz.row <= sys->reg.row.high; nz.row++ ) { | ||
1211 | if( nz.row == sys->rng.low ) { | ||
1212 | nz.row = rowrange.high-1; | ||
1213 | continue; | ||
1214 | } | ||
1215 | mtx_zero_real64(lc,(sys->capacity)); | ||
1216 | mtx_org_row_vec(mtx,nz.row,lc,&colrange); | ||
1217 | if( nz.row < rowrange.high || nz.row > rowrange.low ) | ||
1218 | backward_substitute(sys,lc,TRUE); | ||
1219 | forward_substitute(sys,lc,TRUE); | ||
1220 | mtx_clear_row(mtx,nz.row,&colrange); | ||
1221 | for( nz.col=colrange.low; nz.col <= colrange.high; nz.col++ ) { | ||
1222 | value = lc[mtx_col_to_org(mtx,nz.col)]; | ||
1223 | if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value); | ||
1224 | } | ||
1225 | } | ||
1226 | } | ||
1227 | |||
1228 | |||
1229 | static void calc_dependent_cols_ranki1(linsolqr_system_t sys) | ||
1230 | { | ||
1231 | mtx_coord_t nz; | ||
1232 | real64 value; | ||
1233 | mtx_range_t rowrange; | ||
1234 | mtx_range_t colrange; | ||
1235 | real64 *lc; | ||
1236 | mtx_matrix_t mtx; | ||
1237 | |||
1238 | sys->coldeps = TRUE; | ||
1239 | if( ( (sys->reg.col.low == sys->rng.low) && | ||
1240 | ( sys->reg.col.high == sys->rng.low+sys->rank-1 ) | ||
1241 | ) || sys->rank==0 ) | ||
1242 | return; | ||
1243 | |||
1244 | lc = sys->ludata->tmp; | ||
1245 | rowrange.low = sys->rng.low; | ||
1246 | rowrange.high = rowrange.low + sys->rank - 1; | ||
1247 | colrange.high = sys->rng.low+sys->rank; | ||
1248 | colrange.low = sys->rng.high; | ||
1249 | mtx=sys->factors; | ||
1250 | |||
1251 | nz.col = sys->reg.col.low; | ||
1252 | for( ; nz.col <= sys->reg.col.high; nz.col++ ) { | ||
1253 | if( nz.col == sys->rng.low ) { | ||
1254 | nz.col = colrange.high-1; | ||
1255 | continue; | ||
1256 | } | ||
1257 | mtx_zero_real64(lc,sys->capacity); | ||
1258 | mtx_org_col_vec(mtx,nz.col,lc,&rowrange); | ||
1259 | if( nz.col < colrange.high || nz.col > colrange.low ) | ||
1260 | backward_substitute(sys,lc,FALSE); | ||
1261 | forward_substitute(sys,lc,FALSE); | ||
1262 | mtx_clear_col(mtx,nz.col,&rowrange); | ||
1263 | for( nz.row=rowrange.low; nz.row <= rowrange.high; nz.row++ ) { | ||
1264 | value = lc[mtx_row_to_org(mtx,nz.row)]; | ||
1265 | if( value != D_ZERO ) mtx_fill_value(mtx,&nz,value); | ||
1266 | } | ||
1267 | } | ||
1268 | } | ||
1269 | |||
1270 | /** | ||
1271 | *** Given a matrix and a diagonal range, sets the diagonal elements to 0 | ||
1272 | *** but does not delete them in the range low to high inclusive. | ||
1273 | *** worst cost= k*(nonzeros in rows of range treated). | ||
1274 | *** likely cost= k*(nonzeros in column pivoted rows of range treated) | ||
1275 | *** since you can smartly put in the diagonal last and find | ||
1276 | *** it first on many occasions. | ||
1277 | *** This is a good bit cheaper than deleting the elements unless one | ||
1278 | *** expects thousands of solves. | ||
1279 | **/ | ||
1280 | static void zero_diagonal_elements(mtx_matrix_t mtx, | ||
1281 | int32 low, int32 high) | ||
1282 | { | ||
1283 | mtx_coord_t nz; | ||
1284 | for (nz.row = low; nz.row <= high; nz.row++) { | ||
1285 | nz.col = nz.row; | ||
1286 | mtx_set_value(mtx,&nz,0.0); | ||
1287 | } | ||
1288 | } | ||
1289 | |||
1290 | static void zero_unpivoted_vars(linsolqr_system_t sys, | ||
1291 | real64 *varvalues, | ||
1292 | boolean transpose) | ||
1293 | /** | ||
1294 | *** Sets the values of unpivoted variables to zero. | ||
1295 | **/ | ||
1296 | { | ||
1297 | int32 ndx,order; | ||
1298 | mtx_matrix_t mtx; | ||
1299 | |||
1300 | mtx = sys->factors; | ||
1301 | order = mtx_order(mtx); | ||
1302 | for( ndx = 0 ; ndx < sys->rng.low ; ++ndx ) | ||
1303 | if (transpose) | ||
1304 | varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO; | ||
1305 | else | ||
1306 | varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO; | ||
1307 | |||
1308 | for( ndx = sys->rng.low + sys->rank ; ndx < order ; ++ndx ) | ||
1309 | if (transpose) | ||
1310 | varvalues[mtx_col_to_org(mtx,ndx)] = D_ZERO; | ||
1311 | else | ||
1312 | varvalues[mtx_row_to_org(mtx,ndx)] = D_ZERO; | ||
1313 | } | ||
1314 | |||
1315 | #if LINSOL_DEBUG | ||
1316 | static void debug_out_factors(FILE *fp,linsolqr_system_t sys) | ||
1317 | /** | ||
1318 | *** Outputs permutation and values of the nonzero elements in the | ||
1319 | *** factor matrix square region given by sys->rng. | ||
1320 | **/ | ||
1321 | { | ||
1322 | mtx_region_t reg; | ||
1323 | reg.row.low=reg.col.low=sys->rng.low; | ||
1324 | reg.row.high=reg.col.high=sys->rng.high; | ||
1325 | mtx_write_region(fp,sys->factors,®); | ||
1326 | } | ||
1327 | #endif /* LINSOL_DEBUG */ | ||
1328 | |||
1329 | /***************************************************************************\ | ||
1330 | Reordering functions for SPK1, and possibly for other schemes to be | ||
1331 | implemented later. | ||
1332 | The stuff here is almost, but not quite, black magic. Don't tinker with it. | ||
1333 | \***************************************************************************/ | ||
1334 | |||
1335 | /********************************* | ||
1336 | begin of spk1 stuff | ||
1337 | *********************************/ | ||
1338 | struct reorder_list { /* List of rows/columns and their counts. */ | ||
1339 | int32 ndx; | ||
1340 | int32 count; | ||
1341 | struct reorder_list *next; | ||
1342 | }; | ||
1343 | |||
1344 | struct reorder_vars { | ||
1345 | mtx_matrix_t mtx; | ||
1346 | mtx_region_t reg; /* Current active region */ | ||
1347 | int32 colhigh; /* Original value of reg.col.high */ | ||
1348 | struct reorder_list *tlist; /* Array of (enough) list elements */ | ||
1349 | int32 *rowcount; /* rowcount[reg.row.low .. reg.row.high] */ | ||
1350 | }; | ||
1351 | |||
1352 | static void adjust_row_count(struct reorder_vars *vars,int32 removed_col) | ||
1353 | /** | ||
1354 | *** Adjusts the row counts to account for the (to be) removed column. | ||
1355 | **/ | ||
1356 | { | ||
1357 | mtx_coord_t nz; | ||
1358 | real64 value; | ||
1359 | nz.col = removed_col; | ||
1360 | nz.row = mtx_FIRST; | ||
1361 | while( value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row)), | ||
1362 | nz.row != mtx_LAST ) | ||
1363 | --(vars->rowcount[nz.row]); | ||
1364 | } | ||
1365 | |||
1366 | static void assign_row_and_col(struct reorder_vars *vars, | ||
1367 | int32 row, | ||
1368 | int32 col) | ||
1369 | /** | ||
1370 | *** Assigns the given row to the given column, moving the row and column | ||
1371 | *** to the beginning of the active region and removing them (readjusting | ||
1372 | *** the region). The row counts are NOT adjusted. If col == mtx_NONE, | ||
1373 | *** then no column is assigned and the row is moved to the end of the | ||
1374 | *** active block instead. Otherwise, it is assumed that the column | ||
1375 | *** is active. | ||
1376 | **/ | ||
1377 | { | ||
1378 | if( col == mtx_NONE ) { | ||
1379 | mtx_swap_rows(vars->mtx,row,vars->reg.row.high); | ||
1380 | vars->rowcount[row] = vars->rowcount[vars->reg.row.high]; | ||
1381 | --(vars->reg.row.high); | ||
1382 | } else { | ||
1383 | mtx_swap_rows(vars->mtx,row,vars->reg.row.low); | ||
1384 | vars->rowcount[row] = vars->rowcount[vars->reg.row.low]; | ||
1385 | mtx_swap_cols(vars->mtx,col,vars->reg.col.low); | ||
1386 | ++(vars->reg.row.low); | ||
1387 | ++(vars->reg.col.low); | ||
1388 | } | ||
1389 | } | ||
1390 | |||
1391 | static void push_column_on_stack(struct reorder_vars *vars,int32 col) | ||
1392 | /** | ||
1393 | *** Pushes the given column onto the stack. It is assumed that the | ||
1394 | *** column is active. Row counts are adjusted. | ||
1395 | **/ | ||
1396 | { | ||
1397 | adjust_row_count(vars,col); | ||
1398 | mtx_swap_cols(vars->mtx,col,vars->reg.col.high); | ||
1399 | --(vars->reg.col.high); | ||
1400 | } | ||
1401 | |||
1402 | static int32 pop_column_from_stack(struct reorder_vars *vars) | ||
1403 | /** | ||
1404 | *** Pops the column on the "top" of the stack off of the stack and | ||
1405 | *** returns the column index, where it now lies in the active region. | ||
1406 | *** If the stack is empty, mtx_NONE is returned. Row counts are NOT | ||
1407 | *** adjusted (this together with a subsequent assignment of this column | ||
1408 | *** ==> no row count adjustment necessary). | ||
1409 | **/ | ||
1410 | { | ||
1411 | if( vars->reg.col.high < vars->colhigh ) | ||
1412 | return(++(vars->reg.col.high)); | ||
1413 | else | ||
1414 | return( mtx_NONE ); | ||
1415 | } | ||
1416 | |||
1417 | static void assign_null_rows(struct reorder_vars *vars) | ||
1418 | /** | ||
1419 | *** Assigns empty rows, moving them to the assigned region. It is | ||
1420 | *** assumed that row counts are correct. Columns are assigned off the | ||
1421 | *** stack. | ||
1422 | **/ | ||
1423 | { | ||
1424 | int32 row; | ||
1425 | |||
1426 | for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row ) | ||
1427 | if( vars->rowcount[row] == 0 ) | ||
1428 | assign_row_and_col(vars , row , pop_column_from_stack(vars)); | ||
1429 | } | ||
1430 | |||
1431 | static void forward_triangularize(struct reorder_vars *vars) | ||
1432 | /** | ||
1433 | *** Forward triangularizes the region, assigning singleton rows with their | ||
1434 | *** one and only incident column until there are no more. The row counts | ||
1435 | *** must be correct, and they are updated. | ||
1436 | **/ | ||
1437 | { | ||
1438 | boolean change; | ||
1439 | mtx_coord_t nz; | ||
1440 | real64 value; | ||
1441 | |||
1442 | do { | ||
1443 | change = FALSE; | ||
1444 | for( nz.row = vars->reg.row.low ; | ||
1445 | nz.row <= vars->reg.row.high && vars->rowcount[nz.row] != 1; | ||
1446 | ++nz.row ) ; | ||
1447 | if( nz.row <= vars->reg.row.high ) { | ||
1448 | /* found singleton row */ | ||
1449 | nz.col = mtx_FIRST; /* this is somehow coming back with nz.col -1 */ | ||
1450 | value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col)); | ||
1451 | adjust_row_count(vars,nz.col); | ||
1452 | assign_row_and_col(vars,nz.row,nz.col); | ||
1453 | change = TRUE; | ||
1454 | } | ||
1455 | } while( change ); | ||
1456 | } | ||
1457 | |||
1458 | static int32 select_row(struct reorder_vars *vars) | ||
1459 | /** | ||
1460 | *** Selects a row and returns its index. It is assumed that there is a | ||
1461 | *** row. Row counts must be correct. vars->tlist will be used. | ||
1462 | **/ | ||
1463 | { | ||
1464 | int32 min_row_count; | ||
1465 | int32 max_col_count; | ||
1466 | int32 row; | ||
1467 | int32 i, nties=-2; /* # elements currently defined in vars->tlist */ | ||
1468 | int32 sum; | ||
1469 | mtx_coord_t nz; | ||
1470 | real64 value; | ||
1471 | mtx_matrix_t mtx; | ||
1472 | mtx_range_t *colrng, *rowrng; | ||
1473 | |||
1474 | /* Set to something > any possible value */ | ||
1475 | min_row_count = vars->reg.col.high-vars->reg.col.low+2; | ||
1476 | for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row ) | ||
1477 | if( vars->rowcount[row] <= min_row_count ) { | ||
1478 | if( vars->rowcount[row] < min_row_count ) { | ||
1479 | min_row_count = vars->rowcount[row]; | ||
1480 | nties = 0; | ||
1481 | } | ||
1482 | vars->tlist[nties++].ndx = row; | ||
1483 | } | ||
1484 | /** | ||
1485 | *** vars->tlist[0..nties-1] is a list of row numbers which tie for | ||
1486 | *** minimum row count. | ||
1487 | **/ | ||
1488 | |||
1489 | max_col_count = -1; /* < any possible value */ | ||
1490 | i = 0; | ||
1491 | mtx = vars->mtx; | ||
1492 | colrng=&(vars->reg.col); | ||
1493 | rowrng=&(vars->reg.row); | ||
1494 | while( i < nties ) { | ||
1495 | |||
1496 | sum = 0; | ||
1497 | nz.row = vars->tlist[i].ndx; | ||
1498 | nz.col = mtx_FIRST; | ||
1499 | while( value = mtx_next_in_row(mtx,&nz,colrng), | ||
1500 | nz.col != mtx_LAST ) | ||
1501 | sum += mtx_nonzeros_in_col(mtx,nz.col,rowrng); | ||
1502 | if( sum > max_col_count ) { | ||
1503 | max_col_count = sum; | ||
1504 | row = nz.row; | ||
1505 | } | ||
1506 | i++; | ||
1507 | } | ||
1508 | /** | ||
1509 | *** Now row contains the row with the minimum row count, which has the | ||
1510 | *** greatest total column count of incident columns among all rows with | ||
1511 | *** the same (minimum) row count. Select it. | ||
1512 | **/ | ||
1513 | return(row); | ||
1514 | } | ||
1515 | |||
1516 | static void spk1_reorder(struct reorder_vars *vars) | ||
1517 | /** | ||
1518 | *** Reorders the assigned matrix vars->mtx within the specified bounding | ||
1519 | *** block region vars->reg. The region is split into 6 subregions during | ||
1520 | *** reordering: the rows are divided in two, and the columns divided in | ||
1521 | *** three. Initially everything is in the active subregion. Ultimately, | ||
1522 | *** everything will be assigned. | ||
1523 | *** | ||
1524 | *** <-- assigned -->|<-- active-->|<-- on stack -->| | ||
1525 | *** ----+----------------+-------------+----------------+ | ||
1526 | *** a | | | | | ||
1527 | *** s | | | | | ||
1528 | *** s | | | | | ||
1529 | *** i | (SQUARE) | | | | ||
1530 | *** g | | | | | ||
1531 | *** n | | | | | ||
1532 | *** e | | | | | ||
1533 | *** d | | | | | ||
1534 | *** ----+----------------+-------------+----------------+ | ||
1535 | *** a | | | | | ||
1536 | *** c | | ACTIVE | | | ||
1537 | *** t | | REGION | | | ||
1538 | *** i | | | | | ||
1539 | *** v | | | | | ||
1540 | *** e | | | | | ||
1541 | *** ----+----------------+-------------+----------------+ | ||
1542 | *** | ||
1543 | *** The algorithm is roughly as follows: | ||
1544 | *** (1) select a row (by some criterion). | ||
1545 | *** (2) push columns incident on that row onto the stack in decreasing | ||
1546 | *** order of their length. | ||
1547 | *** (3) pop first column off the stack and assign it to the selected | ||
1548 | *** row. | ||
1549 | *** (4) forward-triangularize (assign singleton rows with their one | ||
1550 | *** and only incident column, until no longer possible). | ||
1551 | *** | ||
1552 | *** (1)-(4) should be repeated until the active subregion becomes empty. | ||
1553 | *** | ||
1554 | *** Everything above was written as though the entire matrix is | ||
1555 | *** involved. In reality, only the relevant square region is involved. | ||
1556 | **/ | ||
1557 | { | ||
1558 | int32 row, size; | ||
1559 | int32 *rowcount_array_origin; | ||
1560 | mtx_matrix_t mtx; | ||
1561 | |||
1562 | size = MAX(vars->reg.row.high,vars->reg.col.high) + 1; | ||
1563 | vars->tlist = size > 0 ? (struct reorder_list *) | ||
1564 | ascmalloc( size*sizeof(struct reorder_list) ) : NULL; | ||
1565 | vars->rowcount = rowcount_array_origin = size > 0 ? (int32 *) | ||
1566 | ascmalloc( size*sizeof(int32) ) : NULL; | ||
1567 | mtx = vars->mtx; | ||
1568 | |||
1569 | vars->colhigh = vars->reg.col.high; | ||
1570 | /* Establish row counts */ | ||
1571 | for( row = vars->reg.row.low ; row <= vars->reg.row.high ; ++row ) | ||
1572 | vars->rowcount[row] = | ||
1573 | mtx_nonzeros_in_row(mtx,row,&(vars->reg.col)); | ||
1574 | |||
1575 | while(TRUE) { | ||
1576 | struct reorder_list *head; | ||
1577 | int32 nelts; /* # elements "allocated" from vars->tlist */ | ||
1578 | mtx_coord_t nz; | ||
1579 | real64 value; | ||
1580 | |||
1581 | forward_triangularize(vars); | ||
1582 | assign_null_rows(vars); | ||
1583 | if( vars->reg.row.low>vars->reg.row.high || | ||
1584 | vars->reg.col.low>vars->reg.col.high ) { | ||
1585 | /* Active region is now empty, done */ | ||
1586 | if( NOTNULL(vars->tlist) ) | ||
1587 | ascfree( vars->tlist ); | ||
1588 | if( NOTNULL(rowcount_array_origin) ) | ||
1589 | ascfree( rowcount_array_origin ); | ||
1590 | return; | ||
1591 | } | ||
1592 | |||
1593 | head = NULL; | ||
1594 | nelts = 0; | ||
1595 | nz.row = select_row(vars); | ||
1596 | nz.col = mtx_FIRST; | ||
1597 | while( value = mtx_next_in_row(mtx,&nz,&(vars->reg.col)), | ||
1598 | nz.col != mtx_LAST ) { | ||
1599 | struct reorder_list **q,*p; | ||
1600 | |||
1601 | p = &(vars->tlist[nelts++]); | ||
1602 | p->ndx = mtx_col_to_org(mtx,nz.col); | ||
1603 | p->count = mtx_nonzeros_in_col(mtx,nz.col,&(vars->reg.row)); | ||
1604 | for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) ); | ||
1605 | p->next = *q; | ||
1606 | *q = p; | ||
1607 | } | ||
1608 | /** | ||
1609 | *** We now have a list of columns which intersect the selected row. | ||
1610 | *** The list is in order of decreasing column count. | ||
1611 | **/ | ||
1612 | |||
1613 | /* Push incident columns on stack */ | ||
1614 | for( ; NOTNULL(head) ; head = head->next ) | ||
1615 | push_column_on_stack(vars,mtx_org_to_col(mtx,head->ndx)); | ||
1616 | |||
1617 | /* Pop column off stack and assign to selected row */ | ||
1618 | assign_row_and_col(vars , nz.row , pop_column_from_stack(vars)); | ||
1619 | } | ||
1620 | /* Not reached. */ | ||
1621 | } | ||
1622 | |||
1623 | /********************************* | ||
1624 | end of spk1 stuff | ||
1625 | *********************************/ | ||
1626 | /********************************* | ||
1627 | begin of tspk1 stuff | ||
1628 | *********************************/ | ||
1629 | struct creorder_list { /* List of columns/rows and their counts. */ | ||
1630 | int32 ndx; | ||
1631 | int32 count; | ||
1632 | struct creorder_list *next; | ||
1633 | }; | ||
1634 | |||
1635 | struct creorder_vars { | ||
1636 | mtx_matrix_t mtx; | ||
1637 | mtx_region_t reg; /* Current active region */ | ||
1638 | int32 rowhigh; /* Original value of reg.row.high */ | ||
1639 | struct creorder_list *tlist; /* Array of (enough) list elements */ | ||
1640 | int32 *colcount; /* colcount[reg.col.low .. reg.col.high] */ | ||
1641 | }; | ||
1642 | |||
1643 | static void adjust_col_count(struct creorder_vars *vars,int32 removed_row) | ||
1644 | /** | ||
1645 | *** Adjusts the column counts to account for the (to be) removed row. | ||
1646 | **/ | ||
1647 | { | ||
1648 | mtx_coord_t nz; | ||
1649 | real64 value; | ||
1650 | nz.row = removed_row; | ||
1651 | nz.col = mtx_FIRST; | ||
1652 | while( value = mtx_next_in_row(vars->mtx,&nz,&(vars->reg.col)), | ||
1653 | nz.col != mtx_LAST ) | ||
1654 | --(vars->colcount[nz.col]); | ||
1655 | } | ||
1656 | |||
1657 | static void assign_col_and_row(struct creorder_vars *vars, | ||
1658 | int32 col, | ||
1659 | int32 row) | ||
1660 | /** | ||
1661 | *** Assigns the given row to the given column, moving the row and column | ||
1662 | *** to the beginning of the active region and removing them (readjusting | ||
1663 | *** the region). The col counts are NOT adjusted. If col == mtx_NONE, | ||
1664 | *** then no column is assigned and the col is moved to the end of the | ||
1665 | *** active block instead. Otherwise, it is assumed that the row | ||
1666 | *** is active. | ||
1667 | **/ | ||
1668 | { | ||
1669 | if( row == mtx_NONE ) { | ||
1670 | mtx_swap_cols(vars->mtx,col,vars->reg.col.high); | ||
1671 | vars->colcount[col] = vars->colcount[vars->reg.col.high]; | ||
1672 | --(vars->reg.col.high); | ||
1673 | } else { | ||
1674 | mtx_swap_cols(vars->mtx,col,vars->reg.col.low); | ||
1675 | vars->colcount[col] = vars->colcount[vars->reg.col.low]; | ||
1676 | mtx_swap_rows(vars->mtx,row,vars->reg.row.low); | ||
1677 | ++(vars->reg.col.low); | ||
1678 | ++(vars->reg.row.low); | ||
1679 | } | ||
1680 | } | ||
1681 | |||
1682 | static void push_row_on_stack(struct creorder_vars *vars,int32 row) | ||
1683 | /** | ||
1684 | *** Pushes the given row onto the stack. It is assumed that the | ||
1685 | *** row is active. Col counts are adjusted. | ||
1686 | **/ | ||
1687 | { | ||
1688 | adjust_col_count(vars,row); | ||
1689 | mtx_swap_rows(vars->mtx,row,vars->reg.row.high); | ||
1690 | --(vars->reg.row.high); | ||
1691 | } | ||
1692 | |||
1693 | static int32 pop_row_from_stack(struct creorder_vars *vars) | ||
1694 | /** | ||
1695 | *** Pops the row on the "top" of the stack off of the stack and | ||
1696 | *** returns the row index, where it now lies in the active region. | ||
1697 | *** If the stack is empty, mtx_NONE is returned. Col counts are NOT | ||
1698 | *** adjusted (this together with a subsequent assignment of this row | ||
1699 | *** ==> no col count adjustment necessary). | ||
1700 | **/ | ||
1701 | { | ||
1702 | if( vars->reg.row.high < vars->rowhigh ) | ||
1703 | return(++(vars->reg.row.high)); | ||
1704 | else | ||
1705 | return( mtx_NONE ); | ||
1706 | } | ||
1707 | |||
1708 | static void assign_null_cols(struct creorder_vars *vars) | ||
1709 | /** | ||
1710 | *** Assigns empty cols, moving them to the assigned region. It is | ||
1711 | *** assumed that col counts are correct. Rows are assigned off the | ||
1712 | *** stack. | ||
1713 | **/ | ||
1714 | { | ||
1715 | int32 col; | ||
1716 | |||
1717 | for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col ) | ||
1718 | if( vars->colcount[col] == 0 ) | ||
1719 | assign_col_and_row(vars , col , pop_row_from_stack(vars)); | ||
1720 | } | ||
1721 | |||
1722 | static void cforward_triangularize(struct creorder_vars *vars) | ||
1723 | /** | ||
1724 | *** Forward triangularizes the region, assigning singleton columns with their | ||
1725 | *** one and only incident row until there are no more. The column counts | ||
1726 | *** must be correct, and they are updated. | ||
1727 | **/ | ||
1728 | { | ||
1729 | boolean change; | ||
1730 | |||
1731 | do { | ||
1732 | mtx_coord_t nz; | ||
1733 | real64 value; | ||
1734 | change = FALSE; | ||
1735 | for( nz.col = vars->reg.col.low ; | ||
1736 | nz.col <= vars->reg.col.high && vars->colcount[nz.col] != 1; | ||
1737 | ++nz.col ) ; | ||
1738 | if( nz.col <= vars->reg.col.high ) { | ||
1739 | /* found singleton col */ | ||
1740 | nz.row = mtx_FIRST; | ||
1741 | value = mtx_next_in_col(vars->mtx,&nz,&(vars->reg.row)); | ||
1742 | adjust_col_count(vars,nz.row); | ||
1743 | assign_col_and_row(vars,nz.col,nz.row); | ||
1744 | change = TRUE; | ||
1745 | } | ||
1746 | } while( change ); | ||
1747 | } | ||
1748 | |||
1749 | static int32 select_col(struct creorder_vars *vars) | ||
1750 | /** | ||
1751 | *** Selects a col and returns its index. It is assumed that there is a | ||
1752 | *** col. Col counts must be correct. vars->tlist will be used. | ||
1753 | **/ | ||
1754 | { | ||
1755 | int32 min_col_count; | ||
1756 | int32 max_row_count; | ||
1757 | int32 col; | ||
1758 | int32 i, nties=-2; /* # elements currently defined in vars->tlist */ | ||
1759 | int32 sum; | ||
1760 | mtx_coord_t nz; | ||
1761 | real64 value; | ||
1762 | mtx_matrix_t mtx; | ||
1763 | mtx_range_t *colrng, *rowrng; | ||
1764 | |||
1765 | /* Set to something > any possible value */ | ||
1766 | min_col_count = vars->reg.row.high-vars->reg.row.low+2; | ||
1767 | for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col ) | ||
1768 | if( vars->colcount[col] <= min_col_count ) { | ||
1769 | if( vars->colcount[col] < min_col_count ) { | ||
1770 | min_col_count = vars->colcount[col]; | ||
1771 | nties = 0; | ||
1772 | } | ||
1773 | vars->tlist[nties++].ndx = col; | ||
1774 | } | ||
1775 | /** | ||
1776 | *** vars->tlist[0..nties-1] is a list of row numbers which tie for | ||
1777 | *** minimum col count. | ||
1778 | **/ | ||
1779 | |||
1780 | max_row_count = -1; /* < any possible value */ | ||
1781 | i = 0; | ||
1782 | mtx = vars->mtx; | ||
1783 | rowrng=&(vars->reg.row); | ||
1784 | colrng=&(vars->reg.col); | ||
1785 | while( i < nties ) { | ||
1786 | |||
1787 | sum = 0; | ||
1788 | nz.row = mtx_FIRST; | ||
1789 | nz.col = vars->tlist[i].ndx; | ||
1790 | while( value = mtx_next_in_col(mtx,&nz,rowrng), | ||
1791 | nz.row != mtx_LAST ) | ||
1792 | sum += mtx_nonzeros_in_row(mtx,nz.row,colrng); | ||
1793 | if( sum > max_row_count ) { | ||
1794 | max_row_count = sum; | ||
1795 | col = nz.col; | ||
1796 | } | ||
1797 | i++; | ||
1798 | } | ||
1799 | /** | ||
1800 | *** Now col contains the col with the minimum col count, which has the | ||
1801 | *** greatest total row count of incident rows among all cols with | ||
1802 | *** the same (minimum) col count. Select it. | ||
1803 | **/ | ||
1804 | return(col); | ||
1805 | } | ||
1806 | |||
1807 | static void tspk1_reorder(struct creorder_vars *vars) | ||
1808 | /** | ||
1809 | *** Transpose the picture and explanation that follows: | ||
1810 | *** Reorders the assigned matrix vars->mtx within the specified bounding | ||
1811 | *** block region vars->reg. The region is split into 6 subregions during | ||
1812 | *** reordering: the rows are divided in two, and the columns divided in | ||
1813 | *** three. Initially everything is in the active subregion. Ultimately, | ||
1814 | *** everything will be assigned. | ||
1815 | *** | ||
1816 | *** <-- assigned -->|<-- active-->|<-- on stack -->| | ||
1817 | *** ----+----------------+-------------+----------------+ | ||
1818 | *** a | | | | | ||
1819 | *** s | | | | | ||
1820 | *** s | | | | | ||
1821 | *** i | (SQUARE) | | | | ||
1822 | *** g | | | | | ||
1823 | *** n | | | | | ||
1824 | *** e | | | | | ||
1825 | *** d | | | | | ||
1826 | *** ----+----------------+-------------+----------------+ | ||
1827 | *** a | | | | | ||
1828 | *** c | | ACTIVE | | | ||
1829 | *** t | | REGION | | | ||
1830 | *** i | | | | | ||
1831 | *** v | | | | | ||
1832 | *** e | | | | | ||
1833 | *** ----+----------------+-------------+----------------+ | ||
1834 | *** | ||
1835 | *** The algorithm is roughly as follows: | ||
1836 | *** (1) select a row (by some criterion). | ||
1837 | *** (2) push columns incident on that row onto the stack in decreasing | ||
1838 | *** order of their length. | ||
1839 | *** (3) pop first column off the stack and assign it to the selected | ||
1840 | *** row. | ||
1841 | *** (4) forward-triangularize (assign singleton rows with their one | ||
1842 | *** and only incident column, until no longer possible). | ||
1843 | *** | ||
1844 | *** (1)-(4) should be repeated until the active subregion becomes empty. | ||
1845 | *** | ||
1846 | *** Everything above was written as though the entire matrix is | ||
1847 | *** involved. In reality, only the relevant square region is involved. | ||
1848 | **/ | ||
1849 | { | ||
1850 | int32 col, size; | ||
1851 | int32 *colcount_array_origin; | ||
1852 | mtx_matrix_t mtx; | ||
1853 | |||
1854 | size = vars->reg.col.high - vars->reg.col.low + 1; | ||
1855 | size = MAX(size,vars->reg.row.high - vars->reg.row.low + 1); | ||
1856 | vars->tlist = size > 0 ? (struct creorder_list *) | ||
1857 | ascmalloc( size*sizeof(struct creorder_list) ) : NULL; | ||
1858 | colcount_array_origin = size > 0 ? (int32 *) | ||
1859 | ascmalloc( size*sizeof(int32) ) : NULL; | ||
1860 | vars->colcount = | ||
1861 | NOTNULL(colcount_array_origin) ? | ||
1862 | colcount_array_origin - vars->reg.col.low : NULL; | ||
1863 | mtx = vars->mtx; | ||
1864 | |||
1865 | vars->rowhigh = vars->reg.row.high; | ||
1866 | /* Establish col counts */ | ||
1867 | for( col = vars->reg.col.low ; col <= vars->reg.col.high ; ++col ) | ||
1868 | vars->colcount[col] = | ||
1869 | mtx_nonzeros_in_col(mtx,col,&(vars->reg.row)); | ||
1870 | |||
1871 | while(TRUE) { | ||
1872 | struct creorder_list *head; | ||
1873 | int32 nelts; /* # elements "allocated" from vars->tlist */ | ||
1874 | mtx_coord_t nz; | ||
1875 | real64 value; | ||
1876 | |||
1877 | cforward_triangularize(vars); | ||
1878 | assign_null_cols(vars); | ||
1879 | if( vars->reg.col.low > vars->reg.col.high || | ||
1880 | vars->reg.row.low > vars->reg.row.high ) { | ||
1881 | /* Active region is now empty, done */ | ||
1882 | if( NOTNULL(vars->tlist) ) | ||
1883 | ascfree( vars->tlist ); | ||
1884 | if( NOTNULL(colcount_array_origin) ) | ||
1885 | ascfree( colcount_array_origin ); | ||
1886 | return; | ||
1887 | } | ||
1888 | |||
1889 | head = NULL; | ||
1890 | nelts = 0; | ||
1891 | nz.row = mtx_FIRST; | ||
1892 | nz.col = select_col(vars); | ||
1893 | while( value = mtx_next_in_col(mtx,&nz,&(vars->reg.row)), | ||
1894 | nz.row != mtx_LAST ) { | ||
1895 | struct creorder_list **q,*p; | ||
1896 | |||
1897 | p = &(vars->tlist[nelts++]); | ||
1898 | p->ndx = mtx_row_to_org(mtx,nz.row); | ||
1899 | p->count = mtx_nonzeros_in_row(mtx,nz.row,&(vars->reg.col)); | ||
1900 | for( q = &head; *q && (*q)->count > p->count; q = &((*q)->next) ); | ||
1901 | p->next = *q; | ||
1902 | *q = p; | ||
1903 | } | ||
1904 | /** | ||
1905 | *** We now have a list of columns which intersect the selected row. | ||
1906 | *** The list is in order of decreasing column count. | ||
1907 | **/ | ||
1908 | |||
1909 | /* Push incident rows on stack */ | ||
1910 | for( ; NOTNULL(head) ; head = head->next ) | ||
1911 | push_row_on_stack(vars,mtx_org_to_row(mtx,head->ndx)); | ||
1912 | |||
1913 | /* Pop row off stack and assign to selected col */ | ||
1914 | assign_col_and_row(vars , nz.col , pop_row_from_stack(vars)); | ||
1915 | } | ||
1916 | /* Not reached. */ | ||
1917 | } | ||
1918 | |||
1919 | /********************************* | ||
1920 | end of tspk1 stuff | ||
1921 | *********************************/ | ||
1922 | |||
1923 | static boolean nonempty_row(mtx_matrix_t mtx,int32 row) | ||
1924 | /** | ||
1925 | *** ? row not empty in mtx. | ||
1926 | **/ | ||
1927 | { | ||
1928 | mtx_coord_t nz; | ||
1929 | real64 value; | ||
1930 | value = mtx_next_in_row(mtx, mtx_coord(&nz,row,mtx_FIRST), mtx_ALL_COLS); | ||
1931 | return( nz.col != mtx_LAST ); | ||
1932 | } | ||
1933 | |||
1934 | static boolean nonempty_col(mtx_matrix_t mtx,int32 col) | ||
1935 | /** | ||
1936 | *** ? column not empty in mtx. | ||
1937 | **/ | ||
1938 | { | ||
1939 | mtx_coord_t nz; | ||
1940 | real64 value; | ||
1941 | value = mtx_next_in_col(mtx, mtx_coord(&nz,mtx_FIRST,col), mtx_ALL_ROWS); | ||
1942 | return( nz.row != mtx_LAST ); | ||
1943 | } | ||
1944 | |||
1945 | static void determine_pivot_range(linsolqr_system_t sys) | ||
1946 | /** | ||
1947 | *** Calculates sys->rng from sys->coef. | ||
1948 | **/ | ||
1949 | { | ||
1950 | sys->reg.row.low = sys->reg.row.high = -1; | ||
1951 | sys->reg.col.low = sys->reg.col.high = -1; | ||
1952 | |||
1953 | for( sys->rng.high=mtx_order(sys->coef) ; --(sys->rng.high) >= 0 ; ) { | ||
1954 | if( nonempty_row(sys->coef,sys->rng.high) && | ||
1955 | sys->reg.row.high < 0 ) | ||
1956 | sys->reg.row.high = sys->rng.high; | ||
1957 | if( nonempty_col(sys->coef,sys->rng.high) && | ||
1958 | sys->reg.col.high < 0 ) | ||
1959 | sys->reg.col.high = sys->rng.high; | ||
1960 | if( nonempty_row(sys->coef,sys->rng.high) && | ||
1961 | nonempty_col(sys->coef,sys->rng.high) ) | ||
1962 | break; | ||
1963 | } | ||
1964 | |||
1965 | for( sys->rng.low=0 ; sys->rng.low <= sys->rng.high ; ++(sys->rng.low) ) { | ||
1966 | if( nonempty_row(sys->coef,sys->rng.low) && | ||
1967 | sys->reg.row.low < 0 ) | ||
1968 | sys->reg.row.low = sys->rng.low; | ||
1969 | if( nonempty_col(sys->coef,sys->rng.low) && | ||
1970 | sys->reg.col.low < 0 ) | ||
1971 | sys->reg.col.low = sys->rng.low; | ||
1972 | if( nonempty_row(sys->coef,sys->rng.low) && | ||
1973 | nonempty_col(sys->coef,sys->rng.low) ) | ||
1974 | break; | ||
1975 | } | ||
1976 | } | ||
1977 | |||
1978 | static void square_region(linsolqr_system_t sys,mtx_region_t *region) | ||
1979 | /** | ||
1980 | *** Get the largest square confined to the diagonal within the region given | ||
1981 | *** and set sys->rng accordingly. | ||
1982 | **/ | ||
1983 | { | ||
1984 | sys->reg = *region; | ||
1985 | sys->rng.low = MAX(region->row.low,region->col.low); | ||
1986 | sys->rng.high = MIN(region->row.high,region->col.high); | ||
1987 | } | ||
1988 | |||
1989 | static int ranki_reorder(linsolqr_system_t sys,mtx_region_t *region) | ||
1990 | /** | ||
1991 | *** The region to reorder is first isolated by truncating the region | ||
1992 | *** provided to the largest square region confined to the matrix diagonal. | ||
1993 | *** It is presumed it will contain no empty rows or columns and will | ||
1994 | *** provide the basis of candidate pivots when factoring. | ||
1995 | **/ | ||
1996 | { | ||
1997 | struct reorder_vars vars; | ||
1998 | CHECK_SYSTEM(sys); | ||
1999 | if (sys->fclass != ranki) { | ||
2000 | FPRINTF(stderr,"linsolqr_reorder: spk1 reorder called on system\n"); | ||
2001 | FPRINTF(stderr," with inappropriate factor class\n"); | ||
2002 | return 1; | ||
2003 | } | ||
2004 | if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys); | ||
2005 | else square_region(sys,region); | ||
2006 | |||
2007 | vars.mtx = sys->coef; | ||
2008 | vars.reg.row.low = vars.reg.col.low = sys->rng.low; | ||
2009 | vars.reg.row.high = vars.reg.col.high = sys->rng.high; | ||
2010 | spk1_reorder(&vars); | ||
2011 | return 0; | ||
2012 | } | ||
2013 | |||
2014 | static int tranki_reorder(linsolqr_system_t sys,mtx_region_t *region) | ||
2015 | /** | ||
2016 | *** The region to reorder is first isolated by truncating the region | ||
2017 | *** provided to the largest square region confined to the matrix diagonal. | ||
2018 | *** It is presumed it will contain no empty rows or columns and will | ||
2019 | *** provide the basis of candidate pivots when factoring. | ||
2020 | **/ | ||
2021 | { | ||
2022 | struct creorder_vars vars; | ||
2023 | CHECK_SYSTEM(sys); | ||
2024 | if ( !(sys->fclass==ranki || sys->fclass==s_qr) ) { | ||
2025 | FPRINTF(stderr,"linsolqr_reorder: tspk1 reorder called on system\n"); | ||
2026 | FPRINTF(stderr," with inappropriate factor method\n"); | ||
2027 | return 1; | ||
2028 | } | ||
2029 | if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys); | ||
2030 | else square_region(sys,region); | ||
2031 | |||
2032 | vars.mtx = sys->coef; | ||
2033 | vars.reg.row.low = vars.reg.col.low = sys->rng.low; | ||
2034 | vars.reg.row.high = vars.reg.col.high = sys->rng.high; | ||
2035 | tspk1_reorder(&vars); | ||
2036 | return 0; | ||
2037 | } | ||
2038 | |||
2039 | /***************************************************************************\ | ||
2040 | End of reordering functions for SPK1. | ||
2041 | \***************************************************************************/ | ||
2042 | |||
2043 | /***************************************************************************\ | ||
2044 | RANKI implementation functions. | ||
2045 | \***************************************************************************/ | ||
2046 | |||
2047 | static void eliminate_row(mtx_matrix_t mtx, | ||
2048 | mtx_range_t *rng, | ||
2049 | int32 row, /* row to eliminate */ | ||
2050 | real64 *tmp, /* temporary array */ | ||
2051 | real64 *pivots) /* prior pivots array */ | ||
2052 | /** | ||
2053 | *** Eliminates the given row to the left of the diagonal element, assuming | ||
2054 | *** valid pivots for all of the diagonal elements above it (the elements | ||
2055 | *** above those diagonal elements, if any exist, are assumed to be U | ||
2056 | *** elements and therefore ignored). The temporary array is used by this | ||
2057 | *** function to do its job. tmp[k], where rng->low <= k <= rng->high must | ||
2058 | *** be defined (allocated) but need not be initialized. | ||
2059 | *** pivots[k] where rng->low <= k <=rng->high must be allocated and | ||
2060 | *** pivots[rng->low] to pivots[row-1] must contain the previous pivots. | ||
2061 | **/ | ||
2062 | { | ||
2063 | mtx_coord_t nz; | ||
2064 | mtx_range_t left_to_eliminate,high_cols,tmp_cols; | ||
2065 | boolean do_series=FALSE; | ||
2066 | |||
2067 | high_cols.low = row; | ||
2068 | high_cols.high = rng->high; | ||
2069 | left_to_eliminate.low = rng->low; | ||
2070 | left_to_eliminate.high = row - 1; | ||
2071 | |||
2072 | /* Move non-zeros left of pivot from matrix to full array */ | ||
2073 | mtx_zero_real64(tmp+rng->low,(row-rng->low)); | ||
2074 | nz.row = row; | ||
2075 | mtx_cur_row_vec(mtx,row,tmp,&left_to_eliminate); | ||
2076 | mtx_clear_row(mtx,row,&left_to_eliminate); | ||
2077 | |||
2078 | /* eliminates nzs from pivot, one by one, filling tmp with multipliers */ | ||
2079 | do_series= ( (row-1) >= (rng->low) ); | ||
2080 | if (do_series) { | ||
2081 | mtx_add_row_series_init(mtx,row,FALSE); | ||
2082 | /* do nothing between here and the END call to _init which removes | ||
2083 | elements from row */ | ||
2084 | } | ||
2085 | for( nz.row = row-1 ; nz.row >= rng->low ; --(nz.row) ) { | ||
2086 | if( tmp[nz.row] == D_ZERO ) | ||
2087 | continue; /* Nothing to do for this row */ | ||
2088 | |||
2089 | nz.col = nz.row; | ||
2090 | tmp[nz.row] /= pivots[nz.row]; | ||
2091 | /* tmp[nz.row] now equals multiplier */ | ||
2092 | |||
2093 | /* Perform "mtx_add_row" for full array part of the row */ | ||
2094 | left_to_eliminate.high = nz.row - 1; | ||
2095 | mtx_cur_vec_add_row(mtx,tmp,nz.row,-tmp[nz.row], | ||
2096 | &left_to_eliminate,FALSE); | ||
2097 | |||
2098 | /* Perform "mtx_add_row" on remaining part of row */ | ||
2099 | mtx_add_row_series(nz.row,-tmp[nz.row],&high_cols); | ||
2100 | } | ||
2101 | if (do_series) { | ||
2102 | mtx_add_row_series_init(mtx,mtx_NONE,TRUE); | ||
2103 | } | ||
2104 | |||
2105 | mtx_fill_cur_row_vec(mtx,row,tmp,mtx_range(&tmp_cols,rng->low,row-1)); | ||
2106 | |||
2107 | } | ||
2108 | |||
2109 | |||
2110 | /* not in use it seems */ | ||
2111 | #if BUILD_DEAD_CODE /* yes, baa */ | ||
2112 | static int32 top_of_spike(linsolqr_system_t sys,int32 col) | ||
2113 | /** | ||
2114 | *** Determines the top row (row of lowest index) in a possible spike | ||
2115 | *** above the diagonal element in the given column. If there is no spike, | ||
2116 | *** then (row = ) col is returned. | ||
2117 | **/ | ||
2118 | { | ||
2119 | mtx_range_t above_diagonal; | ||
2120 | mtx_coord_t nz; | ||
2121 | real64 value; | ||
2122 | int32 top_row; | ||
2123 | |||
2124 | above_diagonal.low = sys->rng.low; | ||
2125 | above_diagonal.high = col-1; | ||
2126 | top_row = nz.col = col; | ||
2127 | nz.row = mtx_FIRST; | ||
2128 | while( value = mtx_next_in_col(sys->factors,&nz,&above_diagonal), | ||
2129 | nz.row != mtx_LAST ) | ||
2130 | if( nz.row < top_row ) | ||
2131 | top_row = nz.row; | ||
2132 | return( top_row ); | ||
2133 | } | ||
2134 | #endif /* BUILD_DEAD_CODE */ | ||
2135 | |||
2136 | static boolean col_is_a_spike(linsolqr_system_t sys,int32 col) | ||
2137 | /** | ||
2138 | *** Determines if the col is a spike, characterized by having any | ||
2139 | *** nonzeros above the diagonal. By construction there shouldn't | ||
2140 | *** be any numeric 0 nonzeros above the diagonal, so we don't prune | ||
2141 | *** them out here. | ||
2142 | **/ | ||
2143 | { | ||
2144 | mtx_range_t above_diagonal; | ||
2145 | mtx_coord_t nz; | ||
2146 | |||
2147 | above_diagonal.low = sys->rng.low; | ||
2148 | above_diagonal.high = col-1; | ||
2149 | nz.col = col; | ||
2150 | nz.row = mtx_FIRST; | ||
2151 | /* this loop is cheaper than it looks */ | ||
2152 | while( (void)mtx_next_in_col(sys->factors,&nz,&above_diagonal), | ||
2153 | nz.row != mtx_LAST ) { | ||
2154 | if( nz.row < col ) return TRUE; | ||
2155 | } | ||
2156 | return( FALSE ); | ||
2157 | } | ||
2158 | |||
2159 | /* see implementation notes before modifying this function! */ | ||
2160 | static void number_drag(real64 *vec, int32 rfrom, int32 rto) | ||
2161 | { | ||
2162 | int32 i; | ||
2163 | real64 tmp; | ||
2164 | if (rto <rfrom) { | ||
2165 | tmp=vec[rfrom]; | ||
2166 | for (i=rfrom; i> rto; i--) { | ||
2167 | vec[i]=vec[i-1]; /* rotate right */ | ||
2168 | } | ||
2169 | vec[rto]=tmp; | ||
2170 | return; | ||
2171 | } | ||
2172 | if (rto > rfrom) { | ||
2173 | tmp=vec[rfrom]; | ||
2174 | for (i=rfrom; i< rto; i++) { | ||
2175 | vec[i]=vec[i+1]; /* rotate left */ | ||
2176 | } | ||
2177 | vec[rto]=tmp; | ||
2178 | } | ||
2179 | } | ||
2180 | |||
2181 | static void rankikw_factor(linsolqr_system_t sys) | ||
2182 | /** | ||
2183 | *** (the following description also applies to rankijz_factor which is | ||
2184 | *** different only in pivot selection strategy.) | ||
2185 | *** This is the heart of the linear equation solver. This function | ||
2186 | *** factorizes the matrix into a lower (L) and upper (U) triangular | ||
2187 | *** matrix. sys->smallest_pivot and sys->rank are calculated. The | ||
2188 | *** RANKI method is utilized. At the end of elimination, the matrix A | ||
2189 | *** is factored into A = U L, where U and L are stored as follows: | ||
2190 | *** | ||
2191 | *** <----- r ----> <- n-r -> | ||
2192 | *** +--------------+---------+ | ||
2193 | *** | | | | ||
2194 | *** | U | | | ||
2195 | *** | | | | ||
2196 | *** | L | | r | ||
2197 | *** | | | | ||
2198 | *** +--------------+---------+ | ||
2199 | *** | | | | ||
2200 | *** | | 0 | n-r | ||
2201 | *** | | | | ||
2202 | *** +--------------+---------+ | ||
2203 | *** | ||
2204 | *** The rows and columns have been permuted so that all of the pivoted | ||
2205 | *** original rows and columns are found in the first r rows and columns | ||
2206 | *** of the region. The last n-r rows and columns are unpivoted. U has | ||
2207 | *** 1's on its diagonal, whereas L's diagonal is stored in the matrix. | ||
2208 | *** | ||
2209 | *** Everything above was written as though the entire matrix is | ||
2210 | *** involved. In reality, only the relevant square region is involved. | ||
2211 | **/ | ||
2212 | { | ||
2213 | mtx_coord_t nz; | ||
2214 | int32 last_row; | ||
2215 | mtx_range_t pivot_candidates; | ||
2216 | real64 *tmp; | ||
2217 | real64 pivot, *pivots; | ||
2218 | int32 length; | ||
2219 | mtx_matrix_t mtx; | ||
2220 | |||
2221 | length = sys->rng.high - sys->rng.low + 1; | ||
2222 | tmp = sys->ludata->tmp; | ||
2223 | /* eliminate row takes care of zeroing the relevant region and won't | ||
2224 | change values outside of it. */ | ||
2225 | pivots=sys->ludata->pivlist; | ||
2226 | mtx=sys->factors; | ||
2227 | |||
2228 | sys->smallest_pivot = MAXDOUBLE; | ||
2229 | last_row = pivot_candidates.high = sys->rng.high; | ||
2230 | for( nz.row = sys->rng.low ; nz.row <= last_row ; ) { | ||
2231 | |||
2232 | pivot_candidates.low = nz.col = nz.row; | ||
2233 | pivots[nz.row]=pivot = mtx_value(mtx,&nz); | ||
2234 | pivot = fabs(pivot); | ||
2235 | if( pivot > sys->pivot_zero && | ||
2236 | pivot >= sys->ptol * mtx_row_max(mtx,&nz,&pivot_candidates,NULL) && | ||
2237 | !col_is_a_spike(sys,nz.row) ) { | ||
2238 | /* Good pivot and not a spike: continue with next row */ | ||
2239 | if( pivot < sys->smallest_pivot ) | ||
2240 | sys->smallest_pivot = pivot; | ||
2241 | ++(nz.row); | ||
2242 | continue; | ||
2243 | } | ||
2244 | /* pivots for rows nz.row back to sys->rng->low are stored in pivots */ | ||
2245 | /** | ||
2246 | *** Row is a spike row or will | ||
2247 | *** be when a necessary column | ||
2248 | *** exchange occurs. | ||
2249 | **/ | ||
2250 | eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots); | ||
2251 | /* pivot will be largest of those available. get size and value */ | ||
2252 | pivot=mtx_row_max(mtx,&nz,&pivot_candidates,&(pivots[nz.row])); | ||
2253 | if( pivot <= sys->pivot_zero ) { /* pivot is an epsilon */ | ||
2254 | /* Dependent row, drag to the end */ | ||
2255 | mtx_drag(mtx,nz.row,last_row); | ||
2256 | number_drag(pivots,nz.row,last_row); | ||
2257 | --last_row; | ||
2258 | #undef KAA_DEBUG | ||
2259 | #ifdef KAA_DEBUG | ||
2260 | FPRINTF(stderr,"Warning: Row %d is dependent with pivot %20.8g\n", | ||
2261 | nz.row,pivot); | ||
2262 | #endif /* KAA_DEBUG */ | ||
2263 | } else { | ||
2264 | /* Independent row: nz contains best pivot */ | ||
2265 | /* Move pivot to diagonal */ | ||
2266 | mtx_swap_cols(mtx,nz.row,nz.col); | ||
2267 | mtx_drag( mtx , nz.row , sys->rng.low ); | ||
2268 | number_drag(pivots,nz.row,sys->rng.low); | ||
2269 | if( pivot < sys->smallest_pivot ) | ||
2270 | sys->smallest_pivot = pivot; | ||
2271 | ++(nz.row); | ||
2272 | } | ||
2273 | } | ||
2274 | |||
2275 | sys->rank = last_row - sys->rng.low + 1; | ||
2276 | } | ||
2277 | |||
2278 | |||
2279 | static void rankijz_factor(linsolqr_system_t sys) | ||
2280 | /** | ||
2281 | *** This is an alternate pivoting strategy introduced by Joe Zaher. | ||
2282 | *** it looks down and across for good pivots rather than just across. | ||
2283 | *** | ||
2284 | **/ | ||
2285 | { | ||
2286 | real64 biggest; | ||
2287 | mtx_coord_t nz, best; | ||
2288 | mtx_region_t candidates; | ||
2289 | real64 *tmp; | ||
2290 | real64 pivot, *pivots; | ||
2291 | int32 length; | ||
2292 | mtx_matrix_t mtx; | ||
2293 | |||
2294 | length = sys->rng.high - sys->rng.low + 1; | ||
2295 | tmp = sys->ludata->tmp; | ||
2296 | /* eliminate row takes care of zeroing the relevant region and won't | ||
2297 | change values outside of it. */ | ||
2298 | pivots=sys->ludata->pivlist; | ||
2299 | mtx=sys->factors; | ||
2300 | |||
2301 | sys->smallest_pivot = MAXDOUBLE; | ||
2302 | candidates.row.high = sys->rng.high; | ||
2303 | candidates.col.high = sys->rng.high; | ||
2304 | for( nz.row = sys->rng.low ; nz.row <= candidates.row.high ; ) { | ||
2305 | nz.col = nz.row; | ||
2306 | pivots[nz.row] = pivot = mtx_value(mtx,&nz); | ||
2307 | pivot = fabs(pivot); | ||
2308 | candidates.row.low = nz.row; | ||
2309 | candidates.col.low = nz.row; | ||
2310 | if( !col_is_a_spike(sys,nz.row) ) { | ||
2311 | best.col = nz.row; | ||
2312 | mtx_col_max(mtx,&best,&(candidates.row),&biggest); | ||
2313 | if( fabs(biggest) >= sys->pivot_zero ) { | ||
2314 | if( pivot < sys->pivot_zero || pivot < sys->ptol*fabs(biggest) ) { | ||
2315 | mtx_swap_rows(mtx,nz.row,best.row); | ||
2316 | pivots[nz.row] = biggest; | ||
2317 | pivot = fabs(biggest); | ||
2318 | } | ||
2319 | if( pivot < sys->smallest_pivot ) sys->smallest_pivot = pivot; | ||
2320 | ++(nz.row); | ||
2321 | continue; | ||
2322 | } | ||
2323 | } | ||
2324 | /* pivots for rows nz.row back to sys->rng->low are stored in pivots */ | ||
2325 | /** | ||
2326 | *** Row is a spike row or will | ||
2327 | *** be when a necessary column | ||
2328 | *** exchange occurs. | ||
2329 | **/ | ||
2330 | eliminate_row(mtx,&(sys->rng),nz.row,tmp,pivots); | ||
2331 | /* pivot will be largest of those available. get size and value */ | ||
2332 | pivot=mtx_row_max(mtx,&nz,&(candidates.col),&(pivots[nz.row])); | ||
2333 | /* Move pivot to diagonal */ | ||
2334 | if( pivot < sys->pivot_zero ) { /* pivot is an epsilon */ | ||
2335 | /* Dependent row, drag nz to lower right */ | ||
2336 | mtx_drag(mtx,nz.row,candidates.row.high); | ||
2337 | number_drag(pivots,nz.row,candidates.row.high); | ||
2338 | --(candidates.row.high); | ||
2339 | } else { | ||
2340 | /* Independent row, drag nz to upper left */ | ||
2341 | mtx_swap_cols(mtx,nz.row,nz.col); | ||
2342 | mtx_drag( mtx , nz.row , sys->rng.low ); | ||
2343 | number_drag(pivots,nz.row,sys->rng.low); | ||
2344 | if( pivot < sys->smallest_pivot ) | ||
2345 | sys->smallest_pivot = pivot; | ||
2346 | ++(nz.row); | ||
2347 | } | ||
2348 | } | ||
2349 | |||
2350 | sys->rank = candidates.row.high - sys->rng.low + 1; | ||
2351 | } | ||
2352 | |||
2353 | |||
2354 | #define KAA_DEBUG 0 | ||
2355 | #if KAA_DEBUG | ||
2356 | int ranki_entry(linsolqr_system_t sys,mtx_region_t *region) | ||
2357 | #else | ||
2358 | static int ranki_entry(linsolqr_system_t sys,mtx_region_t *region) | ||
2359 | #endif | ||
2360 | /** | ||
2361 | *** The region to factor is first isolated by truncating the region | ||
2362 | *** provided to the largest square region confined to the matrix diagonal. | ||
2363 | *** It is presumed it will contain no empty rows or columns and that it has | ||
2364 | *** been previously reordered using linsolqr_reorder(sys,region,spk1) | ||
2365 | *** or a sane variant of spk1. | ||
2366 | *** This is the entry point for all ranki based strategies, regardless of | ||
2367 | *** pivot selection subtleties. | ||
2368 | **/ | ||
2369 | { | ||
2370 | struct rhs_list *rl; | ||
2371 | double time; | ||
2372 | |||
2373 | CHECK_SYSTEM(sys); | ||
2374 | if( sys->factored ) | ||
2375 | return 0; | ||
2376 | switch(sys->fmethod) { | ||
2377 | case ranki_kw: | ||
2378 | case ranki_jz: | ||
2379 | case ranki_ka: /* add new methods here */ | ||
2380 | break; | ||
2381 | default: | ||
2382 | return 1; | ||
2383 | } | ||
2384 | |||
2385 | if(ISNULL(sys->ludata)) | ||
2386 | return 1; | ||
2387 | |||
2388 | if( NOTNULL(sys->inverse) ) | ||
2389 | mtx_destroy(sys->inverse); | ||
2390 | sys->inverse=NULL; | ||
2391 | if( NOTNULL(sys->factors) ) | ||
2392 | mtx_destroy(sys->factors); | ||
2393 | if( region == mtx_ENTIRE_MATRIX ) determine_pivot_range(sys); | ||
2394 | else square_region(sys,region); | ||
2395 | |||
2396 | sys->factors = mtx_copy_region(sys->coef,region); | ||
2397 | sys->rank = -1; | ||
2398 | sys->smallest_pivot = MAXDOUBLE; | ||
2399 | for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next ) | ||
2400 | rl->solved = FALSE; | ||
2401 | insure_capacity(sys); | ||
2402 | insure_lu_capacity(sys); | ||
2403 | |||
2404 | time = tm_cpu_time(); | ||
2405 | switch(sys->fmethod) { | ||
2406 | case ranki_ka: | ||
2407 | case ranki_kw: | ||
2408 | rankikw_factor(sys); | ||
2409 | break; | ||
2410 | case ranki_jz: | ||
2411 | default: | ||
2412 | rankijz_factor(sys); | ||
2413 | } | ||
2414 | /* no longer done automatically as noone usually cares. | ||
2415 | calc_dependent_rows_ranki1(sys); | ||
2416 | calc_dependent_cols_ranki1(sys); | ||
2417 | */ | ||
2418 | sys->factored = TRUE; | ||
2419 | |||
2420 | #undef KAA_DEBUG | ||
2421 | #if KAA_DEBUG | ||
2422 | time = tm_cpu_time() - time; | ||
2423 | FPRINTF(stderr,"Time for Inversion = %f\n",time); | ||
2424 | FPRINTF(stderr,"Non-zeros in Inverse = %d\n", | ||
2425 | mtx_nonzeros_in_region(sys->factors,region)); | ||
2426 | #endif | ||
2427 | return 0; | ||
2428 | } | ||
2429 | |||
2430 | static int ranki_solve(linsolqr_system_t sys, struct rhs_list *rl) | ||
2431 | { | ||
2432 | backward_substitute(sys,rl->varvalue,rl->transpose); | ||
2433 | forward_substitute(sys,rl->varvalue,rl->transpose); | ||
2434 | zero_unpivoted_vars(sys,rl->varvalue,rl->transpose); | ||
2435 | return 0; | ||
2436 | } | ||
2437 | |||
2438 | |||
2439 | /* | ||
2440 | ********************************************************************* | ||
2441 | * Start of the 2 bodied factorization schemes. | ||
2442 | * | ||
2443 | * ranki2_entry will be used to access these routines. | ||
2444 | * there is now code called: | ||
2445 | * rankikw2_factor | ||
2446 | * rankijz2_factor | ||
2447 | * eliminate_row2 | ||
2448 | * ranki2_entry | ||
2449 | * forward_substitute2 | ||
2450 | * backward_substitute2 | ||
2451 | * ranki2_solve | ||
2452 | * | ||
2453 | * We have fixed the calc_col_dependent and calc_row_dependent | ||
2454 | * routines. | ||
2455 | ********************************************************************* | ||
2456 | */ | ||
2457 | |||
2458 | extern int32 mtx_number_ops; | ||
2459 | |||
2460 | /* this function does no permutation of any sort */ | ||
2461 | static | ||
2462 | void eliminate_row2(mtx_matrix_t mtx, | ||
2463 | mtx_matrix_t upper_mtx, | ||
2464 | mtx_range_t *rng, | ||
2465 | int32 row, /* row to eliminate */ | ||
2466 | real64 *tmp, /* temporary array */ | ||
2467 | real64 *pivots, /* prior pivots array */ | ||
2468 | real64 dtol /* drop tolerance */ | ||
2469 | ) | ||
2470 | { | ||
2471 | mtx_coord_t nz; | ||
2472 | int j,low,high; | ||
2473 | double tmpval; | ||
2474 | |||
2475 | /* | ||
2476 | * Move all non-zeros from current row to full array. | ||
2477 | * The full array would have been initialized before, | ||
2478 | * we must put it back in the clean state when we leave. | ||
2479 | * All operations are done over mtx_ALL_COLS. | ||
2480 | * | ||
2481 | * Note: because of an addrow over mtx_ALL_COLS, entries | ||
2482 | * of tmp outside rng may have nonzeros put in them if | ||
2483 | * sys->factors has nonzeros in the outlying columns. | ||
2484 | * If enough of these pile up out there during elimination | ||
2485 | * of a block and the compiler treats double overflow as | ||
2486 | * a signalable error, we will have a floating point | ||
2487 | * exception. | ||
2488 | * Currently sys->factors is a copy of a region from | ||
2489 | * the sys->coef matrix, so we should not have anything | ||
2490 | * out there to pile up. If we make a variant which | ||
2491 | * does not copy the coef matrix but uses it directly, | ||
2492 | * this code needs to be revisited. | ||
2493 | */ | ||
2494 | mtx_steal_cur_row_vec(mtx,row,tmp,mtx_ALL_COLS); | ||
2495 | |||
2496 | /* | ||
2497 | * Eliminates nzs from pivot, one by one, filling tmp with multipliers | ||
2498 | * We now operate on the entire row, since we are not writing the | ||
2499 | * multipliers back to the matrix. | ||
2500 | */ | ||
2501 | |||
2502 | low = rng->low; | ||
2503 | high = rng->high; | ||
2504 | for (j = row-1 ; j >= low ; --(j) ) { | ||
2505 | if (tmp[j] == D_ZERO) | ||
2506 | continue; /* Nothing to do for this row */ | ||
2507 | tmpval = tmp[j]/pivots[j]; /* Compute multiplier */ | ||
2508 | |||
2509 | /* | ||
2510 | * tmpval is now the multiplier. We use it to eliminate the row, | ||
2511 | * but backpatch it to tmp, *after* we do the elimination, as | ||
2512 | * mtx_cur_vec_add_row over all columns will stomp on tmp[j] | ||
2513 | */ | ||
2514 | mtx_cur_vec_add_row(mtx,tmp,j,-tmpval,mtx_ALL_COLS,FALSE); | ||
2515 | tmp[j] = tmpval; /* patch the diagonal */ | ||
2516 | } | ||
2517 | |||
2518 | /* | ||
2519 | * Fill up the upper triangular matrix. | ||
2520 | * refill the range [low .. row-1]. Remember that we have | ||
2521 | * to zero all nnz's in tmp. | ||
2522 | */ | ||
2523 | nz.row = row; | ||
2524 | for (j=low;j<row;j++) { | ||
2525 | if (tmp[j] != D_ZERO) { | ||
2526 | nz.col = j; | ||
2527 | mtx_fill_value(upper_mtx,&nz,tmp[j]); | ||
2528 | #if RBADEBUG | ||
2529 | FPRINTF(gscr,"fillingU: or %d oc %d (cc %d) %24.18g\n", | ||
2530 | mtx_row_to_org(mtx,nz.row), mtx_col_to_org(mtx,nz.col), nz.col, tmp[j]); | ||
2531 | #endif | ||
2532 | tmp[j] = D_ZERO; | ||
2533 | } | ||
2534 | } | ||
2535 | /* | ||
2536 | * refill the range [row .. high]. We *wont* keep | ||
2537 | * the diagonal as this is sorted out by the pivot | ||
2538 | * array, but here we must keep it, sigh. | ||
2539 | * At this stage we don't know that the diagonal is the | ||
2540 | * pivot, as that is determined after elimination done. | ||
2541 | * Outer loop must delete the ultimate pivot element. | ||
2542 | * Odds are good, however, that it is the current diagonal, | ||
2543 | * so put in the diagonal last. | ||
2544 | */ | ||
2545 |