/*
 *  mtx: Ascend Sparse Matrix Package
 *  by Karl Michael Westerberg
 *  Created: 2/6/90
 *  Version: $Revision: 1.8 $
 *  Version control file: $RCSfile: mtx_linal.c,v $
 *  Date last modified: $Date: 1997/07/28 20:53:03 $
 *  Last modified by: $Author: ballan $
 *
 *  This file is part of the SLV solver.
 *
 *  Copyright (C) 1990 Karl Michael Westerberg
 *  Copyright (C) 1993 Joseph Zaher
 *  Copyright (C) 1994 Joseph Zaher, Benjamin Andrew Allan
 *  Copyright (C) 1995 Kirk Andre Abbott, Benjamin Andrew Allan
 *  Copyright (C) 1996 Benjamin Andrew Allan
 *
 *  The SLV solver is free software; you can redistribute
 *  it and/or modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version.
 *
 *  The SLV solver is distributed in hope that it will be
 *  useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along with
 *  the program; if not, write to the Free Software Foundation, Inc., 675
 *  Mass Ave, Cambridge, MA 02139 USA.  Check the file named COPYING.
 *  COPYING is found in ../compiler.
 */

#include <math.h>
#include "utilities/ascConfig.h"
#include "utilities/ascMalloc.h"
#include "utilities/mem.h"
#include "solver/mtx.h"
/* grab our private parts */
#define __MTX_C_SEEN__
#include "solver/mtx_use_only.h"

/*
 * At present these assume the caller knows ahead of time how long the
 * vectors value and index must be, or that they are of size mtx_order,
 * which can get expensive. This may be good functionality to have, but it
 * needs some examples of why it should be supported before these are
 * exported properly.
 */
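/*
 * Illustrative sizing sketch (not original code): when the nonzero count
 * of the target row or column is not known in advance, the value/index
 * buffers can be sized to the matrix order, e.g.
 *
 *   int32 n = mtx_order(mtx);
 *   real64 *value = (real64 *)ascmalloc(n*sizeof(real64));
 *   int32 *index = (int32 *)ascmalloc(n*sizeof(int32));
 *
 * which is exactly the "can get expensive" case the note above warns about.
 */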
/*********************************************************************\
  Start of New functions added by kaa. KAA_DEBUG
\*********************************************************************/
/*
 * Export these if they work.
 */
/* They don't, or more to the point, they duplicate the sparse_t functions
 * and are a total waste of bytes. */
#ifdef DELETE_THIS_DEAD_FUNCTION
static int32 mtx_cur_col_to_csr(mtx_matrix_t mtx,
                                int32 col,
                                real64 *value,
                                int32 *index)
{
  struct element_t *elt;
  int32 *tocur;
  int i = 0;

  elt = mtx->hdr.col[mtx->perm.col.cur_to_org[col]];
  tocur = mtx->perm.row.org_to_cur;

  for( ; NOTNULL(elt); elt = elt->next.row ) {
    index[i] = tocur[elt->row];
    value[i] = elt->value;
    i++;
  }
  return i;
}
#endif /* DELETE_THIS_DEAD_FUNCTION */

#ifdef DELETE_THIS_DEAD_FUNCTION
static
int32 mtx_cur_col_to_csr_slow(mtx_matrix_t mtx,
                              int32 col,
                              mtx_range_t *rng,
                              real64 *value,
                              int32 *index)
{
  struct element_t *elt;
  int32 *tocur;
  int32 cur;
  int i = 0;

  elt = mtx->hdr.col[mtx->perm.col.cur_to_org[col]];
  tocur = mtx->perm.row.org_to_cur;

  for( ; NOTNULL(elt); elt = elt->next.row ) {
    cur = tocur[elt->row];
    if (in_range(rng,cur)) {
      index[i] = cur;
      value[i] = elt->value;
      i++;
    }
  }
  return i;
}
#endif /* DELETE_THIS_DEAD_FUNCTION */

#ifdef DELETE_THIS_DEAD_FUNCTION
static
int32 mtx_cur_row_to_csr(mtx_matrix_t mtx,
                         int32 row,
                         real64 *value,
                         int32 *index)
{
  struct element_t *elt;
  int32 *tocur;
  int i = 0;

  tocur = mtx->perm.col.org_to_cur;
  elt = mtx->hdr.row[mtx->perm.row.cur_to_org[row]];

  for( ; NOTNULL(elt); elt = elt->next.col) {
    index[i] = tocur[elt->col];
    value[i] = elt->value;
    i++;
  }
  return i;
}
#endif /* DELETE_THIS_DEAD_FUNCTION */

#ifdef DELETE_THIS_DEAD_FUNCTION
static
int32 mtx_cur_row_to_csr_slow(mtx_matrix_t mtx,
                              int32 row,
                              mtx_range_t *rng,
                              real64 *value,
                              int32 *index)
{
  struct element_t *elt;
  int32 *tocur;
  int32 cur;
  int i = 0;

  tocur = mtx->perm.col.org_to_cur;
  elt = mtx->hdr.row[mtx->perm.row.cur_to_org[row]];

  for( ; NOTNULL(elt); elt = elt->next.col) {
    cur = tocur[elt->col];
    if (in_range(rng,cur)) {
      index[i] = cur;
      value[i] = elt->value;
      i++;
    }
  }
  return i;
}
#endif /* DELETE_THIS_DEAD_FUNCTION */

/*********************************************************************\
  End of New functions added by kaa. KAA_DEBUG
\*********************************************************************/


/*
 * mtx_householder_transform_region(mtx,coef,sp,reg,droptol,transpose);
 * add the dot product coef * u dot Transpose[u] dot A to A.
 * real use: Anew = (I + k u.Transpose[u]) dot A
 * aka A += k u dot Transpose[u] dot A.
 * Bug: assumes no incidence outside the region in the rows of u.
 * Or it's a feature: it buys a LOT of speed.
 */
#undef DUMELT
#define DUMELT ((struct element_t *)8)
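/*
 * Overview: the update proceeds in two passes. First hrow = Transpose[u].A
 * is accumulated into a dense work row and the org column indices of its
 * nonzeros lying in the region are collected in irow. Then every row i with
 * u[i] != 0 has mult*hrow[.] added to it at the columns listed in irow,
 * where mult = -coef*u[i] (see the loops below): existing elements are
 * updated in place and fill-in is created where none exist. DUMELT is a
 * small non-NULL sentinel stored in ev[] marking "column of interest, but
 * no element of the current row seen yet"; it relies on real element
 * pointers comparing greater than 8.
 */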
void mtx_householder_transform_region(mtx_matrix_t mtx,
                                      const real64 coef,
                                      const mtx_sparse_t *sp,
                                      const mtx_region_t *reg,
                                      real64 droptol,
                                      boolean transpose)
{
  int32 *ctoorg;
  struct element_t *elt;
  real64 u,mult;
  int32 ku, kr, numcols, kcol;
  register int32 org;

  /* the following are reusable buffers we must zero before releasing */
  char *mv;                /* mark buffer, should we need it, droptol */
  struct element_t **ev;   /* elt buffer for row addition */
  real64 *hrow;            /* u^T . A */
  int32 *irow;             /* list of nonzeros in hrow */

#if MTX_DEBUG
  if(!mtx_check_matrix(mtx)) return;
#endif

  if (coef==D_ZERO) return;    /* adding 0 is rather silly */
  if (reg==mtx_ENTIRE_MATRIX ||
      reg->col.low > reg->col.high || reg->row.low > reg->row.high) {
    FPRINTF(g_mtxerr,"Error: (mtx.c) Bogus Householder region given.\n");
    return;
  }
  if (sp==NULL) {
    FPRINTF(g_mtxerr,"Error: (mtx.c) Bogus Householder u given.\n");
    return;
  }
  if (sp->len == 0) return;    /* I - k00t = I */
  if (ISNULL(sp->idata) || ISNULL(sp->data)) {
    FPRINTF(g_mtxerr,"Error: (mtx.c) Bogus Householder u data given.\n");
    return;
  }
  if (droptol != D_ZERO || transpose) {
    FPRINTF(g_mtxerr,
      "Error: (mtx.c) Householder droptol and transpose not done.\n");
    return;
  }
  mv = mtx_null_mark(mtx->order);
  if (ISNULL(mv)) {
    FPRINTF(g_mtxerr,"Error: (mtx.c) Householder. Insufficient memory.\n");
    return;
  }
  ev = mtx_null_vector(mtx->order);
  if (ISNULL(ev)) {
    FPRINTF(g_mtxerr,"Error: (mtx.c) Householder. Insufficient memory.\n");
    mtx_null_mark_release();
    return;
  }
  hrow = mtx_null_sum(mtx->order);
  if (ISNULL(hrow)) {
    FPRINTF(g_mtxerr,"Error: (mtx.c) Householder. Insufficient memory.\n");
    mtx_null_mark_release();
    mtx_null_vector_release();
    return;
  }
  irow = mtx_null_index(reg->col.high - reg->col.low +1);
  if (ISNULL(irow)) {
    FPRINTF(g_mtxerr,"Error: (mtx.c) Householder. Insufficient memory.\n");
    mtx_null_mark_release();
    mtx_null_vector_release();
    mtx_null_sum_release();
    return;
  }
  /* are we happy yet?! */

  ctoorg = mtx->perm.col.cur_to_org;

  /* accumulate the dot products for the stuff in the region. No range check! */
  /* in I - tau u u^T . A this is hrow = u^T.A. idata[ku] is an org row index. */
  for (ku=0; ku < sp->len; ku++) {
    u = sp->data[ku];
    if (u != D_ZERO) {
      elt = mtx->hdr.row[sp->idata[ku]];
      while (NOTNULL(elt)) {
        hrow[elt->col] += u*elt->value;    /* 15% */
        elt = elt->next.col;               /* 4% */
      }
    }
  }
  /* accumulate the indices needed to drive zeroing */
  kr = 0;
  for (ku = reg->col.low; ku <= reg->col.high; ku++) {
    org = ctoorg[ku];
    if (hrow[org]!=D_ZERO) {
      irow[kr] = org;     /* collect index */
      ev[org] = DUMELT;   /* init elt array with bogus ptr */
      kr++;
    }
  }
  /* irow 0..kr-1 now has the org col indexes of nonzero elements in hrow,ev */
  numcols = kr;
  /* now add hh transform to rows of A in u, cases with and without coef = 1 */
  if (coef == D_ONE) {
    for (ku=0; ku < sp->len; ku++) {
      if (sp->data[ku] != D_ZERO) {
        mult = -sp->data[ku];
        org = sp->idata[ku];
        /* expand row of interest into locations of interest */
        elt = mtx->hdr.row[org];
        while (NOTNULL(elt)) {
          if (NOTNULL(ev[elt->col])) ev[elt->col] = elt;   /* 11% */
          elt = elt->next.col;                             /* 4% */
        }
        /* run through irow doing the math (finally) */
        for (kr=0; kr < numcols; kr++) {
          kcol = irow[kr];                                 /* 2 */
          if (ev[kcol] > DUMELT) {                         /* 12% */
            ev[kcol]->value += mult*hrow[kcol];            /* 14% */
            ev[kcol] = DUMELT;                             /* 9% */
            /* reinit ev col */
          } else {
            mtx_create_element_value(mtx, org, kcol, mult*hrow[kcol]); /* 6 */
            /* let ev[irow[kr]] col stay DUMELT */
          }
        }
      }
    }
  } else {
    for (ku=0; ku < sp->len; ku++) {
      mult = -sp->data[ku]*coef;
      if (mult != D_ZERO) {
        org = sp->idata[ku];
        /* expand row of interest into locations of interest */
        elt = mtx->hdr.row[org];
        while (NOTNULL(elt)) {
          if (NOTNULL(ev[elt->col])) ev[elt->col] = elt;
          elt = elt->next.col;
        }
        /* run through irow doing the math (finally) */
        for (kr=0; kr < numcols; kr++) {
          kcol = irow[kr];
          if (ev[kcol] > DUMELT) {
            ev[kcol]->value += mult*hrow[kcol];
            ev[kcol] = DUMELT;
            /* reinit ev col */
          } else {
            mtx_create_element_value(mtx, org, kcol, mult*hrow[kcol]);
            /* let ev[irow[kr]] col stay DUMELT */
          }
        }
      }
    }
  }
  /* end case coef != 1 */
  for (kr=0; kr < numcols; kr++) {
    ev[irow[kr]] = NULL;
    hrow[irow[kr]] = D_ZERO;
    irow[kr] = 0;
  }
  /* pack up and go home */
  mtx_null_mark_release();
  mtx_null_vector_release();
  mtx_null_sum_release();
  mtx_null_index_release();
  return;
}
#undef DUMELT
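
/*
 * Illustrative sketch of a call, kept behind an undefined symbol so it is
 * never compiled: the indices, values, and coef below are made up, and
 * idata must hold the org row indices of the nonzeros of u (see the
 * accumulation loop above). Only the mtx_sparse_t fields len/idata/data
 * read by the function are set here.
 */
#ifdef MTX_HOUSEHOLDER_EXAMPLE
static void mtx_householder_example(mtx_matrix_t A, const mtx_region_t *reg)
{
  int32 idx[2];
  real64 val[2];
  mtx_sparse_t u;

  idx[0] = 3;   idx[1] = 5;    /* org rows where u is nonzero */
  val[0] = 0.6; val[1] = 0.8;  /* corresponding values of u */
  u.len = 2;
  u.idata = idx;
  u.data = val;

  /* droptol != 0 and transpose are not yet supported; pass 0.0 and 0 */
  mtx_householder_transform_region(A, 2.0, &u, reg, 0.0, 0);
}
#endif /* MTX_HOUSEHOLDER_EXAMPLE */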


/*
 *********************************************************************
 * mtx_eliminate_row.
 *
 * Start of some new routines, so that we can do some micro-management
 * of the elimination routines.
 *********************************************************************
 */
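/*
 * mtx_linklist is a minimal singly linked chain of column indices, linked
 * backwards through prev and allocated from a mem_store_t. The elimination
 * helpers below use it to remember which entries of the scattered dense
 * work row are nonzero.
 */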
struct mtx_linklist {
  int32 index;
  struct mtx_linklist *prev;
};

/*
 * This function assumes that head is not NULL.
 * We don't check!!
 */
#ifdef THIS_IS_AN_UNUSED_FUNCTION
static struct mtx_linklist *insert_link(struct mtx_linklist *head,
                                        int k,
                                        mem_store_t eltbuffer)
{
  struct mtx_linklist *ptr1, *ptr2;
  struct mtx_linklist *target;

  target = (struct mtx_linklist *)mem_get_element(eltbuffer);
  target->index = k;

  ptr2 = head;
  ptr1 = head->prev;
  while (ptr1) {
    if (ptr1->index < k) {
      target->prev = ptr1;
      ptr2->prev = target;
      return target;
    }
    ptr2 = ptr1;
    ptr1 = ptr1->prev;
  }
  /*
   * If we are here then we reached the end of
   * the chain. Just add target and quit.
   */
  target->prev = ptr1;
  ptr2->prev = target;
  return target;
}
#endif /* THIS_IS_AN_UNUSED_FUNCTION */

/*
 * head may be NULL for this function. We simply add
 * the new index *unsorted* and return the new top
 * of the chain.
 */
#ifdef THIS_IS_AN_UNUSED_FUNCTION
static struct mtx_linklist *add_link(struct mtx_linklist *head,
                                     int k,
                                     mem_store_t eltbuffer)
{
  struct mtx_linklist *target;

  target = (struct mtx_linklist *)mem_get_element(eltbuffer);
  target->index = k;

  target->prev = head;
  return target;
}
#endif /* THIS_IS_AN_UNUSED_FUNCTION */

/*
 * NOTE: This function needs a droptol as a parameter
 */
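/*
 * Overview: mtx_eliminate_row2 works in three phases.
 * (1) Scatter the nonzeros of the target row into the dense array vec,
 *     collecting column indices left of the diagonal in a sorted chain
 *     rooted at diag and those right of it in the unsorted chain right,
 *     then clear the row from mtx.
 * (2) For each collected column k left of the diagonal, form
 *     multiplier = vec[k]/pivots[k], subtract multiplier times pivot row k
 *     from vec (recording any fill-in), and backpatch the multiplier into
 *     vec[k].
 * (3) Gather: multipliers go to upper_mtx, the diagonal entry and the
 *     entries right of the diagonal go back into mtx (the latter subject to
 *     a hard-coded 1.0e-16 drop tolerance), and vec and inserted[] are
 *     returned to their clean all-zero state.
 */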

#ifdef THIS_IS_AN_UNUSED_FUNCTION
static
void mtx_eliminate_row2(mtx_matrix_t mtx,
                        mtx_matrix_t upper_mtx,
                        mtx_range_t *rng,
                        int32 row,
                        real64 *vec,
                        real64 *pivots,
                        int32 *inserted,
                        mem_store_t eltbuffer)
{
  struct element_t *elt;
  struct mtx_linklist *diag, *right=NULL, *ptr;
  int32 *tocur;
  real64 multiplier;
  int k, j;
  mtx_coord_t nz;

  (void)rng;  /* stop gcc whine about unused parameter */

  /*
   * Move all non-zeros from the current row to the full array.
   * The full array will have been initialized before we are called;
   * we must put it back in a clean state when we leave.
   * All operations are done over mtx_ALL_COLS.
   */
  /*
   * We use the following code fragment instead of:
   *   mtx_cur_row_vec(mtx,row,vec,mtx_ALL_COLS);
   * so that we can put the elements in the correct place.
   */

  diag = (struct mtx_linklist *)mem_get_element(eltbuffer);
  diag->index = row;
  diag->prev = NULL;
  inserted[row] = 1;

  /*
   * the insertion for this phase.
   */
  tocur = mtx->perm.col.org_to_cur;
  elt = mtx->hdr.row[mtx->perm.row.cur_to_org[row]];
  ptr = diag;
  for ( ;NOTNULL(elt); elt = elt->next.col) {
    if (elt->value == 0.0) {
      continue;    /* hard zeros */
    }
    k = tocur[elt->col];
    vec[k] = elt->value;
    if (k < row) {    /* the less than is critical */
      (void)insert_link(diag,k,eltbuffer);
      inserted[k] = 1;
    }
    else if (k > row) {
      right = add_link(right,k,eltbuffer);
      inserted[k] = 1;
    }
    else
      continue;
  }

  mtx_clear_row(mtx,row,mtx_ALL_COLS);

  /* we should be trapping these 0 multipliers earlier!!! */

  for (ptr = diag->prev; NOTNULL(ptr); ) {
    k = ptr->index;
    multiplier = vec[k]/pivots[k];
    if (multiplier==D_ZERO) {
      ptr = ptr->prev;
      /* FPRINTF(stderr,"0 multiplier found at %d\n",k); */
      continue;
    }
#ifdef NOP_DEBUG
    mtx_number_ops++;
#endif /* NOP_DEBUG */
    elt = mtx->hdr.row[mtx->perm.row.cur_to_org[k]];
    for ( ;NOTNULL(elt); elt = elt->next.col) {
      j = tocur[elt->col];
      if (!inserted[j]) {
        if (j < k)
          (void)insert_link(ptr,j,eltbuffer);
        else
          right = add_link(right,j,eltbuffer);
        inserted[j] = 1;
      }
      vec[j] = vec[j] - multiplier * elt->value;
#ifdef NOP_DEBUG
      mtx_number_ops++;
#endif /* NOP_DEBUG */
    }
    vec[k] = multiplier;    /* backpatch multiplier */
    ptr = ptr->prev;
  }

  /*
   * Map the data back to the appropriate matrices.
   */

  /*
   * Fill up the upper_matrix with the multipliers.
   */
  nz.row = row;
  for (ptr = diag->prev; NOTNULL(ptr); ptr = ptr->prev) {
    nz.col = ptr->index;
    if (vec[nz.col] != D_ZERO) {    /* don't fill hard 0's */
      mtx_fill_value(upper_mtx,&nz,vec[nz.col]);
    }
    vec[nz.col] = D_ZERO;
    inserted[nz.col] = 0;
  }

  /*
   * Fill the diagonal back to the regular matrix.
   */
  nz.col = row;
  if (vec[nz.col] != D_ZERO) {    /* don't fill hard 0's */
    mtx_fill_value(mtx,&nz,vec[nz.col]);
  }
  vec[row] = D_ZERO;
  inserted[row] = 0;

  /*
   * Fill the lower matrix with the stuff to the right of the diagonal.
   */
  for (ptr = right; NOTNULL(ptr); ptr = ptr->prev) {
    nz.col = ptr->index;
    if (fabs(vec[nz.col]) > 1.0e-16) {    /* THIS NEEDS A DROP TOL DEFINE */
      mtx_fill_value(mtx,&nz,vec[nz.col]);
    }
    vec[nz.col] = D_ZERO;
    inserted[nz.col] = 0;
  }
}
#endif /* THIS_IS_AN_UNUSED_FUNCTION */

#undef __MTX_C_SEEN__