/[ascend]/trunk/ascend4/solver/plainqr.c
ViewVC logotype

Contents of /trunk/ascend4/solver/plainqr.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations) (download) (as text)
Fri Oct 29 20:54:12 2004 UTC (17 years, 6 months ago) by aw0a
File MIME type: text/x-csrc
File size: 30063 byte(s)
Setting up web subdirectory in repository
1 /*
2 * Author: Benjamin A Allan
3 * Version: $Revision: 1.9 $
4 * Version control file: $RCSfile: plainqr.c,v $
5 * Date last modified: $Date: 1997/07/18 12:15:22 $
6 * Last modified by: $Author: mthomas $
7 */
8 /***************************************************************************\
9 CPQR implementation.
10 an insert to linsolqr.c of a column pivoted (with ptol) householder qr
11 method.
12 \***************************************************************************/
13 #define CPQR_DEBUG FALSE
14
15 #ifndef SQR
16 #define SQR(x) (x)*(x)
17 #endif
18 /**
19 *** Finds the largest rectangle within the region given which has the
20 *** UPPERLEFT corner on the diagonal. Also sets sys->rng to be the
21 *** appropriate range of columns to factorize.
22 *** Returns 1 if for any reason it cannot do so.
23 **/
24 static int ul_rectangle_region(linsolqr_system_t sys, mtx_region_t *r)
25 {
26 int32 cmax;
27 if (ISNULL(sys) || ISNULL(r) || ISNULL(sys->coef)) return 1;
28 cmax = mtx_order(sys->coef)-1;
29 if ( r->row.low < 0 || r->row.high < 0 ||
30 r->col.low < 0 || r->col.high < 0 ||
31 r->row.low > cmax || r->row.high > cmax ||
32 r->col.low > cmax || r->col.high > cmax ||
33 r->col.low > r->col.high || r->row.low > r->row.high
34 ) {
35 FPRINTF(stderr,"(linsolqr.c) ul_rectangle_region: Insane region given.\n");
36 return 1;
37 }
38 if (r->row.low > r->col.low) {
39 r->col.low = r->row.low;
40 } else {
41 r->row.low = r->col.low;
42 }
43 if ( r->col.low > r->col.high || r->row.low > r->row.high ) {
44 FPRINTF(stderr,
45 "(linsolqr.c) ul_rectangle_region: Region given off diagonal.\n");
46 return 1;
47 }
48 sys->rng.low = r->col.low;
49 sys->rng.high = MIN(r->col.high,r->row.high);
50 #if CPQR_DEBUG
51 FPRINTF(stderr,"factor region: rows (%d,%d) cols (%d,%d)\n",
52 r->row.low,r->row.high, r->col.low,r->col.high);
53 FPRINTF(stderr,"factor range: cols %d- %d\n",sys->rng.low,sys->rng.high);
54 #endif
55 return 0;
56 }
57
58
59 static real64 cpqr_compute_alpha(real64 * const alpha,
60 const mtx_matrix_t mtx,
61 const mtx_region_t * const reg)
62 /**
63 *** Computes alphas for columns in a region indicated by reg.
64 *** alpha are Frobenius norms : sqrt( sum(sqr(aij)|i,j) )
65 *** as opposed
66 *** to matrix 2-norm: (the largest eigenvalue of At dot A, see stewart p 180)
67 *** or the matrix 1 norm: max( sum(abs(aij) | i) |j)
68 *** or the matrix inf norm: max( sum(abs(aij) | j) |i)
69 *** Returns the Frobenius norm of the region calculated.
70 *** Stores the alphas in curcol order in the vector alpha given.
71 *** If it turns out dragging this cur ordered alpha ends up taking
72 *** much more time than the pivot search, the alpha storage format
73 *** should change to org ordered and the search do the depermutation.
74 **/
75 {
76 int32 col;
77 real64 anorm = D_ZERO, tsqr;
78 for (col = reg->col.low; col <= reg->col.high; col++) {
79 tsqr = mtx_sum_sqrs_in_col(mtx,col,&(reg->row));
80 alpha[col] = sqrt(tsqr);
81 #if CPQR_DEBUG
82 FPRINTF(stderr,"alpha[%d](org %d) = %g\n",
83 col,mtx_col_to_org(mtx,col),alpha[col]);
84 #endif
85 anorm += tsqr;
86 }
87 #if CPQR_DEBUG
88 FPRINTF(stderr,"region frobenius norm: %g\n",sqrt(anorm));
89 #endif
90 return sqrt(anorm);
91 }
92
93
94 static int cpqr_reduce_region(linsolqr_system_t sys,
95 mtx_region_t *r,
96 int32 ccol)
97 /*
98 * Reduce, or recompute as needed, the column norms of the region r
99 * given less its leading row and column.
100 * Assumes the leftmost column of the region is empty in the topmost row!
101 * When done, also increases r->row.low and r->col.low by 1.
102 * RESUM is an adjustable parameter to control when to recompute the
103 * column norm from scratch.
104 * This function does not update the overall A norm.
105 * returns 1 if ccol was changed, 0 if not.
106 */
107 #ifdef RESUM
108 #undef RESUM
109 #endif
110 #define RESUM (double)1.0e+8
111 {
112 real64 sanew, as; /* sanew is pronounced S. A. New */
113 real64 *data, *alpha;
114 int32 *idata, lim, i;
115 int ret;
116
117 idata = sys->qrdata->sp.idata;
118 data = sys->qrdata->sp.data;
119 alpha = sys->qrdata->alpha;
120 ret = 0;
121
122 /* get top row incidence */
123 mtx_cur_row_sparse(sys->factors,r->row.low, &(sys->qrdata->sp),
124 mtx_ALL_COLS, mtx_SOFT_ZEROES);
125 /* bump down or recompute the changed rows. an orthogonal transformation
126 is alleged to not change the norms, so it is sufficient reduce the
127 norms with a little checking for floating point inanity. */
128 for (i=0, lim = sys->qrdata->sp.len;
129 i < lim;
130 i++) {
131 int32 col;
132
133 col = idata[i];
134 if (col == ccol) ret = 1;
135 as = SQR(alpha[col]);
136 sanew = as - SQR(data[i]);
137 if (sanew * RESUM <= as) {
138 alpha[col] = sqrt(sanew);
139 } else {
140 alpha[col] = sqrt(mtx_sum_sqrs_in_col(sys->factors,col,&(r->row)));
141 }
142 }
143 r->row.low++;
144 r->col.low++;
145 return ret;
146 }
147 #undef RESUM
148
149 static real64 cpqr_permute(linsolqr_system_t sys,
150 int32 newcol,
151 mtx_range_t *active)
152 /**
153 *** Permute the region bounded by active and search so that
154 *** the newcol given is now on the left edge. Rows may also
155 *** be permuted if deemed wise.
156 *** In particular if things have not gone in some measure dense:
157 ***
158 *** For a square or tall region,
159 *** the diagonal location (newcol,newcol) will be dragged up to
160 *** (active.low, active.low). That this should be a col swap
161 *** instead is debatable and should be experimentally tested later.
162 ***
163 *** For a wide region, the newcol is dragged up if it is currently
164 *** within the range active, otherwise it is swapped to active.high
165 *** and dragged up to active.low (n log n ouch) in hopes that
166 *** the column punted from range is one most full of junk we don't
167 *** want to use in the future. This strategy is, again, debatable.
168 ***
169 *** System vectors (alpha mainly) will be permuted to stay consistent.
170 *** We should probably also return the sparse corresponding to this
171 *** column at the end of permutation.
172 **/
173 {
174 real64 tmp;
175 #if CPQR_DEBUG
176 FPRINTF(stderr,"cpqr_permuted called to move %d back to %d\n",
177 newcol,active->low);
178 #endif
179 if (newcol <= active->low) return D_ZERO;
180 if (newcol > active->high) {
181 /* swap cols */
182 mtx_swap_cols(sys->factors,newcol,active->high); /* also does hhvects */
183 /* likewise alphas */
184 tmp = sys->qrdata->alpha[newcol];
185 sys->qrdata->alpha[newcol] = sys->qrdata->alpha[active->high];
186 sys->qrdata->alpha[active->high] = tmp;
187 newcol = active->high; /* prepare for drag */
188 }
189 mtx_drag(sys->factors,newcol,active->low);
190 number_drag(sys->qrdata->alpha,newcol,active->low);
191 return D_ZERO;
192 }
193
194 static boolean cpqr_get_householder(linsolqr_system_t sys,
195 int32 curcol,
196 mtx_range_t *rng,
197 mtx_sparse_t *sp)
198 /**
199 *** A Householder matrix is H = I - hhcol dot Transpose(hhcol)
200 *** where hhcol is a Householder vector constructed so H dot x = -s * e1.
201 *** I.e. we want a vector hhcol so H zeros all below the diagonal.
202 *** We suppose the matrix column (x) being transformed is well-scaled:
203 *** i.e. no gymnastics to avoid over or underflow are needed.
204 *** The transform H is unitary. At present note we are constructing
205 *** hhcol so that no multiplier is necessary in applying hhcol later.
206 ***
207 *** 4 Cases:
208 ***
209 *1* If there is nothing in the column (or the column is negligible)
210 *1* we return FALSE. tau and alpha [curcol] are set to 0.
211 ***
212 *2* If there is only one nonzero and it is at the top, we are already
213 *2* in the desired form, so sp->len gets set to 0 as does tau[curcol].
214 *2* For consistency with R in this case we also flop the sign on alpha[curcol].
215 *2* We return TRUE.
216 ***
217 *** Calculate the Householder vector hhcol as follows
218 *** (compare to Intro to Matrix Computations, Stewart, 1971, p233):
219 ***
220 *** s = x^T dot x = norm2(x) a.k.a. alpha[i] with appropriate sign.
221 *** e1 = unit vector: [1,0,...,0].
222 *** hhcol = (Sqrt(s^2 + x1*s))^(-1) (x + s dot e1)
223 *** - the order of operations has been slightly improved.
224 *** - the topmost of element, x1, of x need not be nonzero.
225 *** - hhcol returned in sp.
226 *** - the transform vector (sp) is not put back into any matrix.
227 *** - tau has been eliminated (=1.0) from Stewart's algorithm.
228 ***
229 *** In more particular:
230 ***
231 *3* If x1 != 0:
232 *3* 0) alpha = 2norm of curcol, precomputed.
233 *3* 1) x1 = A(c,c)
234 *3* 2) s = alpha = copysign(alpha,x1) (avoid cancellation in x1+s)
235 *3* 3) xdothhcoll = s*(x1+s) (Stewart's pi, local alias t)
236 *3* 4) x1 = (x1+s)
237 *3* 5) tau[curcol] = 1 unless xdothhcol==0 in which case tau=0
238 *3* and we get out of here mighty confused.
239 *3* Note, tau can now be stored as a boolean.
240 ***
241 *4* If x1 == 0:
242 *4* 0) alpha = 2norm of curcol, precomputed.
243 *4* 1) x1=0.
244 *4* 2) s = alpha
245 *4* R(c,c) is now -s, (alias -alpha[c].)
246 *4* 3) xdothhcoll = s*s (Stewart's pi, alias t)
247 *4* 4) new incidence x1 = s (appended at end of sparse)
248 *4* 5) tau[curcol] = 1 unless xdothhcol==0 in which case tau=0
249 *4* and we get out of here.
250 ***
251 3*4 Then, for x1 == 0 or x1 != 0:
252 3*4 x = x/sqrt(xdothhcol)
253 ***
254 *** The result is a vector stuffed in sp yielding orthogonal H.
255 *** Any time you want to do something with the normalized vector, don't
256 *** forget to check tau[c] of the coefficient for the operation.
257 *** The sparse returned has org row indexed data.
258 ***
259 *** Be sure to watch the signs if updating alpha later: not just the norm.
260 *** It is best if mtx(curcol,curcol) is the largest element in its column,
261 *** but hey, we assumed well scaled, so who cares?
262 ***
263 ! ! Column curcol will have all elements of x removed from it in all cases.
264 ***
265 *** On an empty or zero curcol this will return FALSE, but this should
266 *** never have been called with such a column. If FALSE, nothing was
267 *** done to A (except perhaps a 0.0 valued incidence was removed)
268 *** and alpha and tau[curcol] are 0.
269 **/
270 {
271 real64 s,t;
272 real64 *alpha /* , x1 */;
273 mtx_matrix_t mtx;
274 int32 orgrow,acc_loc,i;
275
276 alpha = sys->qrdata->alpha;
277 if ( alpha[curcol] == D_ZERO) {
278 /* somebody dumb called us and we set them straight. Case 1 */
279 sys->qrdata->tau[curcol] = D_ZERO; /* signal weirdness */
280 FPRINTF(stderr,"ERROR: (linsolqr.c) cpqr_get_householder called\n");
281 FPRINTF(stderr," with alpha[col] = 0.\n");
282 return FALSE;
283 }
284
285 mtx = sys->factors;
286
287 /* find the org row index of A(c,c) */
288 orgrow = mtx_row_to_org(mtx,curcol);
289
290 /* fetch and empty the column we build u from */
291 mtx_steal_org_col_sparse(mtx,curcol,sp,rng);
292 #if CPQR_DEBUG
293 FPRINTF(stderr,"cpqr_get_householder found column %d:\n",curcol);
294 mtx_write_sparse(stderr,sp);
295 #endif
296 /* check sp sanity backup Case 1 */
297 if ( sp->len < 1 ) {
298 alpha[curcol] = D_ZERO; /* somebody lied and we set them straight */
299 sys->qrdata->tau[curcol] = D_ZERO; /* signal weirdness */
300 FPRINTF(stderr,"ERROR: (linsolqr.c) cpqr_get_householder called\n");
301 FPRINTF(stderr," with empty column, norm = 0.\n");
302 return FALSE;
303 }
304 /* check for Identity = H , Case 2 */
305 if (sp->len == 1 && mtx_row_to_org(mtx,curcol) == sp->idata[0]) {
306 /**
307 ** singletons cheap: Anew = Aold, no fill.
308 ** tau(col)= 0.0 (no transform signal later)
309 ** alpha(col) = -Anew(col,col) [alpha(i)= -Rii, remember?]
310 **/
311 sys->qrdata->tau[curcol] = D_ZERO;/* should be defaulted, but in case */
312 sys->qrdata->alpha[curcol] = (-sp->data[0]);
313 sp->len = 0; /* tell outer loop to screw off */
314 return TRUE;
315 }
316 /* search to see where if we have A(c,c) incident. if no, acc_loc=len */
317 for (acc_loc=0; acc_loc < sp->len && orgrow != sp->idata[acc_loc]; acc_loc++);
318 /* whether it is best to search this forward or backward is questionable.
319 in any case, acc_loc should be the location of acc, or the new location
320 for a created acc when done. At beginning, with relatively little fill
321 it shouldn't matter. as fill grows, it may be better to search backward
322 except in columns where the diagonal is prior fill. */
323
324 if (acc_loc < sp->len) { /* proceeding if A(c,c) found */
325 /* x1 = sp->data[acc_loc]; */
326 /* alpha = -Rcc, change sign if needed and add to x1 */
327 s = alpha[curcol] = copysign(alpha[curcol],sp->data[acc_loc]);
328 sp->data[acc_loc] += s;
329 t = 1/sqrt(s*(sp->data[acc_loc]));
330 /*
331 If, like some gcc compilers, your compiler is too amazingly braindead
332 to have copysign in the math library as IEEE recommends, link in the
333 math library on your system that DOES follow the recommendation.
334 On HPs this is /lib/pa1.1/libm.a or libM.a.
335 */
336 } else { /* A(c,c) is not there yet, append */
337 /* x1 = 0; */
338 /* s = alpha[curcol]; */
339 sp->len++;
340 sp->idata[acc_loc] = orgrow;
341 sp->data[acc_loc] = alpha[curcol];
342 t = 1/alpha[curcol];
343 }
344
345 /*
346 If t is 0, then the current column will be effectively
347 empty somehow and we will have a zero pivot. In this case, we
348 can not use this column so GET_HOUSEHOLDER is FALSE
349 and column is undisturbed, except for having been cleaned.
350 An untrapped underflow must have occurred, because we had a nonzero above.
351 */
352 if (t != D_ZERO) {
353 register double apiv;
354 /* scale the hhcol by stewarts 1/pi sqrt, essentially */
355 for (i = 0; i < sp->len; i++) {
356 sp->data[i] *= t;
357 }
358 /* record tau. archaic. */
359 sys->qrdata->tau[curcol] = D_ONE;
360 /* record min pivot size */
361 apiv = fabs(alpha[curcol]);
362 #if CPQR_DEBUG
363 FPRINTF(stderr,"pivotsize = %g\n",apiv);
364 mtx_write_sparse(stderr,sp);
365 #endif
366 if (apiv < sys->smallest_pivot) {
367 sys->smallest_pivot = apiv;
368 }
369 return TRUE;
370 } else {
371 FPRINTF(stderr,"ERROR: (linsolqr.c) cpqr_get_householder called\n");
372 FPRINTF(stderr," with small column, underflow.\n");
373 return FALSE;
374 }
375 }
376
377 static void cpqr_apply_householder(linsolqr_system_t sys,
378 int32 curcol,
379 mtx_region_t *reg)
380 /**
381 *** Takes the rectangle region A indicated by reg and does a Householder
382 *** transformation on it in place. Curcol should be the same as reg->col.low.
383 *** The first column (curcol) is transformed into
384 *** the orthogonal Householder vector unless this is impossible.
385 *** The householder vector is stored in hhvects and removed from
386 *** the matrix.
387 ***
388 *** By construction the curcolth diagonal element of R is the negative of
389 *** alpha[curcol] on exit, but the element is not stored in R.
390 *** The remaining columns in reg are transformed as follows:
391 ***
392 *** A' = A' - hhcol dot hhcol dot A'
393 *** where A' is A sans column curcol.
394 ***
395 *** This is equivalent to: T
396 *** A = [ I - hhcol dot hhcol ] dot A = H dot A
397 *** This will change the sparsity of A.
398 ***
399 **/
400 {
401
402 /* what to do with this? scrap?
403 if (rng->high < rng->low) {
404 return;
405 }
406 */
407 /* Do transform even if last col of square region (to get matrices right). */
408 if (cpqr_get_householder(sys,curcol,&(reg->row),&(sys->qrdata->sp))) {
409 mtx_sparse_t *sp;
410 sp = &(sys->qrdata->sp);
411
412 if (sp->len > 0) {
413 mtx_householder_transform_region(sys->factors,sys->qrdata->tau[curcol],
414 sp,reg,D_ZERO,FALSE);
415 mtx_fill_org_col_sparse(sys->qrdata->hhvects,curcol,sp);
416 }
417 }
418 /* else we have a very empty column and what are we doing here? */
419 }
420
421 static int cpqr_factor(linsolqr_system_t sys)
422 /**
423 *** FACTORIZATION
424 *** -------------
425 *** The QR factorization calculated of the rectangular
426 *** sys->qrdata->facreg using columnwise Householder.
427 *** If the region is taller than wide, the solution will
428 *** will be that of linear least squares. If the region
429 *** is wider than tall, the solution will be one with
430 *** a basis chosen to give large diagonal
431 *** values in R: a reasonably well conditioned basis
432 *** except in rare circumstances.
433 ***
434 *** d = sys->rng,
435 *** Q = H(NR-1)H(NR-2)..H(1),
436 *** R = Q*A where A is the coef matrix,
437 *** H(i) = I - v(i)*Transpose(v(i)) with d.low <= i <= d.high.
438 ***
439 *** The hhvects matrix contains the H(i) in the lower triangle,
440 *** while the diagonal of R is stored densely (in col order)
441 *** as -alpha[] and the off-diagonal elements of R are in the
442 *** superdiagonal triangle of factors.
443 ***
444 *** Note we are using a normalized v(i) that avoids extra
445 *** multiplications when handling multiple RHS. This is
446 *** done at the cost of some extra *,sqrt operations during
447 *** factorization. ||v(i)||_2 = 2.0.
448 *** These operations are trivial compared to the cost of
449 *** Householder transformations.
450 **/
451 {
452 mtx_range_t active; /* range within sys->rng yet to be pivoted */
453 mtx_range_t search; /* range of columns to be examined for pivots */
454 mtx_region_t ak; /* reduced A at each step */
455 int32 lastmax,col,newcol = (-1);
456 int32 colfound = 0, *ibuf;
457 real64 *rbuf, *alpha;
458 struct qr_auxdata *data;
459 int pivotstyle; /* -1, no pivot, 0 ptol pivoting 1, exact col pivoting */
460
461 /* set method variation based on ptol */
462 if (sys->ptol == D_ZERO) {
463 pivotstyle = -1;
464 } else {
465 if (sys->ptol == D_ONE) {
466 pivotstyle = 1;
467 } else {
468 pivotstyle = 0;
469 }
470 }
471
472 /* do initializations need for factoring or refactoring here */
473 data = sys->qrdata;
474 active = sys->rng;
475 search = data->facreg.col;
476 ak = data->facreg;
477 lastmax = 0;
478 data->nu = D_ZERO;
479 ibuf = (int *)data->hhrow;
480 rbuf = data->hhcol;
481 alpha = data->alpha;
482
483 if (pivotstyle >= 0) {
484 /* compute starting column norms and matrix norm*/
485 data->anu = data->asubnu =
486 cpqr_compute_alpha(data->alpha,sys->factors,&(data->facreg));
487 }
488
489 for (col = active.low; col <= active.high;) {
490 /* select newcol, setting colfound to 1 if pivotable. If
491 nothing is pivotable or not pivoting, newcol is active.low */
492 switch (pivotstyle) {
493 case -1:
494 newcol = active.low;
495 alpha[newcol] = sqrt(mtx_sum_sqrs_in_col(sys->factors,
496 newcol,&(ak.row)));
497 break;
498 case 0:
499 /* get the distance of the pivot away from where we are (col). */
500 if (lastmax == 0) {
501 newcol = find_pivot_number(&(alpha[col]), search.high-search.low+1,
502 sys->ptol,sys->pivot_zero,
503 ibuf,rbuf,&lastmax);
504 } else {
505 /* the max hasn't moved from where it was. Don't search past it.
506 Since col has increased one, the previous value is now the len. */
507 newcol = find_pivot_number(&(alpha[col]), lastmax,
508 sys->ptol,sys->pivot_zero,
509 ibuf,rbuf,&lastmax);
510 }
511 newcol += col; /* convert distance to pivot location. */
512 break;
513 case 1:
514 newcol = find_pivot_number(&(alpha[col]), search.high-search.low+1,
515 sys->ptol,sys->pivot_zero,
516 ibuf,rbuf,&lastmax);
517 lastmax = 0;
518 newcol += col; /* convert distance to pivot location. */
519 break;
520 default: /* not reached */
521 break;
522 }
523 if (alpha[newcol] < sys->pivot_zero) {
524 colfound = 0;
525 /* jump out of the loop: remaining cols singular, or in case of
526 no pivoting, we have hit our first empty column and are stuck. */
527 break;
528 } else {
529 colfound = 1;
530 }
531 ++(sys->rank);
532 cpqr_permute(sys,newcol,&active);
533 #if CPQR_DEBUG
534 mtx_write_region_human_cols(stderr,sys->factors,&ak);
535 #endif
536 cpqr_apply_householder(sys,col,&ak);
537 if (pivotstyle >= 0) {
538 if (cpqr_reduce_region(sys,&ak,lastmax+col)) lastmax = 0;
539 } else {
540 ak.col.low = (++ak.row.low);
541 }
542 search.low = active.low = (++col);
543 }
544
545 #if CPQR_DEBUG
546 FPRINTF(stderr,"R\n");
547 mtx_write_region_human_cols(stderr,sys->factors,&(data->facreg));
548 FPRINTF(stderr,"hhcols\n");
549 mtx_write_region_human_cols(stderr,data->hhvects,&(data->facreg));
550 for (col = data->facreg.col.low; col <= data->facreg.col.high; col++) {
551 FPRINTF(stderr,"alpha[%d](org %d) = %g\n",
552 col,mtx_col_to_org(sys->factors,col),alpha[col]);
553 }
554 #endif
555 if (!colfound) {
556 /* handle setup for singularity, if any */
557 return 1;
558 }
559 return 0;
560 }
561
562 #define TAU_ONE TRUE
563 static void cpqr_forward_eliminate(linsolqr_system_t sys,
564 real64 *arr,
565 boolean transpose)
566 /**
567 *** cpqr_forward_eliminate(sys,c,transpose)
568 *** convert rhs to c in place (only stored u of H= I-tau u dot Transpose(u)
569 ***
570 *** transpose=FALSE
571 *** c=Q.rhs.
572 *** (assuming independent columns/rows 1 to rank)
573 *** for j= 1 to rank (apply H(j) to rhs, HH foward elim)
574 *** if (tau(j)!= 0)
575 *** w=tau(j)* (Transpose(u(j)) dot c)
576 *** if (w!=0)
577 *** c -= w*u(j)
578 *** endif
579 *** endif
580 *** endfor
581 ***
582 *** transpose=TRUE
583 *** Solve Transpose(R).c=rhs. (given R in untransposed form)
584 *** 0<=k<r ==> x(k) = [c(k) - R((0..k-1),k) dot x(0..k-1)]/R(k,k)
585 ***
586 *** Neither of these operations require range checking the dots and
587 *** adds as in both cases we know the rest of the column in question
588 *** are empty.
589 ***
590 *** If !transpose && TAU_ONE, then assumes tau[i]=1 if tau[i]!=0.
591 **/
592 {
593 mtx_range_t dot_rng;
594 real64 sum;
595 mtx_matrix_t mtx;
596
597 dot_rng.high = sys->rng.low + sys->rank -1;
598
599 if (transpose) { /* ok */
600 /* arr is indexed by original column number */
601 int32 dotlim,col;
602 const real64 *diag;
603 register int32 org_col;
604
605 mtx = sys->factors;
606 diag = sys->qrdata->alpha;
607 dot_rng.low = sys->rng.low;
608 dotlim = dot_rng.low+sys->rank;
609 /* 0 <= k <r */
610 for( col=dot_rng.low; col <dotlim; ++col) {
611 /* rows of transpose are cols of R */
612 dot_rng.high = col-1;
613 /* sum = R((0..k-1),k) dot x(0..k-1) */
614 sum = mtx_col_dot_full_org_vec(mtx,col,arr,mtx_ALL_ROWS,TRUE);
615 org_col = mtx_col_to_org(mtx,col);
616 /* arr[org_col] = (arr[org_col] - sum)/ -diag[k]; */
617 arr[org_col] = (sum - arr[org_col])/diag[col];
618 }
619 } else {
620
621 /* arr is indexed by original row number */
622 /* apply Q to it */
623 real64 *tau;
624
625 mtx = sys->qrdata->hhvects;
626 tau = sys->qrdata->tau;
627
628 for( dot_rng.low = sys->rng.low;
629 dot_rng.low <= dot_rng.high;
630 dot_rng.low++) {
631 if (tau[dot_rng.low]!=D_ZERO) {
632 sum = mtx_col_dot_full_org_vec(mtx,dot_rng.low,arr,mtx_ALL_ROWS,FALSE);
633 if (sum != D_ZERO) {
634 #if TAU_ONE
635 mtx_org_vec_add_col(mtx,arr,dot_rng.low,-sum,mtx_ALL_ROWS,FALSE);
636 #else
637 mtx_org_vec_add_col(mtx,arr,dot_rng.low,
638 -sum*tau[dot_rng.low],mtx_ALL_ROWS,FALSE);
639 #endif
640 }
641 }
642 }
643 }
644 }
645
646 static void cpqr_backward_substitute(linsolqr_system_t sys,
647 real64 *arr,
648 boolean transpose)
649 /**
650 *** cpqr_backward_substitute(sys,rhs,transpose):
651 *** It is assumed that the R (or Q) part of sys->factors is computed.
652 *** This function converts rhs to c in place by solving one of the
653 *** following:
654 ***
655 *** transpose = FALSE transpose = TRUE
656 *** R.c = rhs Q.c = rhs
657 ***
658 *** The following formulae hold:
659 *** (for rank=r, upper left is R(0,0) transpose= FALSE
660 *** r>k>=0 --> c(k) = [rhs(k) - R(k,(k+1..r-1)) dot c(k+1..r-1)] / R(k,k)
661 *** -R(k,k) is assumed to be in sys->qrdata->alpha[k]
662 *** or
663 *** (for rank=r, upper left is Q(0,0) transpose= TRUE
664 *** c=Transpose(Q).rhs ==>
665 *** for k = rank..1
666 *** c = H(k).rhs = rhs - tau*(Transpose(uk) dot rhs) *uk
667 *** rhs <-- c
668 *** endfor
669 ***
670 *** For transpose == FALSE, this requires filtering out the leftover
671 *** righthand columns of R unless the problem is square or tall and of
672 *** full rank.
673 *** For transpose == TRUE, this uses the fast add and dot.
674 *** This counts on the fact that the diagonal was removed from R during
675 *** processing and is stored in alpha.
676 *** If transpose && TAU_ONE, then assumes tau[i]=1 if tau[i]!=0.
677 **/
678 {
679 mtx_range_t dot_rng;
680 real64 sum;
681 mtx_matrix_t mtx;
682 int32 dotlim;
683
684 dot_rng.high = sys->rng.low+sys->rank -1; /* ultimate pivoted row/col */
685 dotlim = sys->rng.low; /* upleft corner row/col */
686
687 if (transpose) {
688 /* arr is indexed by original column number */
689 /* apply Q */
690 real64 *tau;
691 mtx = sys->qrdata->hhvects;
692 tau = sys->qrdata->tau;
693
694 /*** for k = rank..1
695 *** c = H(k).rhs = rhs - tau*(Transpose(uk) dot rhs) *uk
696 *** rhs <-- c
697 *** endfor
698 */
699 for (dot_rng.low=dot_rng.high; dot_rng.low >=dotlim; dot_rng.low--) {
700 if (tau[dot_rng.low]!=D_ZERO) {
701 sum = mtx_col_dot_full_org_vec(mtx,dot_rng.low,arr,mtx_ALL_ROWS,TRUE);
702 if (sum != D_ZERO) {
703 #if TAU_ONE
704 mtx_org_vec_add_col(mtx,arr,dot_rng.low,-sum, mtx_ALL_ROWS,TRUE);
705 #else
706 mtx_org_vec_add_col(mtx,arr,dot_rng.low,-sum*tau[dot_rng.low],
707 mtx_ALL_ROWS,TRUE);
708 #endif
709 }
710 }
711 }
712 } else {
713
714 int32 org_row,row;
715 real64 *diag;
716
717 mtx = sys->factors;
718 diag = sys->qrdata->alpha;
719
720 /* apply R */
721 /* r >k>=0 we are working backwards through the pivoted rows. */
722 /* dot_rng is stuff to the right of the current pivot, (row,row),
723 and within the basis columns. */
724
725 if ( dot_rng.high == sys->qrdata->facreg.col.high ) {
726 /* cool, we can use the fast math since all cols are in basis */
727 for( row = dot_rng.high; row >= dotlim; --row) {
728 dot_rng.low = row+1; /* our dot left edge is just after the pivot */
729
730 /* sum = R(k,(k+1..r-1)) dot c(k+1..r-1) */
731 sum = mtx_row_dot_full_org_vec(mtx,row,arr,mtx_ALL_COLS,TRUE);
732 org_row = mtx_row_to_org(mtx,row);
733
734 /* c(k) = [rhs(k) -sum] /R(k,k) */
735 /* arr[org_row] = (arr[org_row] - sum) / -diag[row]; */
736 arr[org_row] = (sum - arr[org_row])/diag[row];
737 }
738 } else {
739 /* wah! we have to shuffle all the crap in R from either rank
740 deficiency or extra cols */
741 for( row = dot_rng.high; row >=dotlim; --row) {
742 dot_rng.low = row+1; /* our dot left edge is just after the pivot */
743
744 /* sum = R(k,(k+1..r-1)) dot c(k+1..r-1) */
745 sum = mtx_row_dot_full_org_vec(mtx,row,arr,&dot_rng,TRUE);
746 org_row = mtx_row_to_org(mtx,row);
747
748 /* c(k) = [rhs(k) -sum] /R(k,k) */
749 /* arr[org_row] = (arr[org_row] - sum) / -diag[row]; */
750 arr[org_row] = (sum - arr[org_row])/diag[row];
751 }
752 }
753 }
754 }
755 #undef TAU_ONE
756
757 static int cpqr_entry(linsolqr_system_t sys,mtx_region_t *region)
758 /**
759 *** The region to factor is first isolated by truncating the region
760 *** provided to the largest rectangular region with an upper left
761 *** corner on the diagonal. (I.E. it may be over or under specified.)
762 *** Then the
763 *** It is presumed it will contain no empty rows or columns and that it has
764 *** been previously reordered using linsolqr_reorder(sys,region,tspk1 or
765 *** some other QR friendly method).
766 *** on exit, sys->factors, and sys->inverse will have been
767 *** permuted identically by solution process. sys->coef will not be
768 *** permuted.
769 **/
770 {
771 struct rhs_list *rl;
772 boolean rank_deficient;
773 mtx_region_t factor_region;
774
775 CHECK_SYSTEM(sys);
776 if( sys->factored )
777 return 0;
778 if( sys->fmethod!=plain_qr )
779 return 1;
780 if(ISNULL(sys->qrdata))
781 return 1;
782
783 if (region == mtx_ENTIRE_MATRIX) {
784 determine_pivot_range(sys);
785 factor_region = sys->reg;
786 } else {
787 factor_region = *region;
788 }
789 if (ul_rectangle_region(sys,&factor_region)) return 1;
790 sys->qrdata->facreg = factor_region;
791
792 if( NOTNULL(sys->inverse) ) {
793 mtx_destroy(sys->inverse);
794 sys->inverse = NULL;
795 }
796 if( NOTNULL(sys->factors) ) mtx_destroy(sys->factors);
797
798 sys->factors = mtx_copy_region(sys->coef, &(sys->qrdata->facreg));
799 sys->qrdata->hhvects = mtx_create_slave(sys->factors);
800 sys->rank = 0;
801 sys->smallest_pivot = MAXDOUBLE;
802 for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next )
803 rl->solved = FALSE;
804 insure_capacity(sys); /* this should zero the vectors if needed */
805 insure_qr_capacity(sys); /* this should zero the vectors if needed */
806
807 rank_deficient = cpqr_factor(sys);
808 if (rank_deficient) {
809 #if LINSOLQR_DEBUG
810 int j;
811 #endif
812 FPRINTF(stderr,"Warning: cpqr found column rank %d of %d\n",sys->rank,
813 sys->rng.high-sys->rng.low+1);
814 #if LINSOLQR_DEBUG
815 FPRINTF(stderr,"alpha vec:(curcol,val)\n");
816 for (j=sys->qrdata->facreg.col.low;
817 j<= sys->qrdata->facreg.col.high; j++)
818 FPRINTF(stderr,"alpha[%d] = %.8g\n",j,sys->qrdata->alpha[j]);
819 FPRINTF(stderr,"tau vec:(curcol,val)\n");
820 for (j=sys->qrdata->facreg.col.low;
821 j<= sys->qrdata->facreg.col.high; j++)
822 FPRINTF(stderr,"tau[%d] = %.8g\n",j,sys->qrdata->tau[j]);
823 #endif
824 }
825 sys->factored = TRUE;
826 return 0;
827 }
828
829 /**
830 *** Solve a previously qr factorized matrix with a rhs b.
831 *** If b is not transposed (is org row ordered):
832 *** c:=Q.b, then solve R.x=c for x.
833 **/
834 static int cpqr_solve(linsolqr_system_t sys,struct rhs_list *rl)
835 {
836 cpqr_forward_eliminate(sys,rl->varvalue,rl->transpose);
837 cpqr_backward_substitute(sys,rl->varvalue,rl->transpose);
838 /* doesn't the following destroy the least squares solution? */
839 zero_unpivoted_vars(sys,rl->varvalue,rl->transpose);
840 return 0;
841 }
842 /***************************************************************************\
843 End of CPQR implementation.
844 \***************************************************************************/

john.pye@anu.edu.au
ViewVC Help
Powered by ViewVC 1.1.22