1 |
/* |
2 |
* Author: Benjamin A Allan |
3 |
* Version: $Revision: 1.9 $ |
4 |
* Version control file: $RCSfile: plainqr.c,v $ |
5 |
* Date last modified: $Date: 1997/07/18 12:15:22 $ |
6 |
* Last modified by: $Author: mthomas $ |
7 |
*/ |
8 |
/***************************************************************************\ |
9 |
CPQR implementation. |
10 |
an insert to linsolqr.c of a column pivoted (with ptol) householder qr |
11 |
method. |
12 |
\***************************************************************************/ |
13 |
#define CPQR_DEBUG FALSE |
14 |
|
15 |
#ifndef SQR |
16 |
#define SQR(x) (x)*(x) |
17 |
#endif |
18 |
/** |
19 |
*** Finds the largest rectangle within the region given which has the |
20 |
*** UPPERLEFT corner on the diagonal. Also sets sys->rng to be the |
21 |
*** appropriate range of columns to factorize. |
22 |
*** Returns 1 if for any reason it cannot do so. |
23 |
**/ |
24 |
static int ul_rectangle_region(linsolqr_system_t sys, mtx_region_t *r) |
25 |
{ |
26 |
int32 cmax; |
27 |
if (ISNULL(sys) || ISNULL(r) || ISNULL(sys->coef)) return 1; |
28 |
cmax = mtx_order(sys->coef)-1; |
29 |
if ( r->row.low < 0 || r->row.high < 0 || |
30 |
r->col.low < 0 || r->col.high < 0 || |
31 |
r->row.low > cmax || r->row.high > cmax || |
32 |
r->col.low > cmax || r->col.high > cmax || |
33 |
r->col.low > r->col.high || r->row.low > r->row.high |
34 |
) { |
35 |
FPRINTF(stderr,"(linsolqr.c) ul_rectangle_region: Insane region given.\n"); |
36 |
return 1; |
37 |
} |
38 |
if (r->row.low > r->col.low) { |
39 |
r->col.low = r->row.low; |
40 |
} else { |
41 |
r->row.low = r->col.low; |
42 |
} |
43 |
if ( r->col.low > r->col.high || r->row.low > r->row.high ) { |
44 |
FPRINTF(stderr, |
45 |
"(linsolqr.c) ul_rectangle_region: Region given off diagonal.\n"); |
46 |
return 1; |
47 |
} |
48 |
sys->rng.low = r->col.low; |
49 |
sys->rng.high = MIN(r->col.high,r->row.high); |
50 |
#if CPQR_DEBUG |
51 |
FPRINTF(stderr,"factor region: rows (%d,%d) cols (%d,%d)\n", |
52 |
r->row.low,r->row.high, r->col.low,r->col.high); |
53 |
FPRINTF(stderr,"factor range: cols %d- %d\n",sys->rng.low,sys->rng.high); |
54 |
#endif |
55 |
return 0; |
56 |
} |
57 |
|
58 |
|
59 |
static real64 cpqr_compute_alpha(real64 * const alpha, |
60 |
const mtx_matrix_t mtx, |
61 |
const mtx_region_t * const reg) |
62 |
/** |
63 |
*** Computes alphas for columns in a region indicated by reg. |
64 |
*** alpha are Frobenius norms : sqrt( sum(sqr(aij)|i,j) ) |
65 |
*** as opposed |
66 |
*** to matrix 2-norm: (the largest eigenvalue of At dot A, see stewart p 180) |
67 |
*** or the matrix 1 norm: max( sum(abs(aij) | i) |j) |
68 |
*** or the matrix inf norm: max( sum(abs(aij) | j) |i) |
69 |
*** Returns the Frobenius norm of the region calculated. |
70 |
*** Stores the alphas in curcol order in the vector alpha given. |
71 |
*** If it turns out dragging this cur ordered alpha ends up taking |
72 |
*** much more time than the pivot search, the alpha storage format |
73 |
*** should change to org ordered and the search do the depermutation. |
74 |
**/ |
75 |
{ |
76 |
int32 col; |
77 |
real64 anorm = D_ZERO, tsqr; |
78 |
for (col = reg->col.low; col <= reg->col.high; col++) { |
79 |
tsqr = mtx_sum_sqrs_in_col(mtx,col,&(reg->row)); |
80 |
alpha[col] = sqrt(tsqr); |
81 |
#if CPQR_DEBUG |
82 |
FPRINTF(stderr,"alpha[%d](org %d) = %g\n", |
83 |
col,mtx_col_to_org(mtx,col),alpha[col]); |
84 |
#endif |
85 |
anorm += tsqr; |
86 |
} |
87 |
#if CPQR_DEBUG |
88 |
FPRINTF(stderr,"region frobenius norm: %g\n",sqrt(anorm)); |
89 |
#endif |
90 |
return sqrt(anorm); |
91 |
} |
92 |
|
93 |
|
94 |
static int cpqr_reduce_region(linsolqr_system_t sys, |
95 |
mtx_region_t *r, |
96 |
int32 ccol) |
97 |
/* |
98 |
* Reduce, or recompute as needed, the column norms of the region r |
99 |
* given less its leading row and column. |
100 |
* Assumes the leftmost column of the region is empty in the topmost row! |
101 |
* When done, also increases r->row.low and r->col.low by 1. |
102 |
* RESUM is an adjustable parameter to control when to recompute the |
103 |
* column norm from scratch. |
104 |
* This function does not update the overall A norm. |
105 |
* returns 1 if ccol was changed, 0 if not. |
106 |
*/ |
107 |
#ifdef RESUM |
108 |
#undef RESUM |
109 |
#endif |
110 |
#define RESUM (double)1.0e+8 |
111 |
{ |
112 |
real64 sanew, as; /* sanew is pronounced S. A. New */ |
113 |
real64 *data, *alpha; |
114 |
int32 *idata, lim, i; |
115 |
int ret; |
116 |
|
117 |
idata = sys->qrdata->sp.idata; |
118 |
data = sys->qrdata->sp.data; |
119 |
alpha = sys->qrdata->alpha; |
120 |
ret = 0; |
121 |
|
122 |
/* get top row incidence */ |
123 |
mtx_cur_row_sparse(sys->factors,r->row.low, &(sys->qrdata->sp), |
124 |
mtx_ALL_COLS, mtx_SOFT_ZEROES); |
125 |
/* bump down or recompute the changed rows. an orthogonal transformation |
126 |
is alleged to not change the norms, so it is sufficient reduce the |
127 |
norms with a little checking for floating point inanity. */ |
128 |
for (i=0, lim = sys->qrdata->sp.len; |
129 |
i < lim; |
130 |
i++) { |
131 |
int32 col; |
132 |
|
133 |
col = idata[i]; |
134 |
if (col == ccol) ret = 1; |
135 |
as = SQR(alpha[col]); |
136 |
sanew = as - SQR(data[i]); |
137 |
if (sanew * RESUM <= as) { |
138 |
alpha[col] = sqrt(sanew); |
139 |
} else { |
140 |
alpha[col] = sqrt(mtx_sum_sqrs_in_col(sys->factors,col,&(r->row))); |
141 |
} |
142 |
} |
143 |
r->row.low++; |
144 |
r->col.low++; |
145 |
return ret; |
146 |
} |
147 |
#undef RESUM |
148 |
|
149 |
static real64 cpqr_permute(linsolqr_system_t sys, |
150 |
int32 newcol, |
151 |
mtx_range_t *active) |
152 |
/** |
153 |
*** Permute the region bounded by active and search so that |
154 |
*** the newcol given is now on the left edge. Rows may also |
155 |
*** be permuted if deemed wise. |
156 |
*** In particular if things have not gone in some measure dense: |
157 |
*** |
158 |
*** For a square or tall region, |
159 |
*** the diagonal location (newcol,newcol) will be dragged up to |
160 |
*** (active.low, active.low). That this should be a col swap |
161 |
*** instead is debatable and should be experimentally tested later. |
162 |
*** |
163 |
*** For a wide region, the newcol is dragged up if it is currently |
164 |
*** within the range active, otherwise it is swapped to active.high |
165 |
*** and dragged up to active.low (n log n ouch) in hopes that |
166 |
*** the column punted from range is one most full of junk we don't |
167 |
*** want to use in the future. This strategy is, again, debatable. |
168 |
*** |
169 |
*** System vectors (alpha mainly) will be permuted to stay consistent. |
170 |
*** We should probably also return the sparse corresponding to this |
171 |
*** column at the end of permutation. |
172 |
**/ |
173 |
{ |
174 |
real64 tmp; |
175 |
#if CPQR_DEBUG |
176 |
FPRINTF(stderr,"cpqr_permuted called to move %d back to %d\n", |
177 |
newcol,active->low); |
178 |
#endif |
179 |
if (newcol <= active->low) return D_ZERO; |
180 |
if (newcol > active->high) { |
181 |
/* swap cols */ |
182 |
mtx_swap_cols(sys->factors,newcol,active->high); /* also does hhvects */ |
183 |
/* likewise alphas */ |
184 |
tmp = sys->qrdata->alpha[newcol]; |
185 |
sys->qrdata->alpha[newcol] = sys->qrdata->alpha[active->high]; |
186 |
sys->qrdata->alpha[active->high] = tmp; |
187 |
newcol = active->high; /* prepare for drag */ |
188 |
} |
189 |
mtx_drag(sys->factors,newcol,active->low); |
190 |
number_drag(sys->qrdata->alpha,newcol,active->low); |
191 |
return D_ZERO; |
192 |
} |
193 |
|
194 |
static boolean cpqr_get_householder(linsolqr_system_t sys, |
195 |
int32 curcol, |
196 |
mtx_range_t *rng, |
197 |
mtx_sparse_t *sp) |
198 |
/** |
199 |
*** A Householder matrix is H = I - hhcol dot Transpose(hhcol) |
200 |
*** where hhcol is a Householder vector constructed so H dot x = -s * e1. |
201 |
*** I.e. we want a vector hhcol so H zeros all below the diagonal. |
202 |
*** We suppose the matrix column (x) being transformed is well-scaled: |
203 |
*** i.e. no gymnastics to avoid over or underflow are needed. |
204 |
*** The transform H is unitary. At present note we are constructing |
205 |
*** hhcol so that no multiplier is necessary in applying hhcol later. |
206 |
*** |
207 |
*** 4 Cases: |
208 |
*** |
209 |
*1* If there is nothing in the column (or the column is negligible) |
210 |
*1* we return FALSE. tau and alpha [curcol] are set to 0. |
211 |
*** |
212 |
*2* If there is only one nonzero and it is at the top, we are already |
213 |
*2* in the desired form, so sp->len gets set to 0 as does tau[curcol]. |
214 |
*2* For consistency with R in this case we also flop the sign on alpha[curcol]. |
215 |
*2* We return TRUE. |
216 |
*** |
217 |
*** Calculate the Householder vector hhcol as follows |
218 |
*** (compare to Intro to Matrix Computations, Stewart, 1971, p233): |
219 |
*** |
220 |
*** s = x^T dot x = norm2(x) a.k.a. alpha[i] with appropriate sign. |
221 |
*** e1 = unit vector: [1,0,...,0]. |
222 |
*** hhcol = (Sqrt(s^2 + x1*s))^(-1) (x + s dot e1) |
223 |
*** - the order of operations has been slightly improved. |
224 |
*** - the topmost of element, x1, of x need not be nonzero. |
225 |
*** - hhcol returned in sp. |
226 |
*** - the transform vector (sp) is not put back into any matrix. |
227 |
*** - tau has been eliminated (=1.0) from Stewart's algorithm. |
228 |
*** |
229 |
*** In more particular: |
230 |
*** |
231 |
*3* If x1 != 0: |
232 |
*3* 0) alpha = 2norm of curcol, precomputed. |
233 |
*3* 1) x1 = A(c,c) |
234 |
*3* 2) s = alpha = copysign(alpha,x1) (avoid cancellation in x1+s) |
235 |
*3* 3) xdothhcoll = s*(x1+s) (Stewart's pi, local alias t) |
236 |
*3* 4) x1 = (x1+s) |
237 |
*3* 5) tau[curcol] = 1 unless xdothhcol==0 in which case tau=0 |
238 |
*3* and we get out of here mighty confused. |
239 |
*3* Note, tau can now be stored as a boolean. |
240 |
*** |
241 |
*4* If x1 == 0: |
242 |
*4* 0) alpha = 2norm of curcol, precomputed. |
243 |
*4* 1) x1=0. |
244 |
*4* 2) s = alpha |
245 |
*4* R(c,c) is now -s, (alias -alpha[c].) |
246 |
*4* 3) xdothhcoll = s*s (Stewart's pi, alias t) |
247 |
*4* 4) new incidence x1 = s (appended at end of sparse) |
248 |
*4* 5) tau[curcol] = 1 unless xdothhcol==0 in which case tau=0 |
249 |
*4* and we get out of here. |
250 |
*** |
251 |
3*4 Then, for x1 == 0 or x1 != 0: |
252 |
3*4 x = x/sqrt(xdothhcol) |
253 |
*** |
254 |
*** The result is a vector stuffed in sp yielding orthogonal H. |
255 |
*** Any time you want to do something with the normalized vector, don't |
256 |
*** forget to check tau[c] of the coefficient for the operation. |
257 |
*** The sparse returned has org row indexed data. |
258 |
*** |
259 |
*** Be sure to watch the signs if updating alpha later: not just the norm. |
260 |
*** It is best if mtx(curcol,curcol) is the largest element in its column, |
261 |
*** but hey, we assumed well scaled, so who cares? |
262 |
*** |
263 |
! ! Column curcol will have all elements of x removed from it in all cases. |
264 |
*** |
265 |
*** On an empty or zero curcol this will return FALSE, but this should |
266 |
*** never have been called with such a column. If FALSE, nothing was |
267 |
*** done to A (except perhaps a 0.0 valued incidence was removed) |
268 |
*** and alpha and tau[curcol] are 0. |
269 |
**/ |
270 |
{ |
271 |
real64 s,t; |
272 |
real64 *alpha /* , x1 */; |
273 |
mtx_matrix_t mtx; |
274 |
int32 orgrow,acc_loc,i; |
275 |
|
276 |
alpha = sys->qrdata->alpha; |
277 |
if ( alpha[curcol] == D_ZERO) { |
278 |
/* somebody dumb called us and we set them straight. Case 1 */ |
279 |
sys->qrdata->tau[curcol] = D_ZERO; /* signal weirdness */ |
280 |
FPRINTF(stderr,"ERROR: (linsolqr.c) cpqr_get_householder called\n"); |
281 |
FPRINTF(stderr," with alpha[col] = 0.\n"); |
282 |
return FALSE; |
283 |
} |
284 |
|
285 |
mtx = sys->factors; |
286 |
|
287 |
/* find the org row index of A(c,c) */ |
288 |
orgrow = mtx_row_to_org(mtx,curcol); |
289 |
|
290 |
/* fetch and empty the column we build u from */ |
291 |
mtx_steal_org_col_sparse(mtx,curcol,sp,rng); |
292 |
#if CPQR_DEBUG |
293 |
FPRINTF(stderr,"cpqr_get_householder found column %d:\n",curcol); |
294 |
mtx_write_sparse(stderr,sp); |
295 |
#endif |
296 |
/* check sp sanity backup Case 1 */ |
297 |
if ( sp->len < 1 ) { |
298 |
alpha[curcol] = D_ZERO; /* somebody lied and we set them straight */ |
299 |
sys->qrdata->tau[curcol] = D_ZERO; /* signal weirdness */ |
300 |
FPRINTF(stderr,"ERROR: (linsolqr.c) cpqr_get_householder called\n"); |
301 |
FPRINTF(stderr," with empty column, norm = 0.\n"); |
302 |
return FALSE; |
303 |
} |
304 |
/* check for Identity = H , Case 2 */ |
305 |
if (sp->len == 1 && mtx_row_to_org(mtx,curcol) == sp->idata[0]) { |
306 |
/** |
307 |
** singletons cheap: Anew = Aold, no fill. |
308 |
** tau(col)= 0.0 (no transform signal later) |
309 |
** alpha(col) = -Anew(col,col) [alpha(i)= -Rii, remember?] |
310 |
**/ |
311 |
sys->qrdata->tau[curcol] = D_ZERO;/* should be defaulted, but in case */ |
312 |
sys->qrdata->alpha[curcol] = (-sp->data[0]); |
313 |
sp->len = 0; /* tell outer loop to screw off */ |
314 |
return TRUE; |
315 |
} |
316 |
/* search to see where if we have A(c,c) incident. if no, acc_loc=len */ |
317 |
for (acc_loc=0; acc_loc < sp->len && orgrow != sp->idata[acc_loc]; acc_loc++); |
318 |
/* whether it is best to search this forward or backward is questionable. |
319 |
in any case, acc_loc should be the location of acc, or the new location |
320 |
for a created acc when done. At beginning, with relatively little fill |
321 |
it shouldn't matter. as fill grows, it may be better to search backward |
322 |
except in columns where the diagonal is prior fill. */ |
323 |
|
324 |
if (acc_loc < sp->len) { /* proceeding if A(c,c) found */ |
325 |
/* x1 = sp->data[acc_loc]; */ |
326 |
/* alpha = -Rcc, change sign if needed and add to x1 */ |
327 |
s = alpha[curcol] = copysign(alpha[curcol],sp->data[acc_loc]); |
328 |
sp->data[acc_loc] += s; |
329 |
t = 1/sqrt(s*(sp->data[acc_loc])); |
330 |
/* |
331 |
If, like some gcc compilers, your compiler is too amazingly braindead |
332 |
to have copysign in the math library as IEEE recommends, link in the |
333 |
math library on your system that DOES follow the recommendation. |
334 |
On HPs this is /lib/pa1.1/libm.a or libM.a. |
335 |
*/ |
336 |
} else { /* A(c,c) is not there yet, append */ |
337 |
/* x1 = 0; */ |
338 |
/* s = alpha[curcol]; */ |
339 |
sp->len++; |
340 |
sp->idata[acc_loc] = orgrow; |
341 |
sp->data[acc_loc] = alpha[curcol]; |
342 |
t = 1/alpha[curcol]; |
343 |
} |
344 |
|
345 |
/* |
346 |
If t is 0, then the current column will be effectively |
347 |
empty somehow and we will have a zero pivot. In this case, we |
348 |
can not use this column so GET_HOUSEHOLDER is FALSE |
349 |
and column is undisturbed, except for having been cleaned. |
350 |
An untrapped underflow must have occurred, because we had a nonzero above. |
351 |
*/ |
352 |
if (t != D_ZERO) { |
353 |
register double apiv; |
354 |
/* scale the hhcol by stewarts 1/pi sqrt, essentially */ |
355 |
for (i = 0; i < sp->len; i++) { |
356 |
sp->data[i] *= t; |
357 |
} |
358 |
/* record tau. archaic. */ |
359 |
sys->qrdata->tau[curcol] = D_ONE; |
360 |
/* record min pivot size */ |
361 |
apiv = fabs(alpha[curcol]); |
362 |
#if CPQR_DEBUG |
363 |
FPRINTF(stderr,"pivotsize = %g\n",apiv); |
364 |
mtx_write_sparse(stderr,sp); |
365 |
#endif |
366 |
if (apiv < sys->smallest_pivot) { |
367 |
sys->smallest_pivot = apiv; |
368 |
} |
369 |
return TRUE; |
370 |
} else { |
371 |
FPRINTF(stderr,"ERROR: (linsolqr.c) cpqr_get_householder called\n"); |
372 |
FPRINTF(stderr," with small column, underflow.\n"); |
373 |
return FALSE; |
374 |
} |
375 |
} |
376 |
|
377 |
static void cpqr_apply_householder(linsolqr_system_t sys, |
378 |
int32 curcol, |
379 |
mtx_region_t *reg) |
380 |
/** |
381 |
*** Takes the rectangle region A indicated by reg and does a Householder |
382 |
*** transformation on it in place. Curcol should be the same as reg->col.low. |
383 |
*** The first column (curcol) is transformed into |
384 |
*** the orthogonal Householder vector unless this is impossible. |
385 |
*** The householder vector is stored in hhvects and removed from |
386 |
*** the matrix. |
387 |
*** |
388 |
*** By construction the curcolth diagonal element of R is the negative of |
389 |
*** alpha[curcol] on exit, but the element is not stored in R. |
390 |
*** The remaining columns in reg are transformed as follows: |
391 |
*** |
392 |
*** A' = A' - hhcol dot hhcol dot A' |
393 |
*** where A' is A sans column curcol. |
394 |
*** |
395 |
*** This is equivalent to: T |
396 |
*** A = [ I - hhcol dot hhcol ] dot A = H dot A |
397 |
*** This will change the sparsity of A. |
398 |
*** |
399 |
**/ |
400 |
{ |
401 |
|
402 |
/* what to do with this? scrap? |
403 |
if (rng->high < rng->low) { |
404 |
return; |
405 |
} |
406 |
*/ |
407 |
/* Do transform even if last col of square region (to get matrices right). */ |
408 |
if (cpqr_get_householder(sys,curcol,&(reg->row),&(sys->qrdata->sp))) { |
409 |
mtx_sparse_t *sp; |
410 |
sp = &(sys->qrdata->sp); |
411 |
|
412 |
if (sp->len > 0) { |
413 |
mtx_householder_transform_region(sys->factors,sys->qrdata->tau[curcol], |
414 |
sp,reg,D_ZERO,FALSE); |
415 |
mtx_fill_org_col_sparse(sys->qrdata->hhvects,curcol,sp); |
416 |
} |
417 |
} |
418 |
/* else we have a very empty column and what are we doing here? */ |
419 |
} |
420 |
|
421 |
static int cpqr_factor(linsolqr_system_t sys) |
422 |
/** |
423 |
*** FACTORIZATION |
424 |
*** ------------- |
425 |
*** The QR factorization calculated of the rectangular |
426 |
*** sys->qrdata->facreg using columnwise Householder. |
427 |
*** If the region is taller than wide, the solution will |
428 |
*** will be that of linear least squares. If the region |
429 |
*** is wider than tall, the solution will be one with |
430 |
*** a basis chosen to give large diagonal |
431 |
*** values in R: a reasonably well conditioned basis |
432 |
*** except in rare circumstances. |
433 |
*** |
434 |
*** d = sys->rng, |
435 |
*** Q = H(NR-1)H(NR-2)..H(1), |
436 |
*** R = Q*A where A is the coef matrix, |
437 |
*** H(i) = I - v(i)*Transpose(v(i)) with d.low <= i <= d.high. |
438 |
*** |
439 |
*** The hhvects matrix contains the H(i) in the lower triangle, |
440 |
*** while the diagonal of R is stored densely (in col order) |
441 |
*** as -alpha[] and the off-diagonal elements of R are in the |
442 |
*** superdiagonal triangle of factors. |
443 |
*** |
444 |
*** Note we are using a normalized v(i) that avoids extra |
445 |
*** multiplications when handling multiple RHS. This is |
446 |
*** done at the cost of some extra *,sqrt operations during |
447 |
*** factorization. ||v(i)||_2 = 2.0. |
448 |
*** These operations are trivial compared to the cost of |
449 |
*** Householder transformations. |
450 |
**/ |
451 |
{ |
452 |
mtx_range_t active; /* range within sys->rng yet to be pivoted */ |
453 |
mtx_range_t search; /* range of columns to be examined for pivots */ |
454 |
mtx_region_t ak; /* reduced A at each step */ |
455 |
int32 lastmax,col,newcol = (-1); |
456 |
int32 colfound = 0, *ibuf; |
457 |
real64 *rbuf, *alpha; |
458 |
struct qr_auxdata *data; |
459 |
int pivotstyle; /* -1, no pivot, 0 ptol pivoting 1, exact col pivoting */ |
460 |
|
461 |
/* set method variation based on ptol */ |
462 |
if (sys->ptol == D_ZERO) { |
463 |
pivotstyle = -1; |
464 |
} else { |
465 |
if (sys->ptol == D_ONE) { |
466 |
pivotstyle = 1; |
467 |
} else { |
468 |
pivotstyle = 0; |
469 |
} |
470 |
} |
471 |
|
472 |
/* do initializations need for factoring or refactoring here */ |
473 |
data = sys->qrdata; |
474 |
active = sys->rng; |
475 |
search = data->facreg.col; |
476 |
ak = data->facreg; |
477 |
lastmax = 0; |
478 |
data->nu = D_ZERO; |
479 |
ibuf = (int *)data->hhrow; |
480 |
rbuf = data->hhcol; |
481 |
alpha = data->alpha; |
482 |
|
483 |
if (pivotstyle >= 0) { |
484 |
/* compute starting column norms and matrix norm*/ |
485 |
data->anu = data->asubnu = |
486 |
cpqr_compute_alpha(data->alpha,sys->factors,&(data->facreg)); |
487 |
} |
488 |
|
489 |
for (col = active.low; col <= active.high;) { |
490 |
/* select newcol, setting colfound to 1 if pivotable. If |
491 |
nothing is pivotable or not pivoting, newcol is active.low */ |
492 |
switch (pivotstyle) { |
493 |
case -1: |
494 |
newcol = active.low; |
495 |
alpha[newcol] = sqrt(mtx_sum_sqrs_in_col(sys->factors, |
496 |
newcol,&(ak.row))); |
497 |
break; |
498 |
case 0: |
499 |
/* get the distance of the pivot away from where we are (col). */ |
500 |
if (lastmax == 0) { |
501 |
newcol = find_pivot_number(&(alpha[col]), search.high-search.low+1, |
502 |
sys->ptol,sys->pivot_zero, |
503 |
ibuf,rbuf,&lastmax); |
504 |
} else { |
505 |
/* the max hasn't moved from where it was. Don't search past it. |
506 |
Since col has increased one, the previous value is now the len. */ |
507 |
newcol = find_pivot_number(&(alpha[col]), lastmax, |
508 |
sys->ptol,sys->pivot_zero, |
509 |
ibuf,rbuf,&lastmax); |
510 |
} |
511 |
newcol += col; /* convert distance to pivot location. */ |
512 |
break; |
513 |
case 1: |
514 |
newcol = find_pivot_number(&(alpha[col]), search.high-search.low+1, |
515 |
sys->ptol,sys->pivot_zero, |
516 |
ibuf,rbuf,&lastmax); |
517 |
lastmax = 0; |
518 |
newcol += col; /* convert distance to pivot location. */ |
519 |
break; |
520 |
default: /* not reached */ |
521 |
break; |
522 |
} |
523 |
if (alpha[newcol] < sys->pivot_zero) { |
524 |
colfound = 0; |
525 |
/* jump out of the loop: remaining cols singular, or in case of |
526 |
no pivoting, we have hit our first empty column and are stuck. */ |
527 |
break; |
528 |
} else { |
529 |
colfound = 1; |
530 |
} |
531 |
++(sys->rank); |
532 |
cpqr_permute(sys,newcol,&active); |
533 |
#if CPQR_DEBUG |
534 |
mtx_write_region_human_cols(stderr,sys->factors,&ak); |
535 |
#endif |
536 |
cpqr_apply_householder(sys,col,&ak); |
537 |
if (pivotstyle >= 0) { |
538 |
if (cpqr_reduce_region(sys,&ak,lastmax+col)) lastmax = 0; |
539 |
} else { |
540 |
ak.col.low = (++ak.row.low); |
541 |
} |
542 |
search.low = active.low = (++col); |
543 |
} |
544 |
|
545 |
#if CPQR_DEBUG |
546 |
FPRINTF(stderr,"R\n"); |
547 |
mtx_write_region_human_cols(stderr,sys->factors,&(data->facreg)); |
548 |
FPRINTF(stderr,"hhcols\n"); |
549 |
mtx_write_region_human_cols(stderr,data->hhvects,&(data->facreg)); |
550 |
for (col = data->facreg.col.low; col <= data->facreg.col.high; col++) { |
551 |
FPRINTF(stderr,"alpha[%d](org %d) = %g\n", |
552 |
col,mtx_col_to_org(sys->factors,col),alpha[col]); |
553 |
} |
554 |
#endif |
555 |
if (!colfound) { |
556 |
/* handle setup for singularity, if any */ |
557 |
return 1; |
558 |
} |
559 |
return 0; |
560 |
} |
561 |
|
562 |
#define TAU_ONE TRUE |
563 |
static void cpqr_forward_eliminate(linsolqr_system_t sys, |
564 |
real64 *arr, |
565 |
boolean transpose) |
566 |
/** |
567 |
*** cpqr_forward_eliminate(sys,c,transpose) |
568 |
*** convert rhs to c in place (only stored u of H= I-tau u dot Transpose(u) |
569 |
*** |
570 |
*** transpose=FALSE |
571 |
*** c=Q.rhs. |
572 |
*** (assuming independent columns/rows 1 to rank) |
573 |
*** for j= 1 to rank (apply H(j) to rhs, HH foward elim) |
574 |
*** if (tau(j)!= 0) |
575 |
*** w=tau(j)* (Transpose(u(j)) dot c) |
576 |
*** if (w!=0) |
577 |
*** c -= w*u(j) |
578 |
*** endif |
579 |
*** endif |
580 |
*** endfor |
581 |
*** |
582 |
*** transpose=TRUE |
583 |
*** Solve Transpose(R).c=rhs. (given R in untransposed form) |
584 |
*** 0<=k<r ==> x(k) = [c(k) - R((0..k-1),k) dot x(0..k-1)]/R(k,k) |
585 |
*** |
586 |
*** Neither of these operations require range checking the dots and |
587 |
*** adds as in both cases we know the rest of the column in question |
588 |
*** are empty. |
589 |
*** |
590 |
*** If !transpose && TAU_ONE, then assumes tau[i]=1 if tau[i]!=0. |
591 |
**/ |
592 |
{ |
593 |
mtx_range_t dot_rng; |
594 |
real64 sum; |
595 |
mtx_matrix_t mtx; |
596 |
|
597 |
dot_rng.high = sys->rng.low + sys->rank -1; |
598 |
|
599 |
if (transpose) { /* ok */ |
600 |
/* arr is indexed by original column number */ |
601 |
int32 dotlim,col; |
602 |
const real64 *diag; |
603 |
register int32 org_col; |
604 |
|
605 |
mtx = sys->factors; |
606 |
diag = sys->qrdata->alpha; |
607 |
dot_rng.low = sys->rng.low; |
608 |
dotlim = dot_rng.low+sys->rank; |
609 |
/* 0 <= k <r */ |
610 |
for( col=dot_rng.low; col <dotlim; ++col) { |
611 |
/* rows of transpose are cols of R */ |
612 |
dot_rng.high = col-1; |
613 |
/* sum = R((0..k-1),k) dot x(0..k-1) */ |
614 |
sum = mtx_col_dot_full_org_vec(mtx,col,arr,mtx_ALL_ROWS,TRUE); |
615 |
org_col = mtx_col_to_org(mtx,col); |
616 |
/* arr[org_col] = (arr[org_col] - sum)/ -diag[k]; */ |
617 |
arr[org_col] = (sum - arr[org_col])/diag[col]; |
618 |
} |
619 |
} else { |
620 |
|
621 |
/* arr is indexed by original row number */ |
622 |
/* apply Q to it */ |
623 |
real64 *tau; |
624 |
|
625 |
mtx = sys->qrdata->hhvects; |
626 |
tau = sys->qrdata->tau; |
627 |
|
628 |
for( dot_rng.low = sys->rng.low; |
629 |
dot_rng.low <= dot_rng.high; |
630 |
dot_rng.low++) { |
631 |
if (tau[dot_rng.low]!=D_ZERO) { |
632 |
sum = mtx_col_dot_full_org_vec(mtx,dot_rng.low,arr,mtx_ALL_ROWS,FALSE); |
633 |
if (sum != D_ZERO) { |
634 |
#if TAU_ONE |
635 |
mtx_org_vec_add_col(mtx,arr,dot_rng.low,-sum,mtx_ALL_ROWS,FALSE); |
636 |
#else |
637 |
mtx_org_vec_add_col(mtx,arr,dot_rng.low, |
638 |
-sum*tau[dot_rng.low],mtx_ALL_ROWS,FALSE); |
639 |
#endif |
640 |
} |
641 |
} |
642 |
} |
643 |
} |
644 |
} |
645 |
|
646 |
static void cpqr_backward_substitute(linsolqr_system_t sys, |
647 |
real64 *arr, |
648 |
boolean transpose) |
649 |
/** |
650 |
*** cpqr_backward_substitute(sys,rhs,transpose): |
651 |
*** It is assumed that the R (or Q) part of sys->factors is computed. |
652 |
*** This function converts rhs to c in place by solving one of the |
653 |
*** following: |
654 |
*** |
655 |
*** transpose = FALSE transpose = TRUE |
656 |
*** R.c = rhs Q.c = rhs |
657 |
*** |
658 |
*** The following formulae hold: |
659 |
*** (for rank=r, upper left is R(0,0) transpose= FALSE |
660 |
*** r>k>=0 --> c(k) = [rhs(k) - R(k,(k+1..r-1)) dot c(k+1..r-1)] / R(k,k) |
661 |
*** -R(k,k) is assumed to be in sys->qrdata->alpha[k] |
662 |
*** or |
663 |
*** (for rank=r, upper left is Q(0,0) transpose= TRUE |
664 |
*** c=Transpose(Q).rhs ==> |
665 |
*** for k = rank..1 |
666 |
*** c = H(k).rhs = rhs - tau*(Transpose(uk) dot rhs) *uk |
667 |
*** rhs <-- c |
668 |
*** endfor |
669 |
*** |
670 |
*** For transpose == FALSE, this requires filtering out the leftover |
671 |
*** righthand columns of R unless the problem is square or tall and of |
672 |
*** full rank. |
673 |
*** For transpose == TRUE, this uses the fast add and dot. |
674 |
*** This counts on the fact that the diagonal was removed from R during |
675 |
*** processing and is stored in alpha. |
676 |
*** If transpose && TAU_ONE, then assumes tau[i]=1 if tau[i]!=0. |
677 |
**/ |
678 |
{ |
679 |
mtx_range_t dot_rng; |
680 |
real64 sum; |
681 |
mtx_matrix_t mtx; |
682 |
int32 dotlim; |
683 |
|
684 |
dot_rng.high = sys->rng.low+sys->rank -1; /* ultimate pivoted row/col */ |
685 |
dotlim = sys->rng.low; /* upleft corner row/col */ |
686 |
|
687 |
if (transpose) { |
688 |
/* arr is indexed by original column number */ |
689 |
/* apply Q */ |
690 |
real64 *tau; |
691 |
mtx = sys->qrdata->hhvects; |
692 |
tau = sys->qrdata->tau; |
693 |
|
694 |
/*** for k = rank..1 |
695 |
*** c = H(k).rhs = rhs - tau*(Transpose(uk) dot rhs) *uk |
696 |
*** rhs <-- c |
697 |
*** endfor |
698 |
*/ |
699 |
for (dot_rng.low=dot_rng.high; dot_rng.low >=dotlim; dot_rng.low--) { |
700 |
if (tau[dot_rng.low]!=D_ZERO) { |
701 |
sum = mtx_col_dot_full_org_vec(mtx,dot_rng.low,arr,mtx_ALL_ROWS,TRUE); |
702 |
if (sum != D_ZERO) { |
703 |
#if TAU_ONE |
704 |
mtx_org_vec_add_col(mtx,arr,dot_rng.low,-sum, mtx_ALL_ROWS,TRUE); |
705 |
#else |
706 |
mtx_org_vec_add_col(mtx,arr,dot_rng.low,-sum*tau[dot_rng.low], |
707 |
mtx_ALL_ROWS,TRUE); |
708 |
#endif |
709 |
} |
710 |
} |
711 |
} |
712 |
} else { |
713 |
|
714 |
int32 org_row,row; |
715 |
real64 *diag; |
716 |
|
717 |
mtx = sys->factors; |
718 |
diag = sys->qrdata->alpha; |
719 |
|
720 |
/* apply R */ |
721 |
/* r >k>=0 we are working backwards through the pivoted rows. */ |
722 |
/* dot_rng is stuff to the right of the current pivot, (row,row), |
723 |
and within the basis columns. */ |
724 |
|
725 |
if ( dot_rng.high == sys->qrdata->facreg.col.high ) { |
726 |
/* cool, we can use the fast math since all cols are in basis */ |
727 |
for( row = dot_rng.high; row >= dotlim; --row) { |
728 |
dot_rng.low = row+1; /* our dot left edge is just after the pivot */ |
729 |
|
730 |
/* sum = R(k,(k+1..r-1)) dot c(k+1..r-1) */ |
731 |
sum = mtx_row_dot_full_org_vec(mtx,row,arr,mtx_ALL_COLS,TRUE); |
732 |
org_row = mtx_row_to_org(mtx,row); |
733 |
|
734 |
/* c(k) = [rhs(k) -sum] /R(k,k) */ |
735 |
/* arr[org_row] = (arr[org_row] - sum) / -diag[row]; */ |
736 |
arr[org_row] = (sum - arr[org_row])/diag[row]; |
737 |
} |
738 |
} else { |
739 |
/* wah! we have to shuffle all the crap in R from either rank |
740 |
deficiency or extra cols */ |
741 |
for( row = dot_rng.high; row >=dotlim; --row) { |
742 |
dot_rng.low = row+1; /* our dot left edge is just after the pivot */ |
743 |
|
744 |
/* sum = R(k,(k+1..r-1)) dot c(k+1..r-1) */ |
745 |
sum = mtx_row_dot_full_org_vec(mtx,row,arr,&dot_rng,TRUE); |
746 |
org_row = mtx_row_to_org(mtx,row); |
747 |
|
748 |
/* c(k) = [rhs(k) -sum] /R(k,k) */ |
749 |
/* arr[org_row] = (arr[org_row] - sum) / -diag[row]; */ |
750 |
arr[org_row] = (sum - arr[org_row])/diag[row]; |
751 |
} |
752 |
} |
753 |
} |
754 |
} |
755 |
#undef TAU_ONE |
756 |
|
757 |
static int cpqr_entry(linsolqr_system_t sys,mtx_region_t *region) |
758 |
/** |
759 |
*** The region to factor is first isolated by truncating the region |
760 |
*** provided to the largest rectangular region with an upper left |
761 |
*** corner on the diagonal. (I.E. it may be over or under specified.) |
762 |
*** Then the |
763 |
*** It is presumed it will contain no empty rows or columns and that it has |
764 |
*** been previously reordered using linsolqr_reorder(sys,region,tspk1 or |
765 |
*** some other QR friendly method). |
766 |
*** on exit, sys->factors, and sys->inverse will have been |
767 |
*** permuted identically by solution process. sys->coef will not be |
768 |
*** permuted. |
769 |
**/ |
770 |
{ |
771 |
struct rhs_list *rl; |
772 |
boolean rank_deficient; |
773 |
mtx_region_t factor_region; |
774 |
|
775 |
CHECK_SYSTEM(sys); |
776 |
if( sys->factored ) |
777 |
return 0; |
778 |
if( sys->fmethod!=plain_qr ) |
779 |
return 1; |
780 |
if(ISNULL(sys->qrdata)) |
781 |
return 1; |
782 |
|
783 |
if (region == mtx_ENTIRE_MATRIX) { |
784 |
determine_pivot_range(sys); |
785 |
factor_region = sys->reg; |
786 |
} else { |
787 |
factor_region = *region; |
788 |
} |
789 |
if (ul_rectangle_region(sys,&factor_region)) return 1; |
790 |
sys->qrdata->facreg = factor_region; |
791 |
|
792 |
if( NOTNULL(sys->inverse) ) { |
793 |
mtx_destroy(sys->inverse); |
794 |
sys->inverse = NULL; |
795 |
} |
796 |
if( NOTNULL(sys->factors) ) mtx_destroy(sys->factors); |
797 |
|
798 |
sys->factors = mtx_copy_region(sys->coef, &(sys->qrdata->facreg)); |
799 |
sys->qrdata->hhvects = mtx_create_slave(sys->factors); |
800 |
sys->rank = 0; |
801 |
sys->smallest_pivot = MAXDOUBLE; |
802 |
for( rl = sys->rl ; NOTNULL(rl) ; rl = rl->next ) |
803 |
rl->solved = FALSE; |
804 |
insure_capacity(sys); /* this should zero the vectors if needed */ |
805 |
insure_qr_capacity(sys); /* this should zero the vectors if needed */ |
806 |
|
807 |
rank_deficient = cpqr_factor(sys); |
808 |
if (rank_deficient) { |
809 |
#if LINSOLQR_DEBUG |
810 |
int j; |
811 |
#endif |
812 |
FPRINTF(stderr,"Warning: cpqr found column rank %d of %d\n",sys->rank, |
813 |
sys->rng.high-sys->rng.low+1); |
814 |
#if LINSOLQR_DEBUG |
815 |
FPRINTF(stderr,"alpha vec:(curcol,val)\n"); |
816 |
for (j=sys->qrdata->facreg.col.low; |
817 |
j<= sys->qrdata->facreg.col.high; j++) |
818 |
FPRINTF(stderr,"alpha[%d] = %.8g\n",j,sys->qrdata->alpha[j]); |
819 |
FPRINTF(stderr,"tau vec:(curcol,val)\n"); |
820 |
for (j=sys->qrdata->facreg.col.low; |
821 |
j<= sys->qrdata->facreg.col.high; j++) |
822 |
FPRINTF(stderr,"tau[%d] = %.8g\n",j,sys->qrdata->tau[j]); |
823 |
#endif |
824 |
} |
825 |
sys->factored = TRUE; |
826 |
return 0; |
827 |
} |
828 |
|
829 |
/** |
830 |
*** Solve a previously qr factorized matrix with a rhs b. |
831 |
*** If b is not transposed (is org row ordered): |
832 |
*** c:=Q.b, then solve R.x=c for x. |
833 |
**/ |
834 |
static int cpqr_solve(linsolqr_system_t sys,struct rhs_list *rl) |
835 |
{ |
836 |
cpqr_forward_eliminate(sys,rl->varvalue,rl->transpose); |
837 |
cpqr_backward_substitute(sys,rl->varvalue,rl->transpose); |
838 |
/* doesn't the following destroy the least squares solution? */ |
839 |
zero_unpivoted_vars(sys,rl->varvalue,rl->transpose); |
840 |
return 0; |
841 |
} |
842 |
/***************************************************************************\ |
843 |
End of CPQR implementation. |
844 |
\***************************************************************************/ |