/*	ASCEND modelling environment
	Copyright (C) 2006 Carnegie Mellon University

	This program is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 2, or (at your option)
	any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place - Suite 330,
	Boston, MA 02111-1307, USA.
*//**
	Vector math implementation
*/

#include "mtx_vector.h"
#include <ascend/general/ascMalloc.h>
#include <ascend/general/panic.h>
#include <ascend/general/mem.h>
#include <math.h>

#define MTXVECTOR_DEBUG

struct vec_vector *vec_create(int32 low, int32 high)
{
	struct vec_vector *result;

	result = ASC_NEW(struct vec_vector);
	if (NULL == result)
		return NULL;

	result->rng = NULL;
	result->vec = NULL;
#ifdef MTXVECTOR_DEBUG
	/* set these elements to zero only if we're debugging (eg valgrinding) */
	result->accurate = 0;
	result->norm2 = 0;
#endif
	if (0 != vec_init(result, low, high)) {
		ASC_FREE(result);
		result = NULL;
	}
	return result;
}
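
/*
	Hypothetical usage sketch (the name 'n' is illustrative, not from this
	file): create a vector over indices [0,n-1], zero it, use it, then
	clean up, following the NULL-on-failure convention of vec_create above.

		struct vec_vector *v = vec_create(0, n - 1);
		if (NULL != v) {
			vec_zero(v);
			... access v->vec[i] for i in [0,n-1] ...
			vec_destroy(v);
		}
*/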

int vec_init(struct vec_vector *vec, int32 low, int32 high)
{
	int32 new_size;

	if ((low < 0) || (high < low))
		return 1;

	if (NULL == vec)
		return 2;

	if (NULL == vec->rng) {
		vec->rng = ASC_NEW(mtx_range_t);
		if (NULL == vec->rng)
			return 3;
	}
	vec->rng = mtx_range(vec->rng, low, high);

	new_size = high + 1;
	if (NULL == vec->vec) {
#ifdef MTXVECTOR_DEBUG
		/* set these elements to zero only if we're debugging (eg valgrinding) */
		vec->vec = ASC_NEW_ARRAY_CLEAR(real64,new_size);
#else
		vec->vec = ASC_NEW_ARRAY(real64,new_size);
#endif
		if (NULL == vec->vec) {
			ASC_FREE(vec->rng);
			vec->rng = NULL;
			return 3;
		}
	} else {
		/* check the realloc result so a failure can't leak the old block */
		real64 *tmp = (real64 *)ascrealloc(vec->vec, new_size*sizeof(real64));
		if (NULL == tmp)
			return 3;
		vec->vec = tmp;
	}

	vec->accurate = FALSE;
	return 0;
}

void vec_destroy(struct vec_vector *vec)
{
	if (NULL != vec) {
		if (NULL != vec->rng)
			ASC_FREE(vec->rng);
		if (NULL != vec->vec)
			ASC_FREE(vec->vec);
		ASC_FREE(vec);
	}
}

void vec_zero(struct vec_vector *vec)
{
	real64 *p;
	int32 len;

	asc_assert((NULL != vec) &&
			(NULL != vec->rng) &&
			(NULL != vec->vec) &&
			(vec->rng->low >= 0) &&
			(vec->rng->low <= vec->rng->high));

	p = vec->vec + vec->rng->low;
	len = vec->rng->high - vec->rng->low + 1;
	mtx_zero_real64(p,len);
}

void vec_copy(struct vec_vector *vec1, struct vec_vector *vec2)
{
	real64 *p1, *p2;
	int32 len;

	asc_assert((NULL != vec1) &&
			(NULL != vec1->rng) &&
			(NULL != vec1->vec) &&
			(vec1->rng->low >= 0) &&
			(vec1->rng->low <= vec1->rng->high) &&
			(NULL != vec2) &&
			(NULL != vec2->rng) &&
			(NULL != vec2->vec) &&
			(vec2->rng->low >= 0));

	p1 = vec1->vec + vec1->rng->low;
	p2 = vec2->vec + vec2->rng->low;
	len = vec1->rng->high - vec1->rng->low + 1;
	/* mem_move_cast is used since the source and target ranges may
	   overlap; mem_copy_cast would probably not be safe here */
	mem_move_cast(p1,p2,len*sizeof(real64));
}

#define USEDOT TRUE
/**<
	USEDOT = TRUE was the clear winner on the Alpha, HP and SPARC 20
	machines tested.
	@TODO we definitely should be deferring to ATLAS/BLAS routines here, right?
*/
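
/*
	A minimal sketch of the BLAS deferral suggested in the TODO above (an
	assumption, not part of the original build): with a CBLAS implementation
	such as ATLAS linked in and <cblas.h> included, the inner product below
	could reduce to a single call

		sum = cblas_ddot(len, p1, 1, p2, 1);

	where the 1s are the strides through each array. The hand-unrolled
	vec_dot at the end of this file would then only be needed as a fallback.
*/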

/**
	Computes inner product between vec1 and vec2, returning result.
	vec1 and vec2 may overlap or even be identical.
*/
real64 vec_inner_product(struct vec_vector *vec1,
		struct vec_vector *vec2
){
	real64 *p1, *p2;
#if !USEDOT
	real64 sum;
#endif
	int32 len;

	asc_assert((NULL != vec1) &&
			(NULL != vec1->rng) &&
			(NULL != vec1->vec) &&
			(vec1->rng->low >= 0) &&
			(vec1->rng->low <= vec1->rng->high) &&
			(NULL != vec2) &&
			(NULL != vec2->rng) &&
			(NULL != vec2->vec) &&
			(vec2->rng->low >= 0));

	p1 = vec1->vec + vec1->rng->low;
	p2 = vec2->vec + vec2->rng->low;
	len = vec1->rng->high - vec1->rng->low + 1;
#if !USEDOT
	if (p1 != p2) {
		for( sum=0.0 ; --len >= 0 ; ++p1, ++p2 ) {
			sum += (*p1) * (*p2);
		}
		return sum;
	} else {
		for( sum=0.0 ; --len >= 0 ; ++p1 ) {
			sum += (*p1) * (*p1);
		}
		return sum;
	}
#else
	return vec_dot(len,p1,p2);
#endif
}

/**
	Computes norm^2 of vector, assigning the result to vec->norm2
	and returning the result as well.
*/
real64 vec_square_norm(struct vec_vector *vec){
	vec->norm2 = vec_inner_product(vec,vec);
	return vec->norm2;
}
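
/*
	Illustration (hypothetical, not from the original source): the Euclidean
	norm itself is recovered by taking the square root of the cached value,
	much as vec_write does below:

		real64 norm = sqrt(vec_square_norm(v));
*/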

/**
	Stores prod := (scale)*(mtx)(vec) or (scale)*(mtx-transpose)(vec).
	vec and prod must be completely different.
*/
void vec_matrix_product(mtx_matrix_t mtx, struct vec_vector *vec,
		struct vec_vector *prod, real64 scale,
		boolean transpose
){
	mtx_coord_t nz;
	real64 value, *vvec, *pvec;
	int32 lim;

	asc_assert((NULL != vec) &&
			(NULL != vec->rng) &&
			(NULL != vec->vec) &&
			(vec->rng->low >= 0) &&
			(vec->rng->low <= vec->rng->high) &&
			(NULL != prod) &&
			(NULL != prod->rng) &&
			(NULL != prod->vec) &&
			(prod->rng->low >= 0) &&
			(prod->rng->low <= prod->rng->high) &&
			(NULL != mtx));

	lim = prod->rng->high;
	pvec = prod->vec;
	vvec = vec->vec;
	if( transpose ) {
		for( nz.col = prod->rng->low ; nz.col <= lim ; ++(nz.col) ) {
			pvec[nz.col] = 0.0;
			nz.row = mtx_FIRST;
			/* comma operator: fetch the next nonzero in the column,
			   then test whether the column is exhausted */
			while( value = mtx_next_in_col(mtx,&nz,vec->rng),
					nz.row != mtx_LAST )
				pvec[nz.col] += value * vvec[nz.row];
			pvec[nz.col] *= scale;
		}
	} else {
		for( nz.row = prod->rng->low ; nz.row <= lim ; ++(nz.row) ) {
			pvec[nz.row] = 0.0;
			nz.col = mtx_FIRST;
			while( value = mtx_next_in_row(mtx,&nz,vec->rng),
					nz.col != mtx_LAST )
				pvec[nz.row] += value * vvec[nz.col];
			pvec[nz.row] *= scale;
		}
	}
}
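
/*
	Hypothetical usage sketch (the names m, x, y are illustrative only):
	compute y := 2.0 * m * x, with no transposition:

		vec_matrix_product(m, x, y, 2.0, FALSE);
*/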

/* outputs a vector */
void vec_write(FILE *fp, struct vec_vector *vec){
	int32 ndx, hi;
	real64 *vvec;

	if (NULL == fp) {
		FPRINTF(ASCERR, "Error writing vector in vec_write: NULL file pointer.\n");
		return;
	}
	if ((NULL == vec) ||
			(NULL == vec->rng) ||
			(NULL == vec->vec) ||
			(vec->rng->low < 0) ||
			(vec->rng->low > vec->rng->high)) {
		FPRINTF(ASCERR, "Error writing vector in vec_write: uninitialized vector.\n");
		return;
	}

	vvec = vec->vec;
	hi = vec->rng->high;
	FPRINTF(fp,"Norm = %g, Accurate = %s, Vector range = %d to %d\n",
		sqrt(fabs(vec->norm2)), vec->accurate ? "TRUE" : "FALSE",
		vec->rng->low, vec->rng->high);
	FPRINTF(fp,"Vector --> ");
	for( ndx = vec->rng->low ; ndx <= hi ; ++ndx )
		FPRINTF(fp, "%g ", vvec[ndx]);
	PUTC('\n',fp);
}

/* Dot product with loop unrolling, used for vector norms etc. */
real64 vec_dot(int32 len, const real64 *p1, const real64 *p2)
{
	register double sum, lsum;
	int m, n;

	/*
		AVMAGIC is the unrolling factor, not actually magic. It must lie
		in the range 2-10; change the unrolled code below if you alter
		that. The default AVMAGIC of 4 works well on the Alpha machines
		and on tika.
	*/
#define AVMAGIC 4
#ifdef sun
#undef AVMAGIC
#define AVMAGIC 6
#endif
#ifdef __hpux
#undef AVMAGIC
#define AVMAGIC 4
	/*
		2 was the best value on ranier (HP 9000/720) but tika (9000/715)
		prefers 4. There are no recognizable compiler #defines to tell
		them apart, so tika sets the HP default.
	*/
#endif
	/*
		The best AVMAGIC observed was 4 on atlas, 2 on ranier and 6 on
		unxi21, all under native compilers; no bets on the many gcc
		variants, and it still needs to be tried on SGI.
		Note that these values are tuned for speed, not accuracy. On very
		large vectors something like dnrm2 may be more appropriate, though
		it is very slow. Raising AVMAGIC to 10 will help accuracy some.
	*/
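
	/*
		For reference (an assumption, not part of the original build): the
		BLAS routine mentioned above is available through CBLAS as

			double nrm = cblas_dnrm2(len, p1, 1);

		which computes the 2-norm directly, rescaling internally to avoid
		overflow on large or badly scaled vectors.
	*/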

#if (AVMAGIC>10)
#undef AVMAGIC
#define AVMAGIC 10
#endif

	asc_assert((NULL != p1) && (NULL != p2) && (len >= 0));

	m = len / AVMAGIC;
	n = len % AVMAGIC;
	if (p1 != p2) {
		/* get leading leftovers */
		for( sum=0.0 ; --n >= 0 ; ) {
			sum += (*p1) * (*p2);
			++p1; ++p2;
		}
		/* p1,p2 now point at the first unadded location, or just after the end (m=0) */
		/* eat m chunks */
		for( n=0 ; n < m ; n++ ) {
			/* unrolled by hand, AVMAGIC terms per chunk */
			lsum = (*p1) * (*p2);	/* zeroth term is assigned */
			p1++; p2++;
			lsum += (*p1) * (*p2);	/* add 1st */
#if (AVMAGIC>2)
			p1++; p2++;
			lsum += (*p1) * (*p2);	/* add 2nd */
#if (AVMAGIC>3)
			p1++; p2++;
			lsum += (*p1) * (*p2);	/* add 3rd */
#if (AVMAGIC>4)
			p1++; p2++;
			lsum += (*p1) * (*p2);	/* add 4th */
#if (AVMAGIC>5)
			p1++; p2++;
			lsum += (*p1) * (*p2);	/* add 5th */
#if (AVMAGIC>6)
			p1++; p2++;
			lsum += (*p1) * (*p2);	/* add 6th */
#if (AVMAGIC>7)
			p1++; p2++;
			lsum += (*p1) * (*p2);	/* add 7th */
#if (AVMAGIC>8)
			p1++; p2++;
			lsum += (*p1) * (*p2);	/* add 8th */
#if (AVMAGIC>9)
			p1++; p2++;
			lsum += (*p1) * (*p2);	/* add 9th */
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
			p1++; p2++;	/* leave p1,p2 pointing at the next zeroth */
			sum += lsum;
		}
	} else {
		/* get leading leftovers */
		for( sum=0.0 ; --n >= 0 ; ++p1 ) {
			sum += (*p1) * (*p1);
		}
		/* p1 now points at the first unadded location, or just after the end (m=0) */
		/* eat m chunks */
		for( n=0 ; n < m ; n++ ) {
			/* same hand-unrolled chunk for the p1==p2 (square norm) case */
			lsum = (*p1) * (*p1);	/* zeroth term is assigned */
			p1++;
			lsum += (*p1) * (*p1);	/* add 1st */
#if (AVMAGIC>2)
			p1++;
			lsum += (*p1) * (*p1);	/* add 2nd */
#if (AVMAGIC>3)
			p1++;
			lsum += (*p1) * (*p1);	/* add 3rd */
#if (AVMAGIC>4)
			p1++;
			lsum += (*p1) * (*p1);	/* add 4th */
#if (AVMAGIC>5)
			p1++;
			lsum += (*p1) * (*p1);	/* add 5th */
#if (AVMAGIC>6)
			p1++;
			lsum += (*p1) * (*p1);	/* add 6th */
#if (AVMAGIC>7)
			p1++;
			lsum += (*p1) * (*p1);	/* add 7th */
#if (AVMAGIC>8)
			p1++;
			lsum += (*p1) * (*p1);	/* add 8th */
#if (AVMAGIC>9)
			p1++;
			lsum += (*p1) * (*p1);	/* add 9th */
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
			p1++;	/* leave p1 pointing at the next zeroth */
			sum += lsum;
		}
	}
	return sum;
#undef AVMAGIC
}