/[ascend]/trunk/ascend/linear/mtx_vector.c
Revision 2336 - Sun Dec 26 03:44:24 2010 UTC, by jpye
File MIME type: text/x-csrc; file size: 10674 byte(s)
Suppress some debug output.
In mtx_vector, initialise some members of the mtx_vector struct, because otherwise they trigger valgrind errors.

/*  ASCEND modelling environment
    Copyright (C) 2006 Carnegie Mellon University

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.
*//**
    Vector math implementation
*/

#include "mtx_vector.h"
#include <ascend/general/ascMalloc.h>
#include <ascend/general/panic.h>
#include <ascend/general/mem.h>
#include <math.h>

#define MTXVECTOR_DEBUG

struct vec_vector *vec_create(int32 low, int32 high)
{
    struct vec_vector *result;

    result = ASC_NEW(struct vec_vector);
    if (NULL == result)
        return NULL;

    result->rng = NULL;
    result->vec = NULL;
#ifdef MTXVECTOR_DEBUG
    /* set these elements to zero only if we're debugging (eg valgrinding) */
    result->accurate = 0;
    result->norm2 = 0;
#endif
    if (0 != vec_init(result, low, high)) {
        ASC_FREE(result);
        result = NULL;
    }
    return result;
}

int vec_init(struct vec_vector *vec, int32 low, int32 high)
{
    int32 new_size;

    if ((low < 0) || (high < low))
        return 1;

    if (NULL == vec)
        return 2;

    if (NULL == vec->rng) {
        vec->rng = ASC_NEW(mtx_range_t);
        if (NULL == vec->rng)
            return 3;
    }
    vec->rng = mtx_range(vec->rng, low, high);

    new_size = high + 1;
    if (NULL == vec->vec) {
#ifdef MTXVECTOR_DEBUG
        /* zero the array only if we're debugging (eg valgrinding) */
        vec->vec = ASC_NEW_ARRAY_CLEAR(real64,new_size);
#else
        vec->vec = ASC_NEW_ARRAY(real64,new_size);
#endif
        if (NULL == vec->vec) {
            ASC_FREE(vec->rng);
            vec->rng = NULL;
            return 3;
        }
    }
    else {
        /* check the realloc result before overwriting vec->vec, so the
           old allocation is not leaked if the reallocation fails */
        real64 *tmp = (real64 *)ascrealloc(vec->vec, new_size*sizeof(real64));
        if (NULL == tmp)
            return 3;
        vec->vec = tmp;
    }

    vec->accurate = FALSE;
    return 0;
}

void vec_destroy(struct vec_vector *vec)
{
    if (NULL != vec) {
        if (NULL != vec->rng)
            ASC_FREE(vec->rng);
        if (NULL != vec->vec)
            ASC_FREE(vec->vec);
        ASC_FREE(vec);
    }
}

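#ifdef MTXVECTOR_EXAMPLE
/*
    A minimal usage sketch, not part of the original file: create a vector
    over indices 0..4, fill it, print it, and clean up. The guard macro
    MTXVECTOR_EXAMPLE and the function name are hypothetical; we assume
    stdout is visible here via the headers already included above.
*/
static void vec_example_lifecycle(void)
{
    int32 i;
    struct vec_vector *v = vec_create(0,4);
    if (NULL == v)
        return;
    for (i = 0; i <= 4; ++i)
        v->vec[i] = (real64)(i + 1);    /* v = (1,2,3,4,5) */
    vec_write(stdout,v);                /* declared in mtx_vector.h */
    vec_destroy(v);
}
#endif
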
void vec_zero(struct vec_vector *vec)
{
    real64 *p;
    int32 len;

    asc_assert((NULL != vec) &&
               (NULL != vec->rng) &&
               (NULL != vec->vec) &&
               (vec->rng->low >= 0) &&
               (vec->rng->low <= vec->rng->high));

    p = vec->vec + vec->rng->low;
    len = vec->rng->high - vec->rng->low + 1;
    mtx_zero_real64(p,len);
}

void vec_copy(struct vec_vector *vec1, struct vec_vector *vec2)
{
    real64 *p1,*p2;
    int32 len;

    asc_assert((NULL != vec1) &&
               (NULL != vec1->rng) &&
               (NULL != vec1->vec) &&
               (vec1->rng->low >= 0) &&
               (vec1->rng->low <= vec1->rng->high) &&
               (NULL != vec2) &&
               (NULL != vec2->rng) &&
               (NULL != vec2->vec) &&
               (vec2->rng->low >= 0));

    p1 = vec1->vec + vec1->rng->low;
    p2 = vec2->vec + vec2->rng->low;
    len = vec1->rng->high - vec1->rng->low + 1;
    /* mem_move_cast rather than mem_copy_cast, since the source and
       destination regions may overlap */
    mem_move_cast(p1,p2,len*sizeof(real64));
}

#define USEDOT TRUE
/**<
    USEDOT = TRUE is a winner on alphas, hps, and sparc20.
    @TODO we definitely should be deferring to ATLAS/BLAS routines here, right?
*/

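/*
    A sketch of what the @TODO above could look like, assuming a CBLAS
    implementation were linked in (cblas_ddot from <cblas.h>; neither the
    header nor the library is part of this build as it stands):

        return cblas_ddot(len, p1, 1, p2, 1);

    cblas_ddot computes the sum over i of p1[i]*p2[i] with the given unit
    strides, which is what vec_dot() below does by hand.
*/
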
/**
    Computes the inner product of vec1 and vec2, returning the result.
    vec1 and vec2 may overlap or even be identical.
*/
real64 vec_inner_product(struct vec_vector *vec1,
                         struct vec_vector *vec2
){
    real64 *p1,*p2;
#if !USEDOT
    real64 sum;
#endif
    int32 len;

    asc_assert((NULL != vec1) &&
               (NULL != vec1->rng) &&
               (NULL != vec1->vec) &&
               (vec1->rng->low >= 0) &&
               (vec1->rng->low <= vec1->rng->high) &&
               (NULL != vec2) &&
               (NULL != vec2->rng) &&
               (NULL != vec2->vec) &&
               (vec2->rng->low >= 0));

    p1 = vec1->vec + vec1->rng->low;
    p2 = vec2->vec + vec2->rng->low;
    len = vec1->rng->high - vec1->rng->low + 1;
#if !USEDOT
    if (p1 != p2) {
        for( sum=0.0 ; --len >= 0 ; ++p1,++p2 ) {
            sum += (*p1) * (*p2);
        }
        return(sum);
    } else {
        for( sum=0.0 ; --len >= 0 ; ++p1 ) {
            sum += (*p1) * (*p1);
        }
        return(sum);
    }
#else
    return vec_dot(len,p1,p2);
#endif
}

/**
    Computes norm^2 of the vector, assigning the result to vec->norm2
    and returning the result as well.
*/
real64 vec_square_norm(struct vec_vector *vec){
    vec->norm2 = vec_inner_product(vec,vec);
    return vec->norm2;
}

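/*
    Worked example (illustrative): for a vector v with range 0..1 and
    values v->vec[0] = 3.0, v->vec[1] = 4.0,

        vec_square_norm(v)  returns  3*3 + 4*4 = 25.0

    and leaves v->norm2 == 25.0, so vec_write() would report Norm = 5.
*/
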
/**
    Stores prod := (scale)*(mtx)(vec) or (scale)*(mtx-transpose)(vec).
    vec and prod must be completely different.
*/
void vec_matrix_product(mtx_matrix_t mtx, struct vec_vector *vec,
                        struct vec_vector *prod, real64 scale,
                        boolean transpose
){
    mtx_coord_t nz;
    real64 value, *vvec, *pvec;
    int32 lim;

    asc_assert((NULL != vec) &&
               (NULL != vec->rng) &&
               (NULL != vec->vec) &&
               (vec->rng->low >= 0) &&
               (vec->rng->low <= vec->rng->high) &&
               (NULL != prod) &&
               (NULL != prod->rng) &&
               (NULL != prod->vec) &&
               (prod->rng->low >= 0) &&
               (prod->rng->low <= prod->rng->high) &&
               (NULL != mtx));

    lim = prod->rng->high;
    pvec = prod->vec;
    vvec = vec->vec;
    if( transpose ) {
        for(nz.col = prod->rng->low ; nz.col <= lim ; ++(nz.col) ) {
            pvec[nz.col] = 0.0;
            nz.row = mtx_FIRST;
            while( value = mtx_next_in_col(mtx,&nz,vec->rng),
                   nz.row != mtx_LAST )
                pvec[nz.col] += value*vvec[nz.row];
            pvec[nz.col] *= scale;
        }
    } else {
        for(nz.row = prod->rng->low ; nz.row <= lim ; ++(nz.row) ) {
            pvec[nz.row] = 0.0;
            nz.col = mtx_FIRST;
            while( value = mtx_next_in_row(mtx,&nz,vec->rng),
                   nz.col != mtx_LAST )
                pvec[nz.row] += value*vvec[nz.col];
            pvec[nz.row] *= scale;
        }
    }
}

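#ifdef MTXVECTOR_EXAMPLE
/*
    An illustrative sketch of vec_matrix_product, not part of the original
    file. It assumes the mtx.h entry points mtx_create(), mtx_set_order(),
    mtx_fill_value() and mtx_destroy() for building and releasing the
    matrix; check mtx.h for the exact creation/fill API before relying on
    this. Error handling is omitted for brevity.
*/
static void vec_example_matrix_product(void)
{
    mtx_coord_t nz;
    mtx_matrix_t A = mtx_create();
    struct vec_vector *x = vec_create(0,1);
    struct vec_vector *y = vec_create(0,1);
    if (NULL == A || NULL == x || NULL == y)
        return;

    mtx_set_order(A,2);
    /* A = [[1 2],[0 3]] */
    nz.row = 0; nz.col = 0; mtx_fill_value(A,&nz,1.0);
    nz.row = 0; nz.col = 1; mtx_fill_value(A,&nz,2.0);
    nz.row = 1; nz.col = 1; mtx_fill_value(A,&nz,3.0);

    x->vec[0] = 1.0; x->vec[1] = 1.0;

    /* y := 1.0 * A * x = (3, 3) */
    vec_matrix_product(A, x, y, 1.0, FALSE);

    vec_destroy(x);
    vec_destroy(y);
    mtx_destroy(A);
}
#endif
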
/* outputs a vector */
void vec_write(FILE *fp, struct vec_vector *vec){
    int32 ndx,hi;
    real64 *vvec;

    if (NULL == fp) {
        FPRINTF(ASCERR, "Error writing vector in vec_write: NULL file pointer.\n");
        return;
    }
    if ((NULL == vec) ||
        (NULL == vec->rng) ||
        (NULL == vec->vec) ||
        (vec->rng->low < 0) ||
        (vec->rng->low > vec->rng->high)) {
        FPRINTF(ASCERR, "Error writing vector in vec_write: uninitialized vector.\n");
        return;
    }

    vvec = vec->vec;
    hi = vec->rng->high;
    FPRINTF(fp,"Norm = %g, Accurate = %s, Vector range = %d to %d\n",
        sqrt(fabs(vec->norm2)), vec->accurate?"TRUE":"FALSE",
        vec->rng->low,vec->rng->high);
    FPRINTF(fp,"Vector --> ");
    for( ndx=vec->rng->low ; ndx<=hi ; ++ndx )
        FPRINTF(fp, "%g ", vvec[ndx]);
    PUTC('\n',fp);
}

/* Dot product for loop unrolled vector norms */
real64 vec_dot(int32 len, const real64 *p1, const real64 *p2)
{
    register double sum,lsum;
    int m,n;

/*
    AVMAGIC in fact isn't magic.
    It only goes 2-10; change the code below if you mess with it.
    Default AVMAGIC is 4, which works well on alphas, tika.
*/
#define AVMAGIC 4
#ifdef sun
#undef AVMAGIC
#define AVMAGIC 6
#endif
#ifdef __hpux
#undef AVMAGIC
#define AVMAGIC 4
/*
    2 was the best value on ranier (9000/720) but tika (9000/715) likes 4.
    There are no recognizable (defines) compiler differences, so tika
    will set the hp default.
*/
#endif
/*
    Hands down the best avmagic on atlas is 4, ranier 2, unxi21 6
    under native compilers. No bets on the billions of gcc variants.
    Needs to be tried on sgi.
    Note, these are tuned for speed, not for accuracy. On very large
    vectors something like dnrm2 may be more appropriate, though
    it is very slow. Upping avmagic to 10 will help accuracy some.
*/

#if (AVMAGIC>10)
#undef AVMAGIC
#define AVMAGIC 10
#endif

    asc_assert((NULL != p1) && (NULL != p2) && (len >= 0));

    m = len / AVMAGIC;
    n = len % AVMAGIC;
    if (p1!=p2) {
        /* get leading leftovers */
        for( sum=0.0 ; --n >= 0 ; ) {
            sum += (*p1) * (*p2);
            ++p1; ++p2;
        }
        /* p1,p2 now point at the first unadded location, or just past the end (m=0) */
        /* eat m chunks */
        for( n=0 ; n<m ; n++ ) {
            /* unrolled by hand, as i am too lazy to figure out a macro that expands itself */
            lsum = (*p1) * (*p2);   /* zeroth term is assigned */
            p1++; p2++;
            lsum += (*p1) * (*p2);  /* add 1st */
#if (AVMAGIC>2)
            p1++; p2++;
            lsum += (*p1) * (*p2);  /* add 2nd */
#if (AVMAGIC>3)
            p1++; p2++;
            lsum += (*p1) * (*p2);  /* add 3rd */
#if (AVMAGIC>4)
            p1++; p2++;
            lsum += (*p1) * (*p2);  /* add 4th */
#if (AVMAGIC>5)
            p1++; p2++;
            lsum += (*p1) * (*p2);  /* add 5th */
#if (AVMAGIC>6)
            p1++; p2++;
            lsum += (*p1) * (*p2);  /* add 6th */
#if (AVMAGIC>7)
            p1++; p2++;
            lsum += (*p1) * (*p2);  /* add 7th */
#if (AVMAGIC>8)
            p1++; p2++;
            lsum += (*p1) * (*p2);  /* add 8th */
#if (AVMAGIC>9)
            p1++; p2++;
            lsum += (*p1) * (*p2);  /* add 9th */
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
            p1++; p2++;             /* leave p1,p2 pointing at the next zeroth */
            sum += lsum;
        }
    } else {
        /* get leading leftovers */
        for( sum=0.0 ; --n >= 0 ; ++p1 ) {
            sum += (*p1) * (*p1);
        }
        /* p1 now points at the first unadded location, or just past the end (m=0) */
        /* eat m chunks */
        for( n=0 ; n<m ; n++ ) {
            lsum = (*p1) * (*p1);   /* zeroth term is assigned */
            p1++;
            lsum += (*p1) * (*p1);  /* add 1st */
#if (AVMAGIC>2)
            p1++;
            lsum += (*p1) * (*p1);  /* add 2nd */
#if (AVMAGIC>3)
            p1++;
            lsum += (*p1) * (*p1);  /* add 3rd */
#if (AVMAGIC>4)
            p1++;
            lsum += (*p1) * (*p1);  /* add 4th */
#if (AVMAGIC>5)
            p1++;
            lsum += (*p1) * (*p1);  /* add 5th */
#if (AVMAGIC>6)
            p1++;
            lsum += (*p1) * (*p1);  /* add 6th */
#if (AVMAGIC>7)
            p1++;
            lsum += (*p1) * (*p1);  /* add 7th */
#if (AVMAGIC>8)
            p1++;
            lsum += (*p1) * (*p1);  /* add 8th */
#if (AVMAGIC>9)
            p1++;
            lsum += (*p1) * (*p1);  /* add 9th */
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
            p1++;                   /* leave p1 pointing at the next zeroth */
            sum += lsum;
        }
    }
    return(sum);
#undef AVMAGIC
}
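
#ifdef MTXVECTOR_EXAMPLE
/*
    Illustrative check of vec_dot, not part of the original file (the guard
    macro and function name are hypothetical). Passing the same pointer
    twice yields a squared 2-norm, matching the p1==p2 branch above.
*/
static void vec_example_dot(void)
{
    static const real64 a[3] = {1.0, 2.0, 3.0};
    static const real64 b[3] = {4.0, 5.0, 6.0};
    real64 d  = vec_dot(3, a, b);   /* 1*4 + 2*5 + 3*6 = 32 */
    real64 n2 = vec_dot(3, a, a);   /* 1 + 4 + 9 = 14 */
    (void)d; (void)n2;
}
#endif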
