Parent Directory
|
Revision Log
Setting up web subdirectory in repository
1 | /* |
2 | * SLV: Ascend Nonlinear Solver |
3 | * by Karl Michael Westerberg |
4 | * Created: 2/6/90 |
5 | * Version: $Revision: 1.44 $ |
6 | * Version control file: $RCSfile: slv7.c,v $ |
7 | * Date last modified: $Date: 2000/01/25 02:27:43 $ |
8 | * Last modified by: $Author: ballan $ |
9 | * |
10 | * |
11 | * This file is part of the SLV solver. |
12 | * |
13 | * Copyright (C) 1990 Karl Michael Westerberg |
14 | * Copyright (C) 1993 Joseph Zaher |
15 | * Copyright (C) 1994 Joseph Zaher, Benjamin Andrew Allan |
16 | * Copyright (C) 1996 Kenneth Harrison Tyner |
17 | * |
18 | * The SLV solver is free software; you can redistribute |
19 | * it and/or modify it under the terms of the GNU General Public License as |
20 | * published by the Free Software Foundation; either version 2 of the |
21 | * License, or (at your option) any later version. |
22 | * |
23 | * The SLV solver is distributed in hope that it will be |
24 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
25 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
26 | * General Public License for more details. |
27 | * |
28 | * You should have received a copy of the GNU General Public License |
29 | * along with the program; if not, write to the Free Software Foundation, |
30 | * Inc., 675 Mass Ave, Cambridge, MA 02139 USA. Check the file named |
31 | * COPYING. COPYING is found in ../compiler. |
32 | * |
33 | */ |
34 | |
35 | #include <math.h> |
36 | #include <stdarg.h> |
37 | #include "utilities/ascConfig.h" |
38 | #include "utilities/ascSignal.h" |
39 | #include "utilities/ascMalloc.h" |
40 | #include "utilities/set.h" |
41 | #include "general/tm_time.h" |
42 | #include "utilities/mem.h" |
43 | #include "compiler/compiler.h" |
44 | #include "utilities/ascPanic.h" |
45 | #include "general/list.h" |
46 | #include "compiler/fractions.h" |
47 | #include "compiler/dimen.h" |
48 | #include "compiler/functype.h" |
49 | #include "compiler/func.h" |
50 | #include "solver/mtx.h" |
51 | #include "solver/linsol.h" |
52 | #include "solver/linsolqr.h" |
53 | #include "solver/slv_types.h" |
54 | #include "solver/var.h" |
55 | #include "solver/rel.h" |
56 | #include "solver/discrete.h" |
57 | #include "solver/conditional.h" |
58 | #include "solver/logrel.h" |
59 | #include "solver/bnd.h" |
60 | #include "solver/calc.h" |
61 | #include "solver/relman.h" |
62 | #include "solver/slv_common.h" |
63 | #include "solver/slv_client.h" |
64 | #include "solver/slv7.h" |
65 | #include "solver/slv_stdcalls.h" |
66 | |
67 | #if !defined(STATIC_NGSLV) && !defined(DYNAMIC_NGSLV) |
68 | int slv7_register(SlvFunctionsT *f) |
69 | { |
70 | (void)f; /* stop gcc whine about unused parameter */ |
71 | |
72 | FPRINTF(stderr,"NGSlv not compiled in this ASCEND IV.\n"); |
73 | return 1; |
74 | } |
75 | #else /* either STATIC_NGSLV or DYNAMIC_NGSLV is defined */ |
76 | #ifdef DYNAMIC_NGSLV |
77 | /* do dynamic loading stuff. yeah, right */ |
78 | #else /* following is used if STATIC_NGSLV is defined */ |
79 | |
80 | #define NGSLV 1 /* if = 1, include NGSLV functions and data structures */ |
81 | |
82 | #define KDEBUG TRUE |
83 | #define KILL 0 |
84 | /* code that needs to be deleted compiles only with kill = 1 */ |
85 | #define DEBUG FALSE |
86 | #define TERMSCALE FALSE |
87 | /* if TERMSCALE TRUE try to use rel_nominals */ |
88 | |
89 | #define D_ZERO (double)0.0 |
90 | #define SLV7(s) ((slv7_system_t)(s)) |
91 | #define SERVER (sys->slv) |
92 | |
93 | #define slv7_IA_SIZE 13 |
94 | #define slv7_RA_SIZE 11 |
95 | #define slv7_CA_SIZE 0 |
96 | #define slv7_VA_SIZE 0 |
97 | /* subscripts for iarray */ |
98 | #define SP7_LIFDS 0 |
99 | #define SP7_SAVLIN 1 |
100 | #define SP7_RELNOM 2 |
101 | #define SP7_CUTOFF 3 |
102 | #define SP7_UPJAC 4 |
103 | #define SP7_UPWTS 5 |
104 | #define SP7_UPNOM 6 |
105 | #define SP7_REDUCE 7 |
106 | #define SP7_EXACT 8 |
107 | #define SP7_CNCOLS 9 |
108 | #define SP7_BTRUNC 10 |
109 | #define SP7_ORDERM 11 |
110 | #define SP7_SAFECALC 12 |
111 | static char *slv7_ianames[slv7_IA_SIZE] = |
112 | {"lifds","savlin","relnom","cutoff","upjac","upwts", |
113 | "upnom","reduce","exact","cncols","btrunc","reorder" |
114 | "safe_calc"}; |
115 | static char *slv7_iaexpln[slv7_IA_SIZE] = { |
116 | "If lifds != 0 and showlessimportant is TRUE, show direct solve details", |
117 | "If savlin != 0, write out matrix data file at each iteration to SlvLinsol.dat", |
118 | "If relnom != 0, use relation nominals to scale the jacobian, else row 2 norm", |
119 | "Cutoff is the block size cutoff for MODEL-based reordering of partitions", |
120 | "Update jacobian every this many major iterations", |
121 | "Update row scalings every this many major iterations", |
122 | "Update column scalings every this many major iterations", |
123 | "Require misunderstood reduction somewhere in the stepping algorithm", |
124 | "Require residual >= some other number in the stepping algorithm", |
125 | "Check jacobian for poorly scaled columns and whine if found", |
126 | "Truncate whole step vector rather than componentwise at variable bound", |
127 | "Reorder option. 0 = MODEL based, 1 = MODEL based2, 2 = simple spk1" |
128 | "Use safe calculation routines" |
129 | }; |
130 | #define UPDATE_JACOBIAN (sys->iarray[SP7_UPJAC]) |
131 | #define UPDATE_WEIGHTS (sys->iarray[SP7_UPWTS]) |
132 | #define UPDATE_NOMINALS (sys->iarray[SP7_UPNOM]) |
133 | #define REDUCE (sys->iarray[SP7_REDUCE]) |
134 | #define EXACT_LINE_SEARCH (sys->iarray[SP7_EXACT]) |
135 | #define DUMPCNORM (sys->iarray[SP7_CNCOLS]) |
136 | #define TRUNCATE (sys->iarray[SP7_BTRUNC]) |
137 | #define SAFE_CALC (sys->iarray[SP7_SAFECALC]) |
138 | |
139 | /* subscripts for rarray */ |
140 | #define SP7_2SMALL 0 |
141 | #define SP7_CNLOW 1 |
142 | #define SP7_CNHIGH 2 |
143 | #define SP7_TBNDS 3 |
144 | #define SP7_POSDEF 4 |
145 | #define SP7_DET0 5 |
146 | #define SP7_SSERRM 6 |
147 | #define SP7_PRMAX 7 |
148 | #define SP7_MINCO 8 |
149 | #define SP7_MAXCO 9 |
150 | #define SP7_GMULT 10 |
151 | static char *slv7_ranames[slv7_RA_SIZE] = |
152 | {"toosmall","cnlow","cnhigh","tobnds","posdef","detzero", |
153 | "steperrmax","prngmin","mincoef","maxcoef","gradmult"}; |
154 | static char *slv7_raexpln[slv7_RA_SIZE] = { |
155 | "Var nominal to use if user specifies 0.0", |
156 | "Smallest column norm we won't complain about if checking", |
157 | "Largest column norm we won't complain about if checking", |
158 | "If bound is in the way, we go this fraction toward it", |
159 | "Hessian fudge number when optimizing", |
160 | "Minimum 2x2 determinant of newton/gradient we consider non-parallel", |
161 | "Step size must be determined this precisely, or prngmin happy", |
162 | "Parameter range must be this narrow to exit inner loop if step size unhappy", |
163 | "'Largest' drop in maxstep allowed", |
164 | "'Smallest' drop in maxstep allowed", |
165 | "Multiplier for gradient step in unpivoted region"}; |
166 | |
167 | #define TOO_SMALL (sys->rarray[SP7_2SMALL]) |
168 | #define CNLOW (sys->rarray[SP7_CNLOW]) |
169 | #define CNHIGH (sys->rarray[SP7_CNHIGH]) |
170 | #define TOWARD_BOUNDS (sys->rarray[SP7_TBNDS]) |
171 | #define POSITIVE_DEFINITE (sys->rarray[SP7_POSDEF]) |
172 | #define DETZERO (sys->rarray[SP7_DET0]) |
173 | #define STEPSIZEERR_MAX (sys->rarray[SP7_SSERRM]) |
174 | #define PARMRNG_MIN (sys->rarray[SP7_PRMAX]) |
175 | #define MIN_COEF (sys->rarray[SP7_MINCO]) |
176 | #define MAX_COEF (sys->rarray[SP7_MAXCO]) |
177 | #define GRAD_MULT (sys->rarray[SP7_GMULT]) |
178 | |
179 | /*********************************************************************\ |
180 | Subparameters implemented: (value/meaning) |
181 | sp.ia[SP7_LIFDS] 0=>do not show full detail info for singletons |
182 | 1=>do (this value ignored if detailed solve info not on. |
183 | sp.ia[SP7_SAVLIN] 0=>do not append linearizations arising in the newton |
184 | scheme to the file SlvLinsol.dat. |
185 | 1=>do. |
186 | sp.ia[SP7_RELNOM] 0=>use row 2 norm in calc_weights. |
187 | 1=>use rel_nominal in calc_weights. |
188 | sp.ia[SP7_CUTOFF] MODEL tearing/reordering cutoff number. |
189 | |
190 | sp.rarray[*] Generally cryptic parameters left by Joe. Someone |
191 | should play with and document them. See the defaults. |
192 | |
193 | if TERMSCALE FALSE, SP7_RELNOM is ignored. |
194 | \*********************************************************************/ |
195 | struct update_data { |
196 | int jacobian; /* Countdown on jacobian updating */ |
197 | int weights; /* Countdown on weights updating */ |
198 | int nominals; /* Countdown on nominals updating */ |
199 | }; |
200 | |
201 | /* varpivots, relpivots used only in optimizing, if we rewrite calc_pivots |
202 | without them. */ |
203 | struct jacobian_data { |
204 | linsolqr_system_t sys; /* Linear system */ |
205 | mtx_matrix_t mtx; /* Transpose gradient of residuals */ |
206 | real64 *rhs; /* RHS from linear system */ |
207 | unsigned *varpivots; /* Pivoted variables */ |
208 | unsigned *relpivots; /* Pivoted relations */ |
209 | unsigned *subregions; /* Set of subregion indeces */ |
210 | dof_t *dofdata; /* dof data pointer from server */ |
211 | mtx_region_t reg; /* Current block region */ |
212 | int32 rank; /* Numerical rank of the jacobian */ |
213 | enum factor_method fm; /* Linear factorization method */ |
214 | boolean accurate; /* ? Recalculate matrix */ |
215 | boolean singular; /* ? Can matrix be inverted */ |
216 | boolean old_partition; /* old value of partition flag */ |
217 | #if NGSLV |
218 | int32 rank_defect; /* Numerical rank defect of the jacobian*/ |
219 | mtx_sparse_t *singcols; |
220 | mtx_sparse_t *pivrows; |
221 | mtx_sparse_t *singrows; |
222 | mtx_sparse_t *pivcols; |
223 | mtx_region_t un_p_col_reg; /* Unpivoted col region of current block */ |
224 | mtx_region_t un_p_row_reg; /* Unpivoted row region of current block */ |
225 | mtx_region_t A12_reg; /* A12 region */ |
226 | mtx_region_t A21_reg; /* A21 region */ |
227 | mtx_region_t A22_reg; /* A22 region */ |
228 | #endif /*NGSLV*/ |
229 | }; |
230 | |
231 | struct hessian_data { |
232 | struct vector_data Bs; /* Product of B and s */ |
233 | struct vector_data y; /* Difference in stationaries */ |
234 | real64 ys; /* inner product of y and s */ |
235 | real64 sBs; /* inner product of s and Bs */ |
236 | struct hessian_data *next; /* previous iteration data */ |
237 | }; |
238 | |
239 | struct reduced_data { |
240 | real64 **mtx; /* Dense matrix */ |
241 | real64 *ZBs; /* Reduced Bs */ |
242 | real64 *Zy; /* Reduced y */ |
243 | int32 order; /* Degrees of freedom */ |
244 | boolean accurate; /* Ready to re-compute ? */ |
245 | }; |
246 | |
247 | /** |
248 | *** line search data for ngslv |
249 | **/ |
250 | struct linesearch_data { |
251 | real64 obj; /* objective function value */ |
252 | real64 obj2; /* 2nd (newt) objective function value */ |
253 | real64 grad; /* objective function gradient */ |
254 | real64 newton_mult; /* scaling factor for newton step (0-1) */ |
255 | real64 full_grad_mult;/* calculated gradient multiplier */ |
256 | real64 grad_mult; /* additional scaling for grad step (0-1) */ |
257 | real64 p_error; |
258 | real64 un_p_error; |
259 | int32 rank; |
260 | int32 rank_defect; |
261 | real64 newton_norm; |
262 | real64 grad_norm; |
263 | }; |
264 | |
265 | struct slv7_system_structure { |
266 | |
267 | /** |
268 | *** Problem definition |
269 | **/ |
270 | slv_system_t slv; /* slv_system_t back-link */ |
271 | struct rel_relation *obj; /* Objective function: NULL = none */ |
272 | struct var_variable **vlist; /* Variable list (NULL terminated) */ |
273 | struct rel_relation **rlist; /* Relation list (NULL terminated) */ |
274 | |
275 | /** |
276 | *** Solver information |
277 | **/ |
278 | int integrity; /* ? Has the system been created */ |
279 | int32 presolved; /* ? Has the system been presolved */ |
280 | slv_parameters_t p; /* Parameters */ |
281 | slv_status_t s; /* Status (as of iteration end) */ |
282 | struct update_data update; /* Jacobian frequency counters */ |
283 | int32 cap; /* Order of matrix/vectors */ |
284 | int32 rank; /* Symbolic rank of problem */ |
285 | int32 vused; /* Free and incident variables */ |
286 | int32 vtot; /* length of varlist */ |
287 | int32 rused; /* Included relations */ |
288 | int32 rtot; /* length of varlist */ |
289 | double clock; /* CPU time */ |
290 | double rarray[slv7_RA_SIZE]; |
291 | int iarray[slv7_IA_SIZE]; |
292 | |
293 | /** |
294 | *** Calculated data (scaled) |
295 | **/ |
296 | struct jacobian_data J; /* linearized system */ |
297 | struct hessian_data *B; /* Curvature information */ |
298 | struct reduced_data ZBZ; /* Reduced hessian */ |
299 | |
300 | struct vector_data nominals; /* Variable nominals */ |
301 | struct vector_data weights; /* Relation weights */ |
302 | struct vector_data variables; /* Variable values */ |
303 | struct vector_data residuals; /* Relation residuals */ |
304 | struct vector_data gradient; /* Objective gradient */ |
305 | struct vector_data multipliers; /* Relation multipliers */ |
306 | struct vector_data stationary; /* Lagrange gradient */ |
307 | struct vector_data gamma; /* Feasibility steepest descent */ |
308 | struct vector_data Jgamma; /* Product of J and gamma */ |
309 | struct vector_data newton; /* Dependent variables */ |
310 | struct vector_data Bnewton; /* Product of B and newton */ |
311 | struct vector_data nullspace; /* Independent variables */ |
312 | struct vector_data varstep1; /* 1st order in variables */ |
313 | struct vector_data Bvarstep1; /* Product of B and varstep1 */ |
314 | struct vector_data varstep2; /* 2nd order in variables */ |
315 | struct vector_data Bvarstep2; /* Product of B and varstep2 */ |
316 | struct vector_data mulstep1; /* 1st order in multipliers */ |
317 | struct vector_data mulstep2; /* 2nd order in multipliers */ |
318 | struct vector_data varstep; /* Step in variables */ |
319 | struct vector_data mulstep; /* Step in multipliers */ |
320 | #if NGSLV |
321 | struct vector_data grad_newton; /* newton grad vec KHACK */ |
322 | struct vector_data grad_newton2; /* 2nd newton grad vec KHACK */ |
323 | struct vector_data un_p_grad; /* Gradient for unpivoted variables */ |
324 | struct vector_data tmp; /* tmp vector for backsolve on U */ |
325 | struct vector_data tmp2; /* tmp vector for grad mult calc */ |
326 | struct vector_data tmp_ls; /* tmp vector for line search */ |
327 | struct linesearch_data line_search; /* data for ngslv line search */ |
328 | #endif /*NGSLV*/ |
329 | real64 objective; /* Objective function evaluation */ |
330 | real64 phi; /* Unconstrained minimizer */ |
331 | real64 maxstep; /* Maximum step size allowed */ |
332 | real64 progress; /* Steepest directional derivative */ |
333 | }; |
334 | |
335 | |
336 | /** |
337 | *** Integrity checks |
338 | *** ---------------- |
339 | *** check_system(sys) |
340 | **/ |
341 | |
342 | #define OK ((int)813029392) |
343 | #define DESTROYED ((int)103289182) |
344 | static int check_system(slv7_system_t sys) |
345 | /** |
346 | *** Checks sys for NULL and for integrity. |
347 | **/ |
348 | { |
349 | if( sys == NULL ) { |
350 | FPRINTF(stderr,"ERROR: (slv7) check_system\n"); |
351 | FPRINTF(stderr," NULL system handle.\n"); |
352 | return 1; |
353 | } |
354 | |
355 | switch( sys->integrity ) { |
356 | case OK: |
357 | return 0; |
358 | case DESTROYED: |
359 | FPRINTF(stderr,"ERROR: (slv7) check_system\n"); |
360 | FPRINTF(stderr," System was recently destroyed.\n"); |
361 | return 1; |
362 | default: |
363 | FPRINTF(stderr,"ERROR: (slv7) check_system\n"); |
364 | FPRINTF(stderr," System reused or never allocated.\n"); |
365 | return 1; |
366 | } |
367 | } |
368 | |
369 | /** |
370 | *** General input/output routines |
371 | *** ----------------------------- |
372 | *** print_var_name(out,sys,var) |
373 | *** print_rel_name(out,sys,rel) |
374 | **/ |
375 | |
376 | #define print_var_name(a,b,c) slv_print_var_name((a),(b)->slv,(c)) |
377 | #define print_rel_name(a,b,c) slv_print_rel_name((a),(b)->slv,(c)) |
378 | |
379 | /** |
380 | *** Debug output routines |
381 | *** --------------------- |
382 | *** debug_delimiter(fp) |
383 | *** debug_out_vector(fp,vec) |
384 | *** debug_out_var_values(fp,sys) |
385 | *** debug_out_rel_residuals(fp,sys) |
386 | *** debug_out_jacobian(fp,sys) |
387 | *** debug_out_hessian(fp,sys) |
388 | *** debug_write_array(fp,real64 *,length) |
389 | **/ |
390 | |
391 | static void debug_delimiter( FILE *fp) |
392 | /** |
393 | *** Outputs a hyphenated line. |
394 | **/ |
395 | { |
396 | int i; |
397 | for( i=0; i<60; i++ ) PUTC('-',fp); |
398 | PUTC('\n',fp); |
399 | } |
400 | |
401 | #if DEBUG |
402 | static void debug_out_vector( FILE *fp, slv7_system_t sys, |
403 | struct vector_data *vec) |
404 | /** |
405 | *** Outputs a vector. |
406 | **/ |
407 | { |
408 | int32 ndx; |
409 | FPRINTF(fp,"Norm = %g, Accurate = %s, Vector range = %d to %d\n", |
410 | calc_sqrt_D0(vec->norm2), vec->accurate?"TRUE":"FALSE", |
411 | vec->rng->low,vec->rng->high); |
412 | FPRINTF(fp,"Vector --> "); |
413 | for( ndx=vec->rng->low ; ndx<=vec->rng->high ; ++ndx ) |
414 | FPRINTF(fp, "%g ", vec->vec[ndx]); |
415 | PUTC('\n',fp); |
416 | } |
417 | |
418 | static void debug_out_var_values( FILE *fp, slv7_system_t sys) |
419 | /** |
420 | *** Outputs all variable values in current block. |
421 | **/ |
422 | { |
423 | int32 col; |
424 | struct var_variable *var; |
425 | |
426 | FPRINTF(fp,"Var values --> \n"); |
427 | for( col = sys->J.reg.col.low; col <= sys->J.reg.col.high ; col++ ) { |
428 | var = sys->vlist[mtx_col_to_org(sys->J.mtx,col)]; |
429 | print_var_name(fp,sys,var); |
430 | FPRINTF(fp, "\nI Lb Value Ub Scale Col INom\n"); |
431 | FPRINTF(fp,"%d\t%.4g\t%.4g\t%.4g\t%.4g\t%d\t%.4g\n", |
432 | var_sindex(var),var_lower_bound(var),var_value(var), |
433 | var_upper_bound(var),var_nominal(var), |
434 | col,sys->nominals.vec[col]); |
435 | } |
436 | } |
437 | |
438 | static void debug_out_rel_residuals( FILE *fp, slv7_system_t sys) |
439 | /** |
440 | *** Outputs all relation residuals in current block. |
441 | **/ |
442 | { |
443 | int32 row; |
444 | |
445 | FPRINTF(fp,"Rel residuals --> \n"); |
446 | for( row = sys->J.reg.row.low; row <= sys->J.reg.row.high ; row++ ) { |
447 | struct rel_relation *rel; |
448 | rel = sys->rlist[mtx_row_to_org(sys->J.mtx,row)]; |
449 | FPRINTF(fp," %g : ",rel_residual(rel)); |
450 | print_rel_name(fp,sys,rel); |
451 | PUTC('\n',fp); |
452 | } |
453 | PUTC('\n',fp); |
454 | } |
455 | |
456 | static void debug_out_jacobian( FILE *fp, slv7_system_t sys) |
457 | /** |
458 | *** Outputs permutation and values of the nonzero elements in the |
459 | *** the jacobian matrix. |
460 | **/ |
461 | { |
462 | mtx_coord_t nz; |
463 | real64 value; |
464 | |
465 | nz.row = sys->J.reg.row.low; |
466 | for( ; nz.row <= sys->J.reg.row.high; ++(nz.row) ) { |
467 | FPRINTF(fp," Row %d (rel %d)\n", nz.row, |
468 | mtx_row_to_org(sys->J.mtx,nz.row)); |
469 | nz.col = mtx_FIRST; |
470 | while( value = mtx_next_in_row(sys->J.mtx,&nz,&(sys->J.reg.col)), |
471 | nz.col != mtx_LAST ) { |
472 | FPRINTF(fp," Col %d (var %d) has value %g\n", nz.col, |
473 | mtx_col_to_org(sys->J.mtx,nz.col), value); |
474 | } |
475 | } |
476 | } |
477 | |
478 | static void debug_out_hessian( FILE *fp, slv7_system_t sys) |
479 | /** |
480 | *** Outputs permutation and values of the nonzero elements in the |
481 | *** reduced hessian matrix. |
482 | **/ |
483 | { |
484 | mtx_coord_t nz; |
485 | |
486 | for( nz.row = 0; nz.row < sys->ZBZ.order; nz.row++ ) { |
487 | nz.col = nz.row + sys->J.reg.col.high + 1 - sys->ZBZ.order; |
488 | FPRINTF(fp," ZBZ[%d (var %d)] = ", |
489 | nz.row, mtx_col_to_org(sys->J.mtx,nz.col)); |
490 | for( nz.col = 0; nz.col < sys->ZBZ.order; nz.col++ ) { |
491 | FPRINTF(fp,"%10g ",sys->ZBZ.mtx[nz.row][nz.col]); |
492 | } |
493 | PUTC('\n',fp); |
494 | } |
495 | } |
496 | |
497 | #endif |
498 | |
499 | static void debug_write_array(FILE *fp,real64 *vec, int32 length) |
500 | { |
501 | int32 i; |
502 | for (i=0; i< length;i++) |
503 | FPRINTF(fp,"%.20g\n",vec[i]); |
504 | } |
505 | |
506 | static char savlinfilename[]="SlvLinsol.dat. \0"; |
507 | static char savlinfilebase[]="SlvLinsol.dat.\0"; |
508 | static int savlinnum=0; |
509 | /** The number to postfix to savlinfilebase. increases with file accesses. **/ |
510 | |
511 | /** |
512 | *** Array/vector operations |
513 | *** ---------------------------- |
514 | *** destroy_array(p) |
515 | *** create_array(len,type) |
516 | *** zero_array(arr,len,type) |
517 | *** |
518 | *** zero_vector(vec) |
519 | *** copy_vector(vec1,vec2) |
520 | *** prod = inner_product(vec1,vec2) |
521 | *** norm2 = square_norm(vec) |
522 | *** matrix_product(mtx,vec,prod,scale,transpose) |
523 | **/ |
524 | |
525 | #define destroy_array(p) \ |
526 | if( (p) != NULL ) ascfree((p)) |
527 | #define create_array(len,type) \ |
528 | ((len) > 0 ? (type *)ascmalloc((len)*sizeof(type)) : NULL) |
529 | #define create_zero_array(len,type) \ |
530 | ((len) > 0 ? (type *)asccalloc((len),sizeof(type)) : NULL) |
531 | #define zero_array(arr,nelts,type) \ |
532 | mem_zero_byte_cast((arr),0,(nelts)*sizeof(type)) |
533 | /* Zeros an array of nelts objects, each having given type. */ |
534 | |
535 | #define zero_vector(v) slv_zero_vector(v) |
536 | #define copy_vector(v,t) slv_copy_vector((v),(t)) |
537 | #define inner_product(v,u) slv_inner_product((v),(u)) |
538 | #define square_norm(v) slv_square_norm(v) |
539 | #define matrix_product(m,v,p,s,t) slv_matrix_product((m),(v),(p),(s),(t)) |
540 | |
541 | /** |
542 | *** Calculation routines |
543 | *** -------------------- |
544 | *** ok = calc_objective(sys) |
545 | *** ok = calc_boundaries(sys) |
546 | *** ok = calc_residuals(sys) |
547 | *** ok = calc_J(sys) |
548 | *** calc_nominals(sys) |
549 | *** calc_weights(sys) |
550 | *** scale_J(sys) |
551 | *** scale_variables(sys) |
552 | *** scale_residuals(sys) |
553 | *** ok = calc_gradient(sys) |
554 | *** calc_B(sys) |
555 | *** calc_ZBZ(sys) |
556 | *** calc_pivots(sys) |
557 | *** calc_rhs(sys) |
558 | *** calc_multipliers(sys) |
559 | *** calc_stationary(sys) |
560 | *** calc_newton(sys) |
561 | *** calc_Bnewton(sys) |
562 | *** calc_nullspace(sys) |
563 | *** calc_gamma(sys) |
564 | *** calc_Jgamma(sys) |
565 | *** calc_varstep1(sys) |
566 | *** calc_Bvarstep1(sys) |
567 | *** calc_varstep2(sys) |
568 | *** calc_Bvarstep2(sys) |
569 | *** calc_mulstep1(sys) |
570 | *** calc_mulstep2(sys) |
571 | *** calc_varstep(sys) |
572 | *** calc_mulstep(sys) |
573 | *** calc_phi(sys) |
574 | **/ |
575 | |
576 | |
577 | #define OPTIMIZING(sys) ((sys)->ZBZ.order > 0) |
578 | |
579 | static boolean calc_objective( slv7_system_t sys) |
580 | /** |
581 | *** Evaluate the objective function. |
582 | **/ |
583 | { |
584 | calc_ok = TRUE; |
585 | Asc_SignalHandlerPush(SIGFPE,SIG_IGN); |
586 | sys->objective = (sys->obj ? relman_eval(sys->obj,&calc_ok,SAFE_CALC) : 0.0); |
587 | Asc_SignalHandlerPop(SIGFPE,SIG_IGN); |
588 | return calc_ok; |
589 | } |
590 | |
591 | |
592 | static boolean calc_inequalities( slv7_system_t sys) |
593 | /** |
594 | *** Calculates all of the residuals of included inequalities. |
595 | *** Returns true iff (calculations preceded without error and |
596 | *** all inequalities are satisfied.) |
597 | **/ |
598 | { |
599 | struct rel_relation **rp; |
600 | boolean satisfied=TRUE; |
601 | static rel_filter_t rfilter; |
602 | rfilter.matchbits = (REL_INCLUDED | REL_EQUALITY | REL_ACTIVE); |
603 | rfilter.matchvalue = (REL_INCLUDED | REL_ACTIVE); |
604 | |
605 | calc_ok = TRUE; |
606 | Asc_SignalHandlerPush(SIGFPE,SIG_IGN); |
607 | for (rp=sys->rlist;*rp != NULL; rp++) { |
608 | if (rel_apply_filter(*rp,&rfilter)) { |
609 | relman_eval(*rp,&calc_ok,SAFE_CALC); |
610 | satisfied= satisfied && |
611 | #if TERMSCALE |
612 | relman_calc_satisfied_scaled(*rp,sys->p.tolerance.feasible); |
613 | #else |
614 | relman_calc_satisfied(*rp,sys->p.tolerance.feasible); |
615 | #endif |
616 | } |
617 | } |
618 | Asc_SignalHandlerPop(SIGFPE,SIG_IGN); |
619 | return (calc_ok && satisfied); |
620 | } |
621 | |
622 | static boolean calc_residuals( slv7_system_t sys) |
623 | /** |
624 | *** Calculates all of the residuals in the current block and computes |
625 | *** the residual norm for block status. Returns true iff calculations |
626 | *** preceded without error. |
627 | **/ |
628 | { |
629 | int32 row; |
630 | struct rel_relation *rel; |
631 | double time0; |
632 | |
633 | if( sys->residuals.accurate ) return TRUE; |
634 | |
635 | calc_ok = TRUE; |
636 | row = sys->residuals.rng->low; |
637 | time0=tm_cpu_time(); |
638 | Asc_SignalHandlerPush(SIGFPE,SIG_IGN); |
639 | for( ; row <= sys->residuals.rng->high; row++ ) { |
640 | rel = sys->rlist[mtx_row_to_org(sys->J.mtx,row)]; |
641 | #if DEBUG |
642 | if (!rel) { |
643 | int r; |
644 | r=mtx_row_to_org(sys->J.mtx,row); |
645 | FPRINTF(stderr,"NULL relation found !!\n"); |
646 | FPRINTF(stderr,"at row %d rel %d in calc_residuals\n",(int)row,r); |
647 | FFLUSH(stderr); |
648 | } |
649 | #endif |
650 | sys->residuals.vec[row] = relman_eval(rel,&calc_ok,SAFE_CALC); |
651 | #if TERMSCALE |
652 | relman_calc_satisfied_scaled(rel,sys->p.tolerance.feasible); |
653 | #else |
654 | relman_calc_satisfied(rel,sys->p.tolerance.feasible); |
655 | #endif |
656 | } |
657 | Asc_SignalHandlerPop(SIGFPE,SIG_IGN); |
658 | sys->s.block.functime += (tm_cpu_time() -time0); |
659 | sys->s.block.funcs++; |
660 | square_norm( &(sys->residuals) ); |
661 | sys->s.block.residual = calc_sqrt_D0(sys->residuals.norm2); |
662 | return(calc_ok); |
663 | } |
664 | |
665 | |
666 | static boolean calc_J( slv7_system_t sys) |
667 | /** |
668 | *** Calculates the current block of the jacobian. |
669 | *** It is initially unscaled. |
670 | **/ |
671 | { |
672 | int32 row; |
673 | var_filter_t vfilter; |
674 | double time0; |
675 | real64 resid; |
676 | |
677 | if( sys->J.accurate ) |
678 | return TRUE; |
679 | |
680 | calc_ok = TRUE; |
681 | vfilter.matchbits = (VAR_INBLOCK | VAR_ACTIVE); |
682 | vfilter.matchvalue = (VAR_INBLOCK | VAR_ACTIVE); |
683 | time0=tm_cpu_time(); |
684 | mtx_clear_region(sys->J.mtx,&(sys->J.reg)); |
685 | for( row = sys->J.reg.row.low; row <= sys->J.reg.row.high; row++ ) { |
686 | struct rel_relation *rel; |
687 | rel = sys->rlist[mtx_row_to_org(sys->J.mtx,row)]; |
688 | relman_diffs(rel,&vfilter,sys->J.mtx,&resid,SAFE_CALC); |
689 | } |
690 | sys->s.block.jactime += (tm_cpu_time() - time0); |
691 | sys->s.block.jacs++; |
692 | |
693 | if( --(sys->update.nominals) <= 0 ) sys->nominals.accurate = FALSE; |
694 | if( --(sys->update.weights) <= 0 ) sys->weights.accurate = FALSE; |
695 | |
696 | linsolqr_matrix_was_changed(sys->J.sys); |
697 | return(calc_ok); |
698 | } |
699 | |
700 | |
701 | static void calc_nominals( slv7_system_t sys) |
702 | /** |
703 | *** Retrieves the nominal values of all of the block variables, |
704 | *** insuring that they are all strictly positive. |
705 | **/ |
706 | { |
707 | int32 col; |
708 | FILE *fp = MIF(sys); |
709 | if( sys->nominals.accurate ) return; |
710 | fp = MIF(sys); |
711 | col = sys->nominals.rng->low; |
712 | for( ; col <= sys->nominals.rng->high; col++ ) { |
713 | struct var_variable *var; |
714 | real64 n; |
715 | var = sys->vlist[mtx_col_to_org(sys->J.mtx,col)]; |
716 | n = var_nominal(var); |
717 | if( n <= 0.0 ) { |
718 | if( n == 0.0 ) { |
719 | n = TOO_SMALL; |
720 | FPRINTF(fp,"ERROR: (slv7) calc_nominals\n"); |
721 | FPRINTF(fp," Variable "); |
722 | print_var_name(fp,sys,var); |
723 | FPRINTF(fp," \nhas nominal value of zero.\n"); |
724 | FPRINTF(fp," Resetting to %g.\n",n); |
725 | var_set_nominal(var,n); |
726 | } else { |
727 | n = -n; |
728 | FPRINTF(fp,"ERROR: (slv7) calc_nominals\n"); |
729 | FPRINTF(fp," Variable "); |
730 | print_var_name(fp,sys,var); |
731 | FPRINTF(fp," \nhas negative nominal value.\n"); |
732 | FPRINTF(fp," Resetting to %g.\n",n); |
733 | var_set_nominal(var,n); |
734 | } |
735 | } |
736 | #if DEBUG |
737 | FPRINTF(fp,"Column %d is",col); |
738 | print_var_name(fp,sys,var); |
739 | FPRINTF(fp,"\nScaling of column %d is %g\n",col,n); |
740 | #endif |
741 | sys->nominals.vec[col] = n; |
742 | } |
743 | square_norm( &(sys->nominals) ); |
744 | sys->update.nominals = UPDATE_NOMINALS; |
745 | sys->nominals.accurate = TRUE; |
746 | } |
747 | |
748 | #if TERMSCALE |
749 | /* rel_nominal scaling of rows supported as an alternative if |
750 | TERMSCALE true. scales by 2 norm if SP7_RELNOM==0 */ |
751 | static void calc_weights( slv7_system_t sys) |
752 | /** |
753 | *** Calculates the weights of all of the block relations |
754 | *** to scale the rows of the Jacobian. |
755 | *** Switch between interface and 2norm weight by SP7_RELNOM. |
756 | **/ |
757 | { |
758 | mtx_coord_t nz; |
759 | |
760 | if( sys->weights.accurate ) |
761 | return; |
762 | |
763 | nz.row = sys->weights.rng->low; |
764 | if (!(sys->iarray[SP7_RELNOM])) { |
765 | for( ; nz.row <= sys->weights.rng->high; (nz.row)++ ) { |
766 | real64 sum; |
767 | sum=mtx_sum_sqrs_in_row(sys->J.mtx,nz.row,&(sys->J.reg.col)); |
768 | sys->weights.vec[nz.row] = (sum>0.0) ? 1.0/calc_sqrt_D0(sum) : 1.0; |
769 | } |
770 | } else { |
771 | for( ; nz.row <= sys->weights.rng->high; (nz.row)++ ) { |
772 | sys->weights.vec[nz.row] = |
773 | 1.0/rel_nominal(sys->rlist[mtx_row_to_org(sys->J.mtx,nz.row)]); |
774 | } |
775 | } |
776 | square_norm( &(sys->weights) ); |
777 | sys->update.weights = UPDATE_WEIGHTS; |
778 | sys->residuals.accurate = FALSE; |
779 | sys->weights.accurate = TRUE; |
780 | } |
781 | #else |
782 | /* 2 norm scaling of rows */ |
783 | |
784 | static void calc_weights( slv7_system_t sys) |
785 | /** |
786 | *** Calculates the weights of all of the block relations |
787 | *** to scale the rows of the Jacobian. |
788 | **/ |
789 | { |
790 | mtx_coord_t nz; |
791 | |
792 | if( sys->weights.accurate ) |
793 | return; |
794 | |
795 | nz.row = sys->weights.rng->low; |
796 | for( ; nz.row <= sys->weights.rng->high; (nz.row)++ ) { |
797 | real64 sum; |
798 | sum=mtx_sum_sqrs_in_row(sys->J.mtx,nz.row,&(sys->J.reg.col)); |
799 | sys->weights.vec[nz.row] = (sum>0.0) ? 1.0/calc_sqrt_D0(sum) : 1.0; |
800 | } |
801 | square_norm( &(sys->weights) ); |
802 | sys->update.weights = UPDATE_WEIGHTS; |
803 | sys->residuals.accurate = FALSE; |
804 | sys->weights.accurate = TRUE; |
805 | } |
806 | /* end of termscale alternative calcweights */ |
807 | #endif |
808 | |
809 | static void scale_J( slv7_system_t sys) |
810 | /** |
811 | *** Scales the jacobian. |
812 | **/ |
813 | { |
814 | int32 row; |
815 | int32 col; |
816 | |
817 | if( sys->J.accurate ) return; |
818 | |
819 | calc_nominals(sys); |
820 | for( col=sys->J.reg.col.low; col <= sys->J.reg.col.high; col++ ) |
821 | mtx_mult_col(sys->J.mtx,col,sys->nominals.vec[col],&(sys->J.reg.row)); |
822 | |
823 | calc_weights(sys); |
824 | for( row=sys->J.reg.row.low; row <= sys->J.reg.row.high; row++ ) |
825 | mtx_mult_row(sys->J.mtx,row,sys->weights.vec[row],&(sys->J.reg.col)); |
826 | |
827 | if (DUMPCNORM) { |
828 | for( col=sys->J.reg.col.low; col <= sys->J.reg.col.high; col++ ) { |
829 | real64 cnorm; |
830 | cnorm = |
831 | calc_sqrt_D0(mtx_sum_sqrs_in_col(sys->J.mtx,col,&(sys->J.reg.row))); |
832 | if (cnorm >CNHIGH || cnorm <CNLOW) { |
833 | FPRINTF(stderr,"[col %d org %d] %g\n", col, |
834 | mtx_col_to_org(sys->J.mtx,col), cnorm); |
835 | } |
836 | } |
837 | } |
838 | |
839 | sys->update.jacobian = UPDATE_JACOBIAN; |
840 | sys->J.accurate = TRUE; |
841 | sys->J.singular = FALSE; /* yet to be determined */ |
842 | #if DEBUG |
843 | FPRINTF(LIF(sys),"\nJacobian: \n"); |
844 | debug_out_jacobian(LIF(sys),sys); |
845 | #endif |
846 | } |
847 | |
848 | |
849 | static void scale_variables( slv7_system_t sys) |
850 | { |
851 | int32 col; |
852 | |
853 | if( sys->variables.accurate ) return; |
854 | |
855 | col = sys->variables.rng->low; |
856 | for( ; col <= sys->variables.rng->high; col++ ) { |
857 | struct var_variable *var = sys->vlist[mtx_col_to_org(sys->J.mtx,col)]; |
858 | sys->variables.vec[col] = var_value(var)/sys->nominals.vec[col]; |
859 | } |
860 | square_norm( &(sys->variables) ); |
861 | sys->variables.accurate = TRUE; |
862 | #if DEBUG |
863 | FPRINTF(LIF(sys),"Variables: "); |
864 | debug_out_vector(LIF(sys),sys,&(sys->variables)); |
865 | #endif |
866 | } |
867 | |
868 | |
869 | static void scale_residuals( slv7_system_t sys) |
870 | /** |
871 | *** Scales the previously calculated residuals. |
872 | **/ |
873 | { |
874 | int32 row; |
875 | |
876 | if( sys->residuals.accurate ) return; |
877 | |
878 | row = sys->residuals.rng->low; |
879 | for( ; row <= sys->residuals.rng->high; row++ ) { |
880 | struct rel_relation *rel = sys->rlist[mtx_row_to_org(sys->J.mtx,row)]; |
881 | sys->residuals.vec[row] = rel_residual(rel)*sys->weights.vec[row]; |
882 | } |
883 | square_norm( &(sys->residuals) ); |
884 | sys->residuals.accurate = TRUE; |
885 | #if DEBUG |
886 | FPRINTF(LIF(sys),"Residuals: "); |
887 | debug_out_vector(LIF(sys),sys,&(sys->residuals)); |
888 | #endif |
889 | } |
890 | |
891 | static boolean calc_gradient(slv7_system_t sys) |
892 | /** |
893 | *** Calculate scaled gradient of the objective function. |
894 | **/ |
895 | { |
896 | /* |
897 | * |
898 | * This entire function needs to be reimplemented with relman_diffs. |
899 | * |
900 | */ |
901 | if( sys->gradient.accurate ) return TRUE; |
902 | |
903 | calc_ok = TRUE; |
904 | if ( !OPTIMIZING(sys) ) { |
905 | zero_vector(&(sys->gradient)); |
906 | sys->gradient.norm2 = 0.0; |
907 | } else { |
908 | real64 pd; |
909 | const struct var_variable **vp; |
910 | var_filter_t vfilter; |
911 | vfilter.matchbits = (VAR_INBLOCK | VAR_SVAR | VAR_ACTIVE); |
912 | vfilter.matchvalue = (VAR_INBLOCK | VAR_SVAR | VAR_ACTIVE); |
913 | zero_vector(&(sys->gradient)); |
914 | Asc_Panic(2, "calc_gradient", "abort"); |
915 | /* the next line will core dump anyway since vp not null-terminated*/ |
916 | for( vp = rel_incidence_list(sys->obj) ; *vp != NULL ; ++vp ) { |
917 | int32 col; |
918 | col = mtx_org_to_col(sys->J.mtx,var_sindex(*vp)); |
919 | if( var_apply_filter(*vp,&vfilter) ) { |
920 | relman_diff(sys->obj,*vp,&pd,SAFE_CALC); |
921 | sys->gradient.vec[col] = sys->nominals.vec[col]*pd; |
922 | } |
923 | } |
924 | square_norm( &(sys->gradient) ); |
925 | } |
926 | sys->gradient.accurate = TRUE; |
927 | #if DEBUG |
928 | FPRINTF(LIF(sys),"Gradient: "); |
929 | debug_out_vector(LIF(sys),sys,&(sys->gradient)); |
930 | #endif |
931 | return calc_ok; |
932 | } |
933 | |
934 | static void create_update( slv7_system_t sys) |
935 | /** |
936 | *** Create a new hessian_data structure for storing |
937 | *** latest update information. |
938 | **/ |
939 | { |
940 | struct hessian_data *update; |
941 | |
942 | if( !OPTIMIZING(sys) ) |
943 | return; |
944 | |
945 | update = (struct hessian_data *)ascmalloc(sizeof(struct hessian_data)); |
946 | update->y.vec = create_array(sys->cap,real64); |
947 | update->y.rng = &(sys->J.reg.col); |
948 | update->y.accurate = FALSE; |
949 | update->Bs.vec = create_array(sys->cap,real64); |
950 | update->Bs.rng = &(sys->J.reg.col); |
951 | update->Bs.accurate = FALSE; |
952 | update->next = sys->B; |
953 | sys->B = update; |
954 | } |
955 | |
956 | |
957 | static void calc_B( slv7_system_t sys) |
958 | /** |
959 | *** Computes a rank 2 BFGS update to the hessian matrix |
960 | *** B which accumulates curvature. |
961 | **/ |
962 | { |
963 | if( sys->s.block.iteration > 1 ) { |
964 | create_update(sys); |
965 | } else { |
966 | if( sys->B ) { |
967 | struct hessian_data *update; |
968 | for( update=sys->B; update != NULL; ) { |
969 | struct hessian_data *handle; |
970 | handle = update; |
971 | update = update->next; |
972 | destroy_array(handle->y.vec); |
973 | destroy_array(handle->Bs.vec); |
974 | ascfree(handle); |
975 | } |
976 | sys->B = NULL; |
977 | } |
978 | } |
979 | if( sys->B ) { |
980 | real64 theta; |
981 | /** |
982 | *** The y vector |
983 | **/ |
984 | if( !sys->B->y.accurate ) { |
985 | int32 col; |
986 | matrix_product(sys->J.mtx, &(sys->multipliers), |
987 | &(sys->B->y), 1.0, TRUE); |
988 | col = sys->B->y.rng->low; |
989 | for( ; col <= sys->B->y.rng->high; col++ ) { |
990 | sys->B->y.vec[col] += sys->gradient.vec[col] - |
991 | sys->stationary.vec[col]; |
992 | } |
993 | square_norm( &(sys->B->y) ); |
994 | sys->B->y.accurate = TRUE; |
995 | } |
996 | |
997 | /** |
998 | *** The Bs vector |
999 | **/ |
1000 | if( !sys->B->Bs.accurate ) { |
1001 | struct hessian_data *update; |
1002 | copy_vector(&(sys->varstep),&(sys->B->Bs)); |
1003 | for( update=sys->B->next; update != NULL; update = update->next ) { |
1004 | int32 col; |
1005 | real64 ys = inner_product( &(update->y),&(sys->varstep) ); |
1006 | real64 sBs = inner_product( &(update->Bs),&(sys->varstep) ); |
1007 | col = sys->B->Bs.rng->low; |
1008 | for( ; col<=sys->B->Bs.rng->high; col++) { |
1009 | sys->B->Bs.vec[col] += update->ys > 0.0 ? |
1010 | (update->y.vec[col])*ys/update->ys : 0.0; |
1011 | sys->B->Bs.vec[col] -= update->sBs > 0.0 ? |
1012 | (update->Bs.vec[col])*sBs/update->sBs : 0.0; |
1013 | } |
1014 | } |
1015 | square_norm( &(sys->B->Bs) ); |
1016 | sys->B->Bs.accurate = TRUE; |
1017 | } |
1018 | |
1019 | sys->B->ys = inner_product( &(sys->B->y),&(sys->varstep) ); |
1020 | sys->B->sBs = inner_product( &(sys->B->Bs),&(sys->varstep) ); |
1021 | |
1022 | if( sys->B->ys == 0.0 && sys->B->sBs == 0.0 ) { |
1023 | theta = 0.0; |
1024 | } else { |
1025 | theta = sys->B->ys < POSITIVE_DEFINITE*sys->B->sBs ? |
1026 | (1.0-POSITIVE_DEFINITE)*sys->B->sBs/(sys->B->sBs - sys->B->ys):1.0; |
1027 | } |
1028 | #if DEBUG |
1029 | FPRINTF(LIF(sys),"ys, sBs, PD, theta = %g, %g, %g, %g\n", |
1030 | sys->B->ys, |
1031 | sys->B->sBs, |
1032 | POSITIVE_DEFINITE, |
1033 | theta); |
1034 | #endif |
1035 | if( theta < 1.0 ) { |
1036 | int32 col; |
1037 | col = sys->B->y.rng->low; |
1038 | for( ; col <= sys->B->y.rng->high; col++ ) |
1039 | sys->B->y.vec[col] = theta*sys->B->y.vec[col] + |
1040 | (1.0-theta)*sys->B->Bs.vec[col]; |
1041 | square_norm( &(sys->B->y) ); |
1042 | sys->B->ys = theta*sys->B->ys + (1.0-theta)*sys->B->sBs; |
1043 | } |
1044 | } |
1045 | } |
1046 | |
1047 | |
1048 | static int calc_pivots(slv7_system_t sys) |
1049 | /** |
1050 | *** Obtain the equations and variables which |
1051 | *** are able to be pivoted. |
1052 | *** return value is the row rank deficiency, which we hope is 0. |
1053 | **/ |
1054 | { |
1055 | int row_rank_defect=0; |
1056 | linsolqr_system_t lsys = sys->J.sys; |
1057 | |
1058 | linsolqr_factor(lsys,sys->J.fm); /* factor */ |
1059 | |
1060 | if (OPTIMIZING(sys)) { |
1061 | /* need things for nullspace move. don't care about |
1062 | * dependency coefficiency in any circumstances at present. |
1063 | */ |
1064 | linsolqr_calc_col_dependencies(lsys); |
1065 | set_null(sys->J.relpivots,sys->cap); |
1066 | set_null(sys->J.varpivots,sys->cap); |
1067 | linsolqr_get_pivot_sets(lsys,sys->J.relpivots,sys->J.varpivots); |
1068 | } |
1069 | |
1070 | sys->J.rank = linsolqr_rank(lsys); |
1071 | sys->J.singular = FALSE; |
1072 | row_rank_defect = sys->J.reg.row.high - |
1073 | sys->J.reg.row.low+1 - sys->J.rank; |
1074 | if( row_rank_defect > 0 ) { |
1075 | sys->J.singular = TRUE; |
1076 | } |
1077 | #if KDEBUG |
1078 | sys->J.pivrows = linsolqr_pivoted_rows(lsys); |
1079 | sys->J.singrows = linsolqr_unpivoted_rows(lsys); |
1080 | #endif /* KDEBUG */ |
1081 | return row_rank_defect; |
1082 | } |
1083 | |
1084 | static void zero_un_p_weights(slv7_system_t sys) |
1085 | /* This function used to be part of calc_pivots but |
1086 | it seemed to be a severe side effect and has been |
1087 | moved */ |
1088 | { |
1089 | FILE *fp = LIF(sys); |
1090 | |
1091 | if( sys->J.rank_defect > 0 ) { |
1092 | linsolqr_system_t lsys = sys->J.sys; |
1093 | int32 row,krow; |
1094 | mtx_sparse_t *uprows=NULL; |
1095 | sys->J.singular = TRUE; |
1096 | uprows = linsolqr_unpivoted_rows(lsys); |
1097 | if (uprows !=NULL) { |
1098 | for( krow=0; krow < uprows->len ; krow++ ) { |
1099 | int32 org_row; |
1100 | struct rel_relation *rel; |
1101 | |
1102 | org_row = uprows->idata[krow]; |
1103 | row = mtx_org_to_row(sys->J.mtx,org_row); |
1104 | rel = sys->rlist[org_row]; |
1105 | FPRINTF(fp,"%-40s ---> ","Relation not pivoted"); |
1106 | print_rel_name(fp,sys,rel); |
1107 | PUTC('\n',fp); |
1108 | |
1109 | /** |
1110 | *** assign zeros to the corresponding weights |
1111 | *** so that subsequent calls to "scale_residuals" |
1112 | *** will only measure the pivoted equations. |
1113 | **/ |
1114 | sys->weights.vec[row] = 0.0; |
1115 | sys->residuals.vec[row] = 0.0; |
1116 | sys->residuals.accurate = FALSE; |
1117 | mtx_mult_row(sys->J.mtx,row,0.0,&(sys->J.reg.col)); |
1118 | } |
1119 | mtx_destroy_sparse(uprows); |
1120 | } |
1121 | if( !sys->residuals.accurate ) { |
1122 | square_norm( &(sys->residuals) ); |
1123 | sys->residuals.accurate = TRUE; |
1124 | sys->update.weights = 0; /* re-compute weights next iteration. */ |
1125 | } |
1126 | } |
1127 | if( sys->J.rank < sys->J.reg.col.high-sys->J.reg.col.low+1 ) { |
1128 | int32 col,kcol; |
1129 | mtx_sparse_t *upcols=NULL; |
1130 | if (NOTNULL(upcols)) { |
1131 | for( kcol=0; upcols != NULL && kcol < upcols->len ; kcol++ ) { |
1132 | int32 org_col; |
1133 | struct var_variable *var; |
1134 | |
1135 | org_col = upcols->idata[kcol]; |
1136 | col = mtx_org_to_col(sys->J.mtx,org_col); |
1137 | var = sys->vlist[org_col]; |
1138 | FPRINTF(fp,"%-40s ---> ","Variable not pivoted"); |
1139 | print_var_name(fp,sys,var); |
1140 | PUTC('\n',fp); |
1141 | /** |
1142 | *** If we're not optimizing (everything should be |
1143 | *** pivotable) or this was one of the dependent variables, |
1144 | *** consider this variable as if it were fixed. |
1145 | **/ |
1146 | if( col <= sys->J.reg.col.high - sys->ZBZ.order ) { |
1147 | mtx_mult_col(sys->J.mtx,col,0.0,&(sys->J.reg.row)); |
1148 | } |
1149 | } |
1150 | mtx_destroy_sparse(upcols); |
1151 | } |
1152 | } |
1153 | if (sys->p.output.less_important) { |
1154 | FPRINTF(LIF(sys),"%-40s ---> %d (%s)\n","Jacobian rank", sys->J.rank, |
1155 | sys->J.singular ? "deficient":"full"); |
1156 | FPRINTF(LIF(sys),"%-40s ---> %g\n","Smallest pivot", |
1157 | linsolqr_smallest_pivot(sys->J.sys)); |
1158 | } |
1159 | } |
1160 | |
1161 | static void calc_ZBZ(slv7_system_t sys) |
1162 | /** |
1163 | *** Updates the reduced hessian matrix. |
1164 | *** if !OPTIMIZING just sets zbz.accurate true and returns. |
1165 | **/ |
1166 | { |
1167 | mtx_coord_t nz; |
1168 | |
1169 | if( sys->ZBZ.accurate ) return; |
1170 | |
1171 | for( nz.row = 0; nz.row < sys->ZBZ.order; nz.row++ ) { |
1172 | for( nz.col = 0; nz.col <= nz.row; nz.col++ ) { |
1173 | int32 col, depr, depc; |
1174 | col = nz.row+sys->J.reg.col.high+1-sys->ZBZ.order; |
1175 | depr = mtx_col_to_org(sys->J.mtx,col); |
1176 | col = nz.col+sys->J.reg.col.high+1-sys->ZBZ.order; |
1177 | depc = mtx_col_to_org(sys->J.mtx,col); |
1178 | sys->ZBZ.mtx[nz.row][nz.col] = (nz.row==nz.col ? 1.0 : 0.0); |
1179 | col = sys->J.reg.col.low; |
1180 | for( ; col <= sys->J.reg.col.high - sys->ZBZ.order; col++ ) { |
1181 | int32 ind = mtx_col_to_org(sys->J.mtx,col); |
1182 | if( set_is_member(sys->J.varpivots,ind) ) |
1183 | sys->ZBZ.mtx[nz.row][nz.col] += |
1184 | (-linsolqr_org_col_dependency(sys->J.sys,depr,ind))* |
1185 | (-linsolqr_org_col_dependency(sys->J.sys,depc,ind)); |
1186 | } |
1187 | if( nz.row != nz.col ) |
1188 | sys->ZBZ.mtx[nz.col][nz.row] = |
1189 | sys->ZBZ.mtx[nz.row][nz.col]; |
1190 | } |
1191 | } |
1192 | if( OPTIMIZING(sys) ) { |
1193 | struct hessian_data *update; |
1194 | for( update=sys->B; update != NULL; update = update->next ) { |
1195 | mtx_coord_t nz; |
1196 | for( nz.row=0; nz.row < sys->ZBZ.order; nz.row++ ) { |
1197 | int32 col, dep; |
1198 | col = nz.row + sys->J.reg.col.high + 1 - sys->ZBZ.order; |
1199 | dep = mtx_col_to_org(sys->J.mtx,col); |
1200 | sys->ZBZ.Zy[nz.row] = update->y.vec[col]; |
1201 | sys->ZBZ.ZBs[nz.row] = update->Bs.vec[col]; |
1202 | col = sys->J.reg.col.low; |
1203 | for( ; col <= sys->J.reg.col.high - sys->ZBZ.order; col++ ) { |
1204 | int32 ind = mtx_col_to_org(sys->J.mtx,col); |
1205 | if( set_is_member(sys->J.varpivots,ind) ) { |
1206 | sys->ZBZ.Zy[nz.row] += update->y.vec[col]* |
1207 | (-linsolqr_org_col_dependency(sys->J.sys,dep,ind)); |
1208 | sys->ZBZ.ZBs[nz.row] += update->Bs.vec[col]* |
1209 | (-linsolqr_org_col_dependency(sys->J.sys,dep,ind)); |
1210 | } |
1211 | } |
1212 | for( nz.col=0; nz.col <= nz.row; nz.col++ ) { |
1213 | sys->ZBZ.mtx[nz.row][nz.col] += update->ys > 0.0 ? |
1214 | sys->ZBZ.Zy[nz.row]*sys->ZBZ.Zy[nz.col]/update->ys : 0.0; |
1215 | sys->ZBZ.mtx[nz.row][nz.col] -= update->sBs > 0.0 ? |
1216 | sys->ZBZ.ZBs[nz.row]*sys->ZBZ.ZBs[nz.col]/update->sBs : 0.0; |
1217 | if( nz.row != nz.col ) { |
1218 | sys->ZBZ.mtx[nz.col][nz.row] = |
1219 | sys->ZBZ.mtx[nz.row][nz.col]; |
1220 | } |
1221 | } |
1222 | } |
1223 | } |
1224 | } |
1225 | sys->ZBZ.accurate = TRUE; |
1226 | #if DEBUG |
1227 | FPRINTF(LIF(sys),"\nReduced Hessian: \n"); |
1228 | debug_out_hessian(LIF(sys),sys); |
1229 | #endif |
1230 | } |
1231 | |
1232 | |
1233 | static void calc_rhs(slv7_system_t sys, struct vector_data *vec, |
1234 | real64 scalar, boolean transpose) |
1235 | /** |
1236 | *** Calculates just the jacobian RHS. This function should be used to |
1237 | *** supplement calculation of the jacobian. The vector vec must |
1238 | *** already be calculated and scaled so as to simply be added to the |
1239 | *** rhs. Caller is responsible for initially zeroing the rhs vector. |
1240 | **/ |
1241 | { |
1242 | if( transpose ) { /* vec is indexed by col */ |
1243 | int32 col; |
1244 | for( col=vec->rng->low; col<=vec->rng->high; col++ ) |
1245 | sys->J.rhs[mtx_col_to_org(sys->J.mtx,col)] += |
1246 | scalar*vec->vec[col]; |
1247 | |
1248 | } else { /* vec is indexed by row */ |
1249 | int32 row; |
1250 | for( row=vec->rng->low; row<=vec->rng->high; row++ ) |
1251 | sys->J.rhs[mtx_row_to_org(sys->J.mtx,row)] += |
1252 | scalar*vec->vec[row]; |
1253 | } |
1254 | linsolqr_rhs_was_changed(sys->J.sys,sys->J.rhs); |
1255 | } |
1256 | |
1257 | |
1258 | static void calc_multipliers(slv7_system_t sys) |
1259 | /** |
1260 | *** Computes the lagrange multipliers for the equality constraints. |
1261 | **/ |
1262 | { |
1263 | if( sys->multipliers.accurate ) |
1264 | return; |
1265 | |
1266 | if ( !OPTIMIZING(sys) ) { |
1267 | zero_vector(&(sys->multipliers)); |
1268 | sys->multipliers.norm2 = 0.0; |
1269 | } else { |
1270 | linsolqr_system_t lsys = sys->J.sys; |
1271 | int32 row; |
1272 | sys->J.rhs = linsolqr_get_rhs(lsys,0); |
1273 | mem_zero_byte_cast(sys->J.rhs,0.0,sys->cap*sizeof(real64)); |
1274 | calc_rhs(sys, &(sys->gradient), -1.0, TRUE ); |
1275 | linsolqr_solve(lsys,sys->J.rhs); |
1276 | row = sys->multipliers.rng->low; |
1277 | for( ; row <= sys->multipliers.rng->high; row++ ) { |
1278 | struct rel_relation *rel = sys->rlist[mtx_row_to_org(sys->J.mtx,row)]; |
1279 | sys->multipliers.vec[row] = linsolqr_var_value |
1280 | (lsys,sys->J.rhs,mtx_row_to_org(sys->J.mtx,row)); |
1281 | rel_set_multiplier(rel,sys->multipliers.vec[row]* |
1282 | sys->weights.vec[row]); |
1283 | |
1284 | } |
1285 | if (sys->iarray[SP7_SAVLIN]) { |
1286 | FILE *ldat; |
1287 | int32 ov; |
1288 | sprintf(savlinfilename,"%s%d",savlinfilebase,savlinnum++); |
1289 | ldat=fopen(savlinfilename,"w"); |
1290 | FPRINTF(ldat, |
1291 | "================= multipliersrhs (orgcoled) itn %d =====\n", |
1292 | sys->s.iteration); |
1293 | debug_write_array(ldat,sys->J.rhs,sys->cap); |
1294 | FPRINTF(ldat, |
1295 | "================= multipliers (orgrowed) ============\n"); |
1296 | for(ov=0 ; ov < sys->cap; ov++ ) |
1297 | FPRINTF(ldat,"%.20g\n",linsolqr_var_value(lsys,sys->J.rhs,ov)); |
1298 | fclose(ldat); |
1299 | } |
1300 | square_norm( &(sys->multipliers) ); |
1301 | } |
1302 | sys->multipliers.accurate = TRUE; |
1303 | #if DEBUG |
1304 | FPRINTF(LIF(sys),"Multipliers: "); |
1305 | debug_out_vector(LIF(sys),sys,&(sys->multipliers)); |
1306 | #endif |
1307 | } |
1308 | |
1309 | |
1310 | static void calc_stationary( slv7_system_t sys) |
1311 | /** |
1312 | *** Computes the gradient of the lagrangian which |
1313 | *** should be zero at the optimum solution. |
1314 | **/ |
1315 | { |
1316 | if( sys->stationary.accurate ) |
1317 | return; |
1318 | |
1319 | if ( !OPTIMIZING(sys) ) { |
1320 | zero_vector(&(sys->stationary)); |
1321 | sys->stationary.norm2 = 0.0; |
1322 | } else { |
1323 | int32 col; |
1324 | matrix_product(sys->J.mtx, &(sys->multipliers), |
1325 | &(sys->stationary), 1.0, TRUE); |
1326 | col = sys->stationary.rng->low; |
1327 | for( ; col <= sys->stationary.rng->high; col++ ) |
1328 | sys->stationary.vec[col] += sys->gradient.vec[col]; |
1329 | square_norm( &(sys->stationary) ); |
1330 | } |
1331 | sys->stationary.accurate = TRUE; |
1332 | #if DEBUG |
1333 | FPRINTF(LIF(sys),"Stationary: "); |
1334 | debug_out_vector(LIF(sys),sys,&(sys->stationary)); |
1335 | #endif |
1336 | } |
1337 | |
1338 | |
1339 | static void calc_gamma( slv7_system_t sys) |
1340 | /** |
1341 | *** Calculate the gamma vector. |
1342 | **/ |
1343 | { |
1344 | if( sys->gamma.accurate ) |
1345 | return; |
1346 | |
1347 | matrix_product(sys->J.mtx, &(sys->residuals), |
1348 | &(sys->gamma), -1.0, TRUE); |
1349 | square_norm( &(sys->gamma) ); |
1350 | sys->gamma.accurate = TRUE; |
1351 | #if DEBUG |
1352 | FPRINTF(LIF(sys),"Gamma: "); |
1353 | debug_out_vector(LIF(sys),sys,&(sys->gamma)); |
1354 | #endif |
1355 | } |
1356 | |
1357 | static void calc_Jgamma( slv7_system_t sys) |
1358 | /** |
1359 | *** Calculate the Jgamma vector. |
1360 | **/ |
1361 | { |
1362 | if( sys->Jgamma.accurate ) |
1363 | return; |
1364 | |
1365 | matrix_product(sys->J.mtx, &(sys->gamma), |
1366 | &(sys->Jgamma), 1.0, FALSE); |
1367 | square_norm( &(sys->Jgamma) ); |
1368 | sys->Jgamma.accurate = TRUE; |
1369 | #if DEBUG |
1370 | FPRINTF(LIF(sys),"Jgamma: "); |
1371 | debug_out_vector(LIF(sys),sys,&(sys->Jgamma)); |
1372 | #endif |
1373 | } |
1374 | |
1375 | |
1376 | static void calc_newton( slv7_system_t sys) |
1377 | /** |
1378 | *** Computes a step to solve the linearized equations. |
1379 | **/ |
1380 | { |
1381 | linsolqr_system_t lsys = sys->J.sys; |
1382 | int32 col; |
1383 | |
1384 | if( sys->newton.accurate ) |
1385 | return; |
1386 | |
1387 | sys->J.rhs = linsolqr_get_rhs(lsys,1); |
1388 | mem_zero_byte_cast(sys->J.rhs,0.0,sys->cap*sizeof(real64)); |
1389 | calc_rhs(sys, &(sys->residuals), -1.0, FALSE); |
1390 | linsolqr_solve(lsys,sys->J.rhs); |
1391 | col = sys->newton.rng->low; |
1392 | for( ; col <= sys->newton.rng->high; col++ ) |
1393 | sys->newton.vec[col] = linsolqr_var_value |
1394 | (lsys,sys->J.rhs,mtx_col_to_org(sys->J.mtx,col)); |
1395 | if (sys->iarray[SP7_SAVLIN]) { |
1396 | FILE *ldat; |
1397 | int32 ov; |
1398 | sprintf(savlinfilename,"%s%d",savlinfilebase,savlinnum++); |
1399 | ldat=fopen(savlinfilename,"w"); |
1400 | FPRINTF(ldat,"================= resids (orgrowed) itn %d =====\n", |
1401 | sys->s.iteration); |
1402 | debug_write_array(ldat,sys->J.rhs,sys->cap); |
1403 | FPRINTF(ldat,"================= vars (orgcoled) ============\n"); |
1404 | for(ov=0 ; ov < sys->cap; ov++ ) |
1405 | FPRINTF(ldat,"%.20g\n",linsolqr_var_value(lsys,sys->J.rhs,ov)); |
1406 | fclose(ldat); |
1407 | } |
1408 | square_norm( &(sys->newton) ); |
1409 | sys->newton.accurate = TRUE; |
1410 | #if DEBUG |
1411 | FPRINTF(LIF(sys),"Newton: "); |
1412 | debug_out_vector(LIF(sys),sys,&(sys->newton)); |
1413 | #endif |
1414 | } |
1415 | |
1416 | static void ngslv_calc_newton( slv7_system_t sys,mtx_matrix_t *factors) |
1417 | /** |
1418 | *** Computes a step to solve the linearized equations. |
1419 | *** Stores in sys->grad_newton vector in cur col order |
1420 | *** wrt factors. |
1421 | **/ |
1422 | { |
1423 | linsolqr_system_t lsys = sys->J.sys; |
1424 | int32 col; |
1425 | |
1426 | if( sys->newton.accurate ) |
1427 | return; |
1428 | |
1429 | sys->J.rhs = linsolqr_get_rhs(lsys,1); |
1430 | mem_zero_byte_cast(sys->J.rhs,0.0,sys->cap*sizeof(real64)); |
1431 | calc_rhs(sys, &(sys->residuals), -1.0, FALSE); |
1432 | linsolqr_solve(lsys,sys->J.rhs); |
1433 | col = sys->newton.rng->low; |
1434 | for( ; col <= sys->newton.rng->high; col++ ) |
1435 | sys->grad_newton.vec[col] = linsolqr_var_value |
1436 | (lsys,sys->J.rhs,mtx_col_to_org(*factors,col)); |
1437 | if (sys->iarray[SP7_SAVLIN]) { |
1438 | FILE *ldat; |
1439 | int32 ov; |
1440 | sprintf(savlinfilename,"%s%d",savlinfilebase,savlinnum++); |
1441 | ldat=fopen(savlinfilename,"w"); |
1442 | FPRINTF(ldat,"================= resids (orgrowed) itn %d =====\n", |
1443 | sys->s.iteration); |
1444 | debug_write_array(ldat,sys->J.rhs,sys->cap); |
1445 | FPRINTF(ldat,"================= vars (orgcoled) ============\n"); |
1446 | for(ov=0 ; ov < sys->cap; ov++ ) |
1447 | FPRINTF(ldat,"%.20g\n",linsolqr_var_value(lsys,sys->J.rhs,ov)); |
1448 | fclose(ldat); |
1449 | } |
1450 | square_norm( &(sys->newton) ); |
1451 | sys->newton.accurate = TRUE; |
1452 | #if DEBUG |
1453 | FPRINTF(LIF(sys),"Newton: "); |
1454 | debug_out_vector(LIF(sys),sys,&(sys->newton)); |
1455 | #endif |
1456 | } |
1457 | |
1458 | static void calc_Bnewton( slv7_system_t sys) |
1459 | /** |
1460 | *** Computes an update to the product B and newton. |
1461 | **/ |
1462 | { |
1463 | if( sys->Bnewton.accurate ) |
1464 | return; |
1465 | |
1466 | if ( !OPTIMIZING(sys) ) { |
1467 | zero_vector(&(sys->Bnewton)); |
1468 | sys->Bnewton.norm2 = 0.0; |
1469 | } else { |
1470 | struct hessian_data *update; |
1471 | copy_vector(&(sys->newton),&(sys->Bnewton)); |
1472 | for( update=sys->B; update != NULL; update = update->next ) { |
1473 | int32 col; |
1474 | real64 yn = inner_product( &(update->y),&(sys->newton) ); |
1475 | real64 sBn = inner_product( &(update->Bs),&(sys->newton) ); |
1476 | col = sys->Bnewton.rng->low; |
1477 | for( ; col <= sys->Bnewton.rng->high; col++ ) { |
1478 | sys->Bnewton.vec[col] += update->ys > 0.0 ? |
1479 | (update->y.vec[col])*yn/update->ys : 0.0; |
1480 | sys->Bnewton.vec[col] -= update->sBs > 0.0 ? |
1481 | (update->Bs.vec[col])*sBn/update->sBs : 0.0; |
1482 | } |
1483 | } |
1484 | square_norm( &(sys->Bnewton) ); |
1485 | } |
1486 | sys->Bnewton.accurate = TRUE; |
1487 | #if DEBUG |
1488 | FPRINTF(LIF(sys),"Bnewton: "); |
1489 | debug_out_vector(LIF(sys),sys,&(sys->Bnewton)); |
1490 | #endif |
1491 | } |
1492 | |
1493 | |
1494 | static void calc_nullspace( slv7_system_t sys) |
1495 | /** |
1496 | *** Calculate the nullspace move if OPTIMIZING. |
1497 | **/ |
1498 | { |
1499 | if( sys->nullspace.accurate ) |
1500 | return; |
1501 | |
1502 | if( !OPTIMIZING(sys) ) { |
1503 | zero_vector(&(sys->nullspace)); |
1504 | sys->nullspace.norm2 = 0.0; |
1505 | } else { |
1506 | mtx_coord_t nz; |
1507 | zero_vector(&(sys->nullspace)); |
1508 | for( nz.row=0; nz.row < sys->ZBZ.order; nz.row++ ) { |
1509 | int32 col, dep, ndx; |
1510 | col = nz.row+sys->J.reg.col.high+1-sys->ZBZ.order; |
1511 | dep = mtx_col_to_org(sys->J.mtx,col); |
1512 | sys->nullspace.vec[col] = -sys->stationary.vec[col] - |
1513 | sys->Bnewton.vec[col]; |
1514 | ndx = sys->J.reg.col.low; |
1515 | for( ; ndx <= sys->J.reg.col.high - sys->ZBZ.order; ndx++ ) { |
1516 | int32 ind = mtx_col_to_org(sys->J.mtx,ndx); |
1517 | if( set_is_member(sys->J.varpivots,ind) ) |
1518 | sys->nullspace.vec[col] -= |
1519 | (sys->stationary.vec[ndx] + sys->Bnewton.vec[ndx])* |
1520 | (-linsolqr_org_col_dependency(sys->J.sys,dep,ind)); |
1521 | } |
1522 | } |
1523 | /** |
1524 | *** Must invert ZBZ first. It's symmetric so |
1525 | *** can find Cholesky factors. Essentially, find |
1526 | *** the "square root" of the matrix such that |
1527 | *** |
1528 | *** T |
1529 | *** L U = U U = ZBZ, where U is an upper triangular |
1530 | *** matrix. |
1531 | **/ |
1532 | for( nz.row = 0; nz.row < sys->ZBZ.order; nz.row++ ) { |
1533 | for( nz.col = nz.row; nz.col < sys->ZBZ.order; nz.col++ ) { |
1534 | int32 col; |
1535 | for( col = 0; col < nz.row; col++ ) |
1536 | sys->ZBZ.mtx[nz.row][nz.col] -= |
1537 | sys->ZBZ.mtx[nz.row][col]* |
1538 | sys->ZBZ.mtx[col][nz.col]; |
1539 | if( nz.row == nz.col ) |
1540 | sys->ZBZ.mtx[nz.row][nz.col] = |
1541 | calc_sqrt_D0(sys->ZBZ.mtx[nz.row][nz.col]); |
1542 | else { |
1543 | sys->ZBZ.mtx[nz.row][nz.col] /= |
1544 | sys->ZBZ.mtx[nz.row][nz.row]; |
1545 | sys->ZBZ.mtx[nz.col][nz.row] = |
1546 | sys->ZBZ.mtx[nz.row][nz.col]; |
1547 | } |
1548 | } |
1549 | } |
1550 | #if DEBUG |
1551 | FPRINTF(LIF(sys),"\nInverse Reduced Hessian: \n"); |
1552 | debug_out_hessian(LIF(sys),sys); |
1553 | #endif |
1554 | /** |
1555 | *** forward substitute |
1556 | **/ |
1557 | for( nz.row = 0; nz.row < sys->ZBZ.order; nz.row++ ) { |
1558 | int32 offset = sys->J.reg.col.high+1-sys->ZBZ.order; |
1559 | for( nz.col = 0; nz.col < nz.row; nz.col++ ) { |
1560 | sys->nullspace.vec[nz.row+offset] -= |
1561 | sys->nullspace.vec[nz.col+offset]* |
1562 | sys->ZBZ.mtx[nz.row][nz.col]; |
1563 | } |
1564 | sys->nullspace.vec[nz.row+offset] /= |
1565 | sys->ZBZ.mtx[nz.row][nz.row]; |
1566 | } |
1567 | |
1568 | /** |
1569 | *** backward substitute |
1570 | **/ |
1571 | for( nz.row = sys->ZBZ.order-1; nz.row >= 0; nz.row-- ) { |
1572 | int32 offset = sys->J.reg.col.high+1-sys->ZBZ.order; |
1573 | for( nz.col = nz.row+1; nz.col < sys->ZBZ.order; nz.col++ ) { |
1574 | sys->nullspace.vec[nz.row+offset] -= |
1575 | sys->nullspace.vec[nz.col+offset]* |
1576 | sys->ZBZ.mtx[nz.row][nz.col]; |
1577 | } |
1578 | sys->nullspace.vec[nz.row+offset] /= |
1579 | sys->ZBZ.mtx[nz.row][nz.row]; |
1580 | } |
1581 | square_norm( &(sys->nullspace) ); |
1582 | } |
1583 | sys->nullspace.accurate = TRUE; |
1584 | #if DEBUG |
1585 | FPRINTF(LIF(sys),"Nullspace: "); |
1586 | debug_out_vector(LIF(sys),sys,&(sys->nullspace)); |
1587 | #endif |
1588 | } |
1589 | |
1590 | static void calc_varstep1( slv7_system_t sys) |
1591 | /** |
1592 | *** Calculate the 1st order descent direction for phi |
1593 | *** in the variables. |
1594 | **/ |
1595 | { |
1596 | if( sys->varstep1.accurate ) |
1597 | return; |
1598 | |
1599 | if( !OPTIMIZING(sys) ) { |
1600 | copy_vector(&(sys->gamma),&(sys->varstep1)); |
1601 | sys->varstep1.norm2 = sys->gamma.norm2; |
1602 | } else { |
1603 | int32 col; |
1604 | col = sys->varstep1.rng->low; |
1605 | for( ; col <= sys->varstep1.rng->high; col++ ) |
1606 | sys->varstep1.vec[col] = sys->p.rho*sys->gamma.vec[col] - |
1607 | sys->stationary.vec[col]; |
1608 | square_norm( &(sys->varstep1) ); |
1609 | } |
1610 | sys->varstep1.accurate = TRUE; |
1611 | #if DEBUG |
1612 | FPRINTF(LIF(sys),"Varstep1: "); |
1613 | debug_out_vector(LIF(sys),sys,&(sys->varstep1)); |
1614 | #endif |
1615 | } |
1616 | |
1617 | |
1618 | static void calc_Bvarstep1( slv7_system_t sys) |
1619 | /** |
1620 | *** Computes an update to the product B and varstep1. |
1621 | **/ |
1622 | { |
1623 | if( sys->Bvarstep1.accurate ) |
1624 | return; |
1625 | |
1626 | if ( !OPTIMIZING(sys) ) { |
1627 | zero_vector(&(sys->Bvarstep1)); |
1628 | sys->Bvarstep1.norm2 = 0.0; |
1629 | } else { |
1630 | struct hessian_data *update; |
1631 | copy_vector(&(sys->varstep1),&(sys->Bvarstep1)); |
1632 | for( update=sys->B; update != NULL; update = update->next ) { |
1633 | int32 col; |
1634 | real64 yv = inner_product( &(update->y),&(sys->varstep1) ); |
1635 | real64 sBv = inner_product( &(update->Bs),&(sys->varstep1) ); |
1636 | col = sys->Bvarstep1.rng->low; |
1637 | for( ; col <= sys->Bvarstep1.rng->high; col++ ) { |
1638 | sys->Bvarstep1.vec[col] += update->ys > 0.0 ? |
1639 | (update->y.vec[col])*yv/update->ys : 0.0; |
1640 | sys->Bvarstep1.vec[col] -= update->sBs > 0.0 ? |
1641 | (update->Bs.vec[col])*sBv/update->sBs : 0.0; |
1642 | } |
1643 | } |
1644 | square_norm( &(sys->Bvarstep1) ); |
1645 | } |
1646 | sys->Bvarstep1.accurate = TRUE; |
1647 | #if DEBUG |
1648 | FPRINTF(LIF(sys),"Bvarstep1: "); |
1649 | debug_out_vector(LIF(sys),sys,&(sys->Bvarstep1)); |
1650 | #endif |
1651 | } |
1652 | |
1653 | |
1654 | static void calc_varstep2( slv7_system_t sys) |
1655 | /** |
1656 | *** Calculate the 2nd order descent direction for phi |
1657 | *** in the variables. |
1658 | **/ |
1659 | { |
1660 | if( sys->varstep2.accurate ) |
1661 | return; |
1662 | |
1663 | if( !OPTIMIZING(sys) ) { |
1664 | copy_vector(&(sys->newton),&(sys->varstep2)); |
1665 | sys->varstep2.norm2 = sys->newton.norm2; |
1666 | } else { |
1667 | int32 col; |
1668 | col = sys->varstep2.rng->low; |
1669 | for( ; col <= sys->varstep2.rng->high - sys->ZBZ.order ; ++col ) { |
1670 | int32 dep; |
1671 | int32 ind = mtx_col_to_org(sys->J.mtx,col); |
1672 | sys->varstep2.vec[col] = sys->newton.vec[col]; |
1673 | if( set_is_member(sys->J.varpivots,ind) ) { |
1674 | dep = sys->varstep2.rng->high + 1 - sys->ZBZ.order; |
1675 | for( ; dep <= sys->varstep2.rng->high; dep++ ) |
1676 | sys->varstep2.vec[col] += sys->nullspace.vec[dep]* |
1677 | (-linsolqr_org_col_dependency(sys->J.sys,dep,ind)); |
1678 | } |
1679 | } |
1680 | col = sys->varstep2.rng->high + 1 - sys->ZBZ.order; |
1681 | for( ; col <= sys->varstep2.rng->high; ++col ) |
1682 | sys->varstep2.vec[col] = sys->nullspace.vec[col] + |
1683 | sys->newton.vec[col]; |
1684 | square_norm( &(sys->varstep2) ); |
1685 | } |
1686 | sys->varstep2.accurate = TRUE; |
1687 | #if DEBUG |
1688 | FPRINTF(LIF(sys),"Varstep2: "); |
1689 | debug_out_vector(LIF(sys),sys,&(sys->varstep2)); |
1690 | #endif |
1691 | } |
1692 | |
1693 | |
1694 | static void calc_Bvarstep2( slv7_system_t sys) |
1695 | /** |
1696 | *** Computes an update to the product B and varstep2. |
1697 | **/ |
1698 | { |
1699 | if( sys->Bvarstep2.accurate ) |
1700 | return; |
1701 | |
1702 | if ( !OPTIMIZING(sys) ) { |
1703 | zero_vector(&(sys->Bvarstep2)); |
1704 | sys->Bvarstep2.norm2 = 0.0; |
1705 | } else { |
1706 | struct hessian_data *update; |
1707 | copy_vector(&(sys->varstep2),&(sys->Bvarstep2)); |
1708 | for( update=sys->B; update != NULL; update = update->next ) { |
1709 | int32 col; |
1710 | real64 yv = inner_product( &(update->y),&(sys->varstep2) ); |
1711 | real64 sBv = inner_product( &(update->Bs),&(sys->varstep2) ); |
1712 | col = sys->Bvarstep2.rng->low; |
1713 | for( ; col <= sys->Bvarstep2.rng->high; col++ ) { |
1714 | sys->Bvarstep2.vec[col] += update->ys > 0.0 ? |
1715 | (update->y.vec[col])*yv/update->ys : 0.0; |
1716 | sys->Bvarstep2.vec[col] -= update->sBs > 0.0 ? |
1717 | (update->Bs.vec[col])*sBv/update->sBs : 0.0; |
1718 | } |
1719 | } |
1720 | square_norm( &(sys->Bvarstep2) ); |
1721 | } |
1722 | sys->Bvarstep2.accurate = TRUE; |
1723 | #if DEBUG |
1724 | FPRINTF(LIF(sys),"Bvarstep2: "); |
1725 | debug_out_vector(LIF(sys),sys,&(sys->Bvarstep2)); |
1726 | #endif |
1727 | } |
1728 | |
1729 | |
1730 | static void calc_mulstep1( slv7_system_t sys) |
1731 | /** |
1732 | *** Calculate the negative gradient direction of phi in the |
1733 | *** multipliers. |
1734 | **/ |
1735 | { |
1736 | if( sys->mulstep1.accurate ) |
1737 | return; |
1738 | |
1739 | if( !OPTIMIZING(sys) ) { |
1740 | zero_vector(&(sys->mulstep1)); |
1741 | sys->mulstep1.norm2 = 0.0; |
1742 | } else { |
1743 | int32 row; |
1744 | row = sys->mulstep1.rng->low; |
1745 | for( ; row <= sys->mulstep1.rng->high; row++ ) |
1746 | sys->mulstep1.vec[row] = -sys->residuals.vec[row]; |
1747 | square_norm( &(sys->mulstep1) ); |
1748 | } |
1749 | sys->mulstep1.accurate = TRUE; |
1750 | #if DEBUG |
1751 | FPRINTF(LIF(sys),"Mulstep1: "); |
1752 | debug_out_vector(LIF(sys),sys,&(sys->mulstep1)); |
1753 | #endif |
1754 | } |
1755 | |
1756 | |
1757 | static void calc_mulstep2( slv7_system_t sys) |
1758 | /** |
1759 | *** Calculate the mulstep2 direction of phi in the |
1760 | *** multipliers. |
1761 | **/ |
1762 | { |
1763 | if( sys->mulstep2.accurate ) |
1764 | return; |
1765 | |
1766 | if( !OPTIMIZING(sys) ) { |
1767 | zero_vector(&(sys->mulstep2)); |
1768 | sys->mulstep2.norm2 = 0.0; |
1769 | } else { |
1770 | linsolqr_system_t lsys = sys->J.sys; |
1771 | int32 row; |
1772 | sys->J.rhs = linsolqr_get_rhs(lsys,2); |
1773 | mem_zero_byte_cast(sys->J.rhs,0.0,sys->cap*sizeof(real64)); |
1774 | calc_rhs(sys, &(sys->Bvarstep2), -1.0, TRUE); |
1775 | calc_rhs(sys, &(sys->stationary), -1.0, TRUE); |
1776 | linsolqr_solve(lsys,sys->J.rhs); |
1777 | row = sys->mulstep2.rng->low; |
1778 | for( ; row <= sys->mulstep2.rng->high; row++ ) |
1779 | sys->mulstep2.vec[row] = linsolqr_var_value |
1780 | (lsys,sys->J.rhs,mtx_row_to_org(sys->J.mtx,row)); |
1781 | if (sys->iarray[SP7_SAVLIN]) { |
1782 | FILE *ldat; |
1783 | int32 ov; |
1784 | sprintf(savlinfilename,"%s%d",savlinfilebase,savlinnum++); |
1785 | ldat=fopen(savlinfilename,"w"); |
1786 | FPRINTF(ldat, |
1787 | "================= mulstep2rhs (orgcoled) itn %d =======\n", |
1788 | sys->s.iteration); |
1789 | debug_write_array(ldat,sys->J.rhs,sys->cap); |
1790 | FPRINTF(ldat, |
1791 | "================= mulstep2vars (orgrowed) ============\n"); |
1792 | for(ov=0 ; ov < sys->cap; ov++ ) |
1793 | FPRINTF(ldat,"%.20g\n",linsolqr_var_value(lsys,sys->J.rhs,ov)); |
1794 | fclose(ldat); |
1795 | } |
1796 | square_norm( &(sys->mulstep2) ); |
1797 | } |
1798 | sys->mulstep2.accurate = TRUE; |
1799 | #if DEBUG |
1800 | FPRINTF(LIF(sys),"Mulstep2: "); |
1801 | debug_out_vector(LIF(sys),sys,&(sys->mulstep2)); |
1802 | #endif |
1803 | } |
1804 | |
1805 | |
1806 | static void calc_phi( slv7_system_t sys) |
1807 | /** |
1808 | *** Computes the global minimizing function Phi. |
1809 | **/ |
1810 | { |
1811 | if( !OPTIMIZING(sys) ) |
1812 | sys->phi = 0.5*sys->residuals.norm2; |
1813 | else { |
1814 | sys->phi = sys->objective; |
1815 | sys->phi += inner_product( &(sys->multipliers),&(sys->residuals) ); |
1816 | sys->phi += 0.5*sys->p.rho*sys->residuals.norm2; |
1817 | } |
1818 | } |
1819 | |
1820 | #if NGSLV |
1821 | /** |
1822 | *** Current implementation problems: |
1823 | *** 1) The regions (un_p_row, A12, etc) which are currently part of |
1824 | *** sys->J are a bit deceptive in that they really should be associated |
1825 | *** with factors. |
1826 | **/ |
1827 | |
1828 | /** |
1829 | *** NGSlv Linesearch Functions |
1830 | **/ |
1831 | |
1832 | static void set_up_line_search(slv7_system_t sys,mtx_matrix_t *factors, |
1833 | linsolqr_system_t *lsys) |
1834 | { |
1835 | mtx_coord_t loc; |
1836 | int32 ind; |
1837 | zero_vector(&(sys->tmp_ls)); |
1838 | for (loc.col = sys->J.un_p_col_reg.col.low; |
1839 | loc.col <= sys->J.un_p_col_reg.col.high; loc.col++ ) { |
1840 | sys->tmp_ls.vec[mtx_col_to_org(*factors,loc.col)] = |
1841 | sys->grad_newton.vec[loc.col] * |
1842 | sys->line_search.full_grad_mult; |
1843 | } |
1844 | for (loc.col = sys->J.reg.col.low; |
1845 | loc.col < sys->J.un_p_col_reg.col.low; loc.col++ ) { |
1846 | ind = linsolqr_org_col_to_org_row(*lsys,mtx_col_to_org(*factors,loc.col)); |
1847 | sys->tmp_ls.vec[mtx_col_to_org(*factors,loc.col)] = |
1848 | - sys->line_search.full_grad_mult * |
1849 | sys->line_search.newton_mult * |
1850 | sys->tmp.vec[ind]; |
1851 | } |
1852 | } |
1853 | /* |
1854 | static void set_up_line_search(slv7_system_t sys,mtx_matrix_t *factors, |
1855 | linsolqr_system_t *lsys) |
1856 | { |
1857 | mtx_coord_t loc; |
1858 | int32 ind; |
1859 | zero_vector(&(sys->tmp_ls)); |
1860 | for (loc.col = sys->J.un_p_col_reg.col.low; |
1861 | loc.col <= sys->J.un_p_col_reg.col.high; loc.col++ ) { |
1862 | sys->tmp_ls.vec[loc.col] = sys->grad_newton.vec[loc.col] * |
1863 | sys->line_search.full_grad_mult; |
1864 | } |
1865 | for (loc.col = sys->J.reg.col.low; |
1866 | loc.col < sys->J.un_p_col_reg.col.low; loc.col++ ) { |
1867 | ind = linsolqr_org_col_to_org_row(*lsys,mtx_col_to_org(*factors,loc.col)); |
1868 | sys->tmp_ls.vec[loc.col] = - sys->line_search.full_grad_mult * |
1869 | sys->tmp.vec[ind]; |
1870 | } |
1871 | }*/ |
1872 | |
1873 | static void ls_calc_gradient(slv7_system_t sys, |
1874 | mtx_matrix_t *factors, |
1875 | mtx_matrix_t *outer_factors, |
1876 | linsolqr_system_t *lsys) |
1877 | |
1878 | /** |
1879 | *** Calculates gradient of the linesearch objective function (ls_obj). |
1880 | *** ls_obj = sqrt(sum(h^2)) where h = gradient_eqn_residual and the sum |
1881 | *** is over the gradient equations |
1882 | *** d(ls_obj)/d(alpha) = sum(d(h)/d(alpha)) |
1883 | *** d(h)/d(alpha) = J(d(x)/d(alpha)) |
1884 | *** | -inv(L)*A12'*alpha | <-- Newton eqns |
1885 | *** d(x)/d(alpha) = | alpha | <-- Gradient eqns |
1886 | *** |
1887 | **/ |
1888 | { |
1889 | int32 row,col,ind,status; |
1890 | var_filter_t vfilter; |
1891 | real64 resid,ls_obj = 0.0; |
1892 | mtx_coord_t loc; |
1893 | real64 sum_dh_dalpha; |
1894 | struct rel_relation *rel; |
1895 | |
1896 | /* zero_vector(&(sys->tmp_ls));*/ |
1897 | /* calculate the gradient rows of the jacobian and the corresponding |
1898 | residuals at the trial point */ |
1899 | calc_ok = TRUE; |
1900 | vfilter.matchbits = (VAR_INBLOCK | VAR_ACTIVE); |
1901 | vfilter.matchvalue = (VAR_INBLOCK | VAR_ACTIVE); |
1902 | |
1903 | mtx_clear_region(sys->J.mtx,&(sys->J.reg)); |
1904 | Asc_SignalHandlerPush(SIGFPE,SIG_IGN); |
1905 | for(row = sys->J.reg.row.low; |
1906 | row < sys->J.un_p_row_reg.row.low; row++) { |
1907 | rel = sys->rlist[mtx_row_to_org(*factors,row)]; |
1908 | resid = relman_eval(rel,&status,SAFE_CALC); |
1909 | ls_obj += sqr(resid); |
1910 | } |
1911 | Asc_SignalHandlerPop(SIGFPE,SIG_IGN); |
1912 | sys->line_search.obj2 = sqrt(ls_obj); |
1913 | ls_obj = 0; |
1914 | |
1915 | for(row = sys->J.un_p_row_reg.row.low; |
1916 | row <= sys->J.un_p_row_reg.row.high; row++) { |
1917 | rel = sys->rlist[mtx_row_to_org(*factors,row)]; |
1918 | relman_diffs(rel,&vfilter,sys->J.mtx,&resid,SAFE_CALC); |
1919 | ls_obj += sqr(resid); |
1920 | } |
1921 | sys->line_search.obj = sqrt(ls_obj); |
1922 | FPRINTF(stderr,"TOTAL ERR = %16.8g\n",sys->line_search.obj + |
1923 | sys->line_search.obj2); |
1924 | FPRINTF(stderr,"LS_OBJ = %16.8g, SCALED = %16.8g\n", |
1925 | sys->line_search.obj, |
1926 | sys->line_search.obj/(sys->J.un_p_row_reg.row.high - |
1927 | sys->J.un_p_row_reg.row.low + 1)); |
1928 | FPRINTF(stderr,"LS_OBJ2 = %16.8g, SCALED = %16.8g\n", |
1929 | sys->line_search.obj2, |
1930 | sys->line_search.obj2/(sys->J.un_p_row_reg.row.low - |
1931 | sys->J.reg.row.low)); |
1932 | for( col=sys->J.reg.col.low; col <= sys->J.reg.col.high; col++ ) { |
1933 | mtx_mult_col(sys->J.mtx,col,sys->nominals.vec[col],&(sys->J.reg.row)); |
1934 | } |
1935 | for( row=sys->J.reg.row.low; row <= sys->J.reg.row.high; row++ ) { |
1936 | mtx_mult_row(sys->J.mtx,row,sys->weights.vec[row],&(sys->J.reg.col)); |
1937 | } |
1938 | |
1939 | /* CORRECT CODE, WRONG IDEA */ |
1940 | /* Here we calculate -inv(L)*A12'*alpha. This vector is stored |
1941 | in sys->tmp_ls in org row order (wrt factors).*/ |
1942 | /*A12' is stored in outer_factors*/ |
1943 | /* for (loc.row = sys->J.A12_reg.row.low; |
1944 | loc.row <= sys->J.A12_reg.row.high; loc.row++ ) { |
1945 | ind = mtx_row_to_org(*factors,loc.row); |
1946 | sys->tmp_ls.vec[ind] = -sys->line_search.grad_mult* |
1947 | mtx_sum_num_in_row(*outer_factors,loc.row,&(sys->J.A12_reg.col)); |
1948 | } |
1949 | forward_substitute2(*lsys,sys->tmp_ls.vec,FALSE); |
1950 | for (loc.row = sys->J.A22_reg.row.low; |
1951 | loc.row <= sys->J.A22_reg.row.high; loc.row++ ) { |
1952 | ind = mtx_row_to_org(*factors,loc.row); |
1953 | sys->tmp_ls.vec[ind] = sys->line_search.grad_mult; |
1954 | } |
1955 | */ |
1956 | |
1957 | for (loc.row = sys->J.A22_reg.row.low; |
1958 | loc.row <= sys->J.A22_reg.row.high; loc.row++ ) { |
1959 | ind = mtx_org_to_row(sys->J.mtx,mtx_row_to_org(*factors,loc.row)); |
1960 | /* sum_dh_dalpha += mtx_row_dot_full_org_custom_vec(sys->J.mtx,*factors, |
1961 | ind,sys->tmp_ls.vec, |
1962 | mtx_ALL_COLS, |
1963 | TRUE);*/ |
1964 | sum_dh_dalpha += mtx_row_dot_full_org_vec(sys->J.mtx, |
1965 | ind,sys->tmp_ls.vec, |
1966 | mtx_ALL_COLS, |
1967 | FALSE); |
1968 | } |
1969 | |
1970 | if (sys->line_search.obj != 0) { |
1971 | sys->line_search.grad = sum_dh_dalpha/sys->line_search.obj; |
1972 | } else { |
1973 | sys->line_search.grad = sum_dh_dalpha; |
1974 | } |
1975 | FPRINTF(stderr,"LS_GRAD = %16.8g\n",sys->line_search.grad); |
1976 | } |
1977 | /** |
1978 | *** NGSlv Functions |
1979 | **/ |
1980 | |
1981 | static void set_un_p_reg(slv7_system_t sys) |
1982 | /** |
1983 | *** Sets unpivoted regions |
1984 | **/ |
1985 | { |
1986 | sys->J.un_p_col_reg.col.low = sys->J.reg.col.high - sys->J.rank_defect + 1; |
1987 | sys->J.un_p_col_reg.col.high = sys->J.reg.col.high; |
1988 | sys->J.un_p_col_reg.row.low = sys->J.reg.row.low; |
1989 | sys->J.un_p_col_reg.row.high = sys->J.reg.row.high; |
1990 | |
1991 | sys->J.un_p_row_reg.col.low = sys->J.reg.col.low; |
1992 | sys->J.un_p_row_reg.col.high = sys->J.reg.col.high; |
1993 | sys->J.un_p_row_reg.row.low = sys->J.reg.row.high - sys->J.rank_defect + 1; |
1994 | sys->J.un_p_row_reg.row.high = sys->J.reg.row.high; |
1995 | |
1996 | sys->J.A21_reg.col.low = sys->J.reg.col.low; |
1997 | sys->J.A21_reg.col.high = sys->J.reg.col.high - sys->J.rank_defect; |
1998 | sys->J.A21_reg.row.low = sys->J.reg.row.high - sys->J.rank_defect + 1; |
1999 | sys->J.A21_reg.row.high = sys->J.reg.row.high; |
2000 | |
2001 | sys->J.A12_reg.col.low = sys->J.reg.col.high - sys->J.rank_defect + 1; |
2002 | sys->J.A12_reg.col.high = sys->J.reg.col.high; |
2003 | sys->J.A12_reg.row.low = sys->J.reg.row.low; |
2004 | sys->J.A12_reg.row.high = sys->J.reg.row.high - sys->J.rank_defect; |
2005 | |
2006 | sys->J.A22_reg.col.low = sys->J.reg.col.high - sys->J.rank_defect + 1; |
2007 | sys->J.A22_reg.col.high = sys->J.reg.col.high; |
2008 | sys->J.A22_reg.row.low = sys->J.reg.row.high - sys->J.rank_defect + 1; |
2009 | sys->J.A22_reg.row.high = sys->J.reg.row.high; |
2010 | } |
2011 | |
2012 | mtx_sparse_t *create_tmp(slv7_system_t sys) |
2013 | /** |
2014 | *** Function to allocate tmp matrix |
2015 | **/ |
2016 | { |
2017 | mtx_sparse_t *tmp = NULL; |
2018 | tmp = (mtx_sparse_t *)ascmalloc(sizeof(mtx_sparse_t)); |
2019 | if (ISNULL(tmp)) { |
2020 | FPRINTF(stderr,"ERROR: (slv7) create_tmp\n"); |
2021 | FPRINTF(stderr," Insufficient memory.\n"); |
2022 | return tmp; |
2023 | } |
2024 | tmp->cap = sys->J.reg.col.high - sys->J.reg.col.low + 1; |
2025 | tmp->len = tmp->cap; |
2026 | tmp->data = (real64 *)ascmalloc(sizeof(real64)*tmp->cap); |
2027 | tmp->idata = (int32 *)ascmalloc(sizeof(int32)*tmp->cap); |
2028 | if (ISNULL(tmp->data) || ISNULL(tmp->idata)) { |
2029 | FPRINTF(stderr,"ERROR: (slv7) create_tmp\n"); |
2030 | FPRINTF(stderr," Insufficient memory.\n"); |
2031 | mtx_destroy_sparse(tmp); |
2032 | tmp = NULL; |
2033 | } |
2034 | return tmp; |
2035 | } |
2036 | |
2037 | static void fill_un_p_row_reg(slv7_system_t sys,mtx_matrix_t *factors, |
2038 | mtx_matrix_t *coef,mtx_matrix_t *outer_factors, |
2039 | mtx_sparse_t **tmp) |
2040 | /*** |
2041 | *** Get rows from 'A' matrix and stuff into outer_factors |
2042 | *** This fills the un pivoted rows with the elements |
2043 | *** in the correct order. Note that no range checking is |
2044 | *** needed since we are taking rows from entire region of |
2045 | *** 'A' and inserting into the corresponding region of |
2046 | *** factors. |
2047 | **/ |
2048 | { |
2049 | /* CHECK IF SCALING IS NEEDED */ |
2050 | mtx_coord_t loc; |
2051 | int32 k,ind; |
2052 | for( loc.row = sys->J.un_p_row_reg.row.low; |
2053 | loc.row <= sys->J.un_p_row_reg.row.high; loc.row++ ){ |
2054 | ind = mtx_row_to_org(*factors,loc.row); |
2055 | ind = mtx_org_to_row(*coef,ind); |
2056 | *tmp = mtx_org_row_sparse(*coef,ind,*tmp,&(sys->J.un_p_row_reg.col), |
2057 | mtx_IGNORE_ZEROES); |
2058 | for (k = 0; k < (*tmp)->len; k++) { |
2059 | loc.col = mtx_org_to_col(*factors,(*tmp)->idata[k]); |
2060 | mtx_fill_value(*outer_factors,&(loc),(*tmp)->data[k]); |
2061 | } |
2062 | } |
2063 | } |
2064 | |
2065 | static void fill_un_p_col_reg(slv7_system_t sys,mtx_matrix_t *factors, |
2066 | mtx_matrix_t *coef,mtx_matrix_t *outer_factors, |
2067 | mtx_sparse_t **tmp,linsolqr_system_t *lsys) |
2068 | /*** |
2069 | *** This fills the L12 setion of outer_factors with |
2070 | *** inv(U11)*A12. Note that range checking is |
2071 | *** needed. We are taking cols from entire region of |
2072 | *** 'A', removing unwanted elements, premultiplying |
2073 | *** by inv(U11) and inserting into the corresponding |
2074 | *** region of factors. |
2075 | *** We also calculate A22' at this time. |
2076 | **/ |
2077 | { |
2078 | /* this is a temporary hack and should be fixed |
2079 | * we need a better way to access these functions |
2080 | */ |
2081 | extern void backward_substitute2(linsolqr_system_t sys, |
2082 | real64 *arr, |
2083 | boolean transpose); |
2084 | extern void forward_substitute2(linsolqr_system_t sys, |
2085 | real64 *arr, |
2086 | boolean transpose); |
2087 | |
2088 | /* CHECK IF SCALING IS NEEDED (i.e. is coef scaled?)*/ |
2089 | mtx_coord_t loc; |
2090 | int32 k,ind; |
2091 | double tmp_double; |
2092 | zero_vector(&(sys->tmp)); |
2093 | for( loc.col = sys->J.un_p_col_reg.col.low; |
2094 | loc.col <= sys->J.un_p_col_reg.col.high; loc.col++ ) { |
2095 | ind = mtx_col_to_org(*factors,loc.col); |
2096 | ind = mtx_org_to_col(*coef,ind); |
2097 | *tmp = mtx_org_col_sparse(*coef,ind,*tmp,&(sys->J.reg.row), |
2098 | mtx_IGNORE_ZEROES); |
2099 | |
2100 | /* This loop stores a col of A12 into sys->tmp.vec */ |
2101 | /* in org row order. There should be no other non-zeros */ |
2102 | /* than those introduced here. */ |
2103 | for (k = 0; k < (*tmp)->len; k++) { |
2104 | if (mtx_org_to_row(*coef,(*tmp)->idata[k]) < sys->J.un_p_row_reg.row.low) { |
2105 | sys->tmp.vec[mtx_row_to_org(*factors,mtx_org_to_row(*coef,(*tmp)->idata[k]))] = |
2106 | (*tmp)->data[k]; |
2107 | } |
2108 | } |
2109 | |
2110 | backward_substitute2(*lsys,sys->tmp.vec,FALSE); |
2111 | for (k = sys->tmp.rng->low; k <= sys->tmp.rng->high; k++) { |
2112 | if (sys->tmp.vec[k] != D_ZERO) { |
2113 | loc.row = mtx_org_to_row(*factors,k); |
2114 | mtx_fill_value(*outer_factors,&loc,sys->tmp.vec[k]); |
2115 | /* Do not zero sys->tmp.vec here as it will be used */ |
2116 | /* in calculating A22' */ |
2117 | } |
2118 | } |
2119 | |
2120 | /* now calculating A22' = A22 - A21*inv(L11)*inv(U11)*A12 */ |
2121 | forward_substitute2(*lsys,sys->tmp.vec,FALSE); |
2122 | for (loc.row = sys->J.A22_reg.row.low; |
2123 | loc.row <= sys->J.A22_reg.row.high; loc.row++ ) { |
2124 | tmp_double = mtx_value(*outer_factors,&loc); |
2125 | mtx_clear_coord(*outer_factors,loc.row,loc.col); |
2126 | mtx_fill_value(*outer_factors,&(loc), |
2127 | (tmp_double - |
2128 | mtx_row_dot_full_org_vec(*outer_factors,loc.row, |
2129 | sys->tmp.vec, |
2130 | &(sys->J.un_p_row_reg.col), |
2131 | TRUE))); |
2132 | } |
2133 | /* note that the above code is in very bad style. |
2134 | * we should probably use mtx_fill_value and then |
2135 | * an mtx_assemble after all rows/cols are done |
2136 | */ |
2137 | zero_vector(&(sys->tmp)); |
2138 | } |
2139 | } |
2140 | |
2141 | static void ngslv_calc_varstep(slv7_system_t sys,mtx_matrix_t *factors) |
2142 | { |
2143 | int32 col,ind; |
2144 | col = sys->newton.rng->low; |
2145 | for( ; col <= sys->newton.rng->high; col++ ) { |
2146 | ind = mtx_org_to_col(sys->J.mtx,mtx_col_to_org(*factors,col)); |
2147 | sys->varstep.vec[ind] = sys->grad_newton2.vec[col]; |
2148 | } |
2149 | } |
2150 | |
2151 | static void calc_un_p_rhs(slv7_system_t sys, mtx_matrix_t *factors, |
2152 | mtx_matrix_t *outer_factors, |
2153 | real64 *varvalue) |
2154 | /** |
2155 | *** This function stores B2 in the lower section of sys->tmp.vec |
2156 | *** then proceeds to calculate B2' = B2 - A21*inv(L11)*inv(U11)*B1 |
2157 | *** where inv(L11)*inv(U11)*B1 is the newton direction in the |
2158 | *** pivoted vars. |
2159 | **/ |
2160 | { |
2161 | mtx_coord_t loc; |
2162 | /* Store B2 in sys->tmp.vec in org row order wrt sys->J.mtx */ |
2163 | for( loc.row = sys->J.un_p_row_reg.row.low; |
2164 | loc.row <= sys->J.un_p_row_reg.row.high; loc.row++ ) { |
2165 | sys->tmp.vec[mtx_row_to_org(sys->J.mtx,loc.row)] = |
2166 | -(sys->residuals.vec[loc.row]); |
2167 | } |
2168 | for( loc.row = sys->J.un_p_row_reg.row.low; |
2169 | loc.row <= sys->J.un_p_row_reg.row.high; loc.row++ ) { |
2170 | sys->tmp.vec[mtx_row_to_org(*factors,loc.row)] -= |
2171 | mtx_row_dot_full_cur_vec(*outer_factors,loc.row, |
2172 | sys->grad_newton.vec, |
2173 | &(sys->J.A21_reg.col),FALSE); |
2174 | } |
2175 | } |
2176 | |
2177 | static void calc_un_p_grad(slv7_system_t sys,mtx_matrix_t *factors, |
2178 | mtx_matrix_t *outer_factors, |
2179 | real64 *varvalue) |
2180 | /** |
2181 | *** Calculates Xg, the gradient direction in the unpivoted variables. |
2182 | *** Xg = trans(A22')*B2' stored in sys->grad_newton.vec in cur col order (wrt J.mtx). |
2183 | **/ |
2184 | { |
2185 | mtx_coord_t loc; |
2186 | for (loc.col = sys->J.un_p_col_reg.col.low; |
2187 | loc.col <= sys->J.un_p_col_reg.col.high; loc.col++ ) { |
2188 | sys->grad_newton.vec[loc.col] = |
2189 | mtx_col_dot_full_org_vec(*outer_factors,loc.col,sys->tmp.vec, |
2190 | &(sys->J.un_p_row_reg.row),FALSE); |
2191 | } |
2192 | } |
2193 | |
2194 | static void calc_newton_adjustment(slv7_system_t sys,linsolqr_system_t *lsys, |
2195 | mtx_matrix_t *factors, |
2196 | mtx_matrix_t *outer_factors, |
2197 | real64 *varvalue) |
2198 | /** |
2199 | *** This function calculates the unscaled adjustment to the newton step |
2200 | *** inv(L11)*A12'*Xg stored in tmp.vec in org row order (wrt factors). |
2201 | *** note that A12' = inv(U11)*A12 is stored in outer_factors. |
2202 | **/ |
2203 | { |
2204 | extern void forward_substitute2(linsolqr_system_t sys, |
2205 | real64 *arr, |
2206 | boolean transpose); |
2207 | mtx_coord_t loc; |
2208 | int32 ind; |
2209 | for (loc.row = sys->J.A12_reg.row.low; |
2210 | loc.row <= sys->J.A12_reg.row.high; loc.row++ ) { |
2211 | ind = mtx_row_to_org(*factors,loc.row); |
2212 | sys->tmp.vec[ind] = |
2213 | mtx_row_dot_full_cur_vec(*outer_factors,loc.row,sys->grad_newton.vec, |
2214 | mtx_ALL_COLS,FALSE); /*all_cols should be ok here???*/ |
2215 | } |
2216 | forward_substitute2(*lsys,sys->tmp.vec,FALSE); |
2217 | } |
2218 | |
2219 | static void set_grad_multiplier(slv7_system_t sys, |
2220 | mtx_matrix_t *outer_factors) |
2221 | { |
2222 | int32 ind; |
2223 | double denominator, numerator,alpha; |
2224 | denominator = numerator = 0; |
2225 | for (ind = sys->J.un_p_col_reg.col.low; |
2226 | ind <= sys->J.un_p_col_reg.col.high; ind++ ) { |
2227 | sys->tmp2.vec[ind] = |
2228 | mtx_row_dot_full_cur_vec(*outer_factors,ind,sys->grad_newton.vec, |
2229 | &(sys->J.un_p_col_reg.col),FALSE); |
2230 | numerator += sqr(sys->grad_newton.vec[ind]); |
2231 | denominator += sqr(sys->tmp2.vec[ind]); |
2232 | } |
2233 | if (denominator == 0) { |
2234 | FPRINTF(stderr,"Infinite denominator in NGSlv\n"); |
2235 | sys->line_search.full_grad_mult = 0.0; |
2236 | } |
2237 | alpha = numerator/denominator; |
2238 | if (alpha < 0){ |
2239 | FPRINTF(stderr,"Warning: Negative Alpha in NGSlv\n"); |
2240 | sys->line_search.full_grad_mult = -alpha; |
2241 | } else { |
2242 | sys->line_search.full_grad_mult = alpha; |
2243 | } |
2244 | } |
2245 | |
2246 | static void calc_ng_step(slv7_system_t sys,linsolqr_system_t *lsys, |
2247 | mtx_matrix_t *factors, |
2248 | real64 *varvalue) |
2249 | /** |
2250 | *** Calculates the actual step given the gradient multiplier. |
2251 | *** Stores the step in sys->grad_newton2.vec in cur col order. |
2252 | **/ |
2253 | { |
2254 | mtx_coord_t loc; |
2255 | int32 ind; |
2256 | #if KDEBUG |
2257 | real64 gradnorm2 = 0; |
2258 | real64 newtnorm2 = 0; |
2259 | #endif /*KDEBUG*/ |
2260 | zero_vector(&(sys->grad_newton2)); |
2261 | /* we will do our own dense arithmetic here */ |
2262 | for (loc.col = sys->J.un_p_col_reg.col.low; |
2263 | loc.col <= sys->J.un_p_col_reg.col.high; loc.col++ ) { |
2264 | sys->grad_newton2.vec[loc.col] = sys->grad_newton.vec[loc.col] * |
2265 | sys->line_search.full_grad_mult * |
2266 | sys->line_search.grad_mult; |
2267 | #if KDEBUG |
2268 | gradnorm2 += sqr(sys->grad_newton2.vec[loc.col]); |
2269 | #endif /*KDEBUG*/ |
2270 | } |
2271 | for (loc.col = sys->J.reg.col.low; |
2272 | loc.col < sys->J.un_p_col_reg.col.low; loc.col++ ) { |
2273 | /* THIS IS A MESS */ |
2274 | ind = linsolqr_org_col_to_org_row(*lsys,mtx_col_to_org(*factors,loc.col)); |
2275 | sys->grad_newton2.vec[loc.col] = sys->line_search.newton_mult * |
2276 | (sys->grad_newton.vec[loc.col] - |
2277 | sys->line_search.full_grad_mult * sys->tmp.vec[ind] * |
2278 | sys->line_search.grad_mult); |
2279 | #if KDEBUG |
2280 | newtnorm2 += sqr(sys->grad_newton2.vec[loc.col]); |
2281 | #endif /*KDEBUG*/ |
2282 | } |
2283 | #if KDEBUG |
2284 | gradnorm2 = sqrt(gradnorm2); |
2285 | newtnorm2 = sqrt(newtnorm2); |
2286 | sys->line_search.newton_norm = newtnorm2; |
2287 | sys->line_search.grad_norm = gradnorm2; |
2288 | FPRINTF(stderr,"NEWTON NORM = %16.8g, SCALED = %16.8g\n", |
2289 | newtnorm2, |
2290 | newtnorm2/(sys->J.un_p_row_reg.row.low - |
2291 | sys->J.reg.row.low)); |
2292 | FPRINTF(stderr,"GRADIENT NORM = %16.8g, SCALED = %16.8g\n", |
2293 | gradnorm2, |
2294 | gradnorm2/(sys->J.un_p_row_reg.row.high - |
2295 | sys->J.un_p_row_reg.row.low + 1)); |
2296 | #endif /*KDEBUG*/ |
2297 | } |
2298 | static void print_ls_data(slv7_system_t sys, int32 task) |
2299 | { |
2300 | FILE *output; |
2301 | output = fopen ("ngslv_data","a"); |
2302 | if (task == 0) { |
2303 | FPRINTF(output,"%4.3g %d %d %8.4g %8.4g %8.4g %8.4g %8.4g %8.4g %8.4g\n", |
2304 | sys->p.tolerance.singular, |
2305 | sys->line_search.rank_defect, |
2306 | sys->line_search.rank, |
2307 | sys->line_search.newton_norm, /* 2norm newton step */ |
2308 | sys->line_search.grad_norm, /* 2norm grad step */ |
2309 | sys->line_search.full_grad_mult, /* calculated grad mult */ |
2310 | sys->line_search.grad_mult, /* grad multiplier 0->1 */ |
2311 | sys->line_search.newton_mult, /* newton multiplier 0->1 */ |
2312 | sys->line_search.obj2, /* newton error */ |
2313 | sys->line_search.obj /* gradient error */ |
2314 | ); |
2315 | } else if (task == 1) { |
2316 | FPRINTF(output," ****STEP ACCEPTED**** \n"); |
2317 | FPRINTF(output," grad mult = %8.4g\n",sys->line_search.grad_mult); |
2318 | FPRINTF(output," ********************* \n"); |
2319 | } else { |
2320 | FPRINTF(output,"\n"); |
2321 | } |
2322 | fclose (output); |
2323 | } |
2324 | |
2325 | static real64 ngslv_calc_newton_mult(slv7_system_t sys) |
2326 | { |
2327 | real64 mult = 1, var_norm, ratio; |
2328 | int32 col; |
2329 | ratio = sys->line_search.newton_norm/sys->line_search.rank; |
2330 | if( ratio > 0.05 ) { |
2331 | mult = 0.05 / ratio; |
2332 | } |
2333 | return mult; |
2334 | |
2335 | for( col = sys->J.reg.col.low; |
2336 | col < sys->J.un_p_col_reg.col.low; col++) { |
2337 | struct var_variable *var; |
2338 | var = sys->vlist[mtx_col_to_org(sys->J.mtx,col)]; |
2339 | if (sys->nominals.vec[col] != 0) { |
2340 | var_norm = sqr(var_value(var) / sys->nominals.vec[col]); |
2341 | } |
2342 | } |
2343 | var_norm = sqrt(var_norm); |
2344 | ratio = sys->line_search.newton_norm/var_norm; |
2345 | if (ratio > 1) { |
2346 | mult = 1/ratio; |
2347 | } |
2348 | return mult; |
2349 | } |
2350 | |
2351 | static void apply_step( slv7_system_t ); |
2352 | static void restore_variables( slv7_system_t ); |
2353 | |
2354 | static void ngslv_line_search(slv7_system_t sys, |
2355 | linsolqr_system_t *lsys, |
2356 | mtx_matrix_t *factors, |
2357 | mtx_matrix_t *outer_factors, |
2358 | real64 *varvalue) |
2359 | { |
2360 | int32 ind; |
2361 | real64 best_obj = 1e300, best_alpha = -1; |
2362 | sys->line_search.newton_mult = 1; |
2363 | sys->line_search.grad_mult = 1; |
2364 | |
2365 | sys->line_search.rank = sys->J.un_p_row_reg.row.low - |
2366 | sys->J.reg.row.low; |
2367 | sys->line_search.rank_defect = sys->J.un_p_row_reg.row.high - |
2368 | sys->J.un_p_row_reg.row.low + 1; |
2369 | /* calc_ng_step(sys,lsys,factors,varvalue); |
2370 | ngslv_calc_varstep(sys,factors); |
2371 | apply_step(sys); |
2372 | ls_calc_gradient(sys,factors,outer_factors,lsys); |
2373 | sys->line_search.newton_mult = ngslv_calc_newton_mult(sys); |
2374 | restore_variables(sys); |
2375 | */ |
2376 | for (ind = 1; ind <= 10; ++ind) { |
2377 | calc_ng_step(sys,lsys,factors,varvalue); |
2378 | ngslv_calc_varstep(sys,factors); |
2379 | apply_step(sys); |
2380 | ls_calc_gradient(sys,factors,outer_factors,lsys); |
2381 | if(sys->line_search.obj + sys->line_search.obj2 < best_obj) { |
2382 | best_obj = sys->line_search.obj; |
2383 | best_alpha = sys->line_search.grad_mult; |
2384 | } |
2385 | print_ls_data(sys,0); |
2386 | restore_variables(sys); |
2387 | sys->line_search.grad_mult -= 0.1; |
2388 | sys->line_search.newton_mult -= 0.1; |
2389 | } |
2390 | sys->line_search.grad_mult = best_alpha; |
2391 | calc_ng_step(sys,lsys,factors,varvalue); |
2392 | ngslv_calc_varstep(sys,factors); |
2393 | print_ls_data(sys,1); |
2394 | } |
2395 | |
2396 | static void calc_newton_grad_step(slv7_system_t sys) |
2397 | /** |
2398 | *** Calculate Newton/Gradient step |
2399 | **/ |
2400 | { |
2401 | mtx_sparse_t *tmp = NULL; |
2402 | linsolqr_system_t lsys = sys->J.sys; |
2403 | mtx_matrix_t factors, coef, outer_factors; |
2404 | real64 *varvalue; |
2405 | /* THIS IS GOING AWAY |
2406 | int32 status_ok; */ |
2407 | |
2408 | factors = linsolqr_get_factors(lsys); |
2409 | outer_factors = mtx_create_slave(factors); |
2410 | coef = linsolqr_get_matrix(lsys); |
2411 | varvalue = linsolqr_get_varvalue(lsys,0); |
2412 | |
2413 | set_un_p_reg(sys); |
2414 | tmp = create_tmp(sys); |
2415 | if (tmp == NULL) { |
2416 | return; |
2417 | } |
2418 | mtx_clear_region(factors,&(sys->J.un_p_row_reg)); |
2419 | mtx_clear_region(factors,&(sys->J.A12_reg)); |
2420 | |
2421 | fill_un_p_row_reg(sys,&factors,&coef,&outer_factors,&tmp); |
2422 | fill_un_p_col_reg(sys,&factors,&coef,&outer_factors,&tmp,&lsys); |
2423 | |
2424 | /* THIS IS GOING AWAY |
2425 | status_ok = linsolqr_setup_ngslv(lsys,sys->J.rhs, |
2426 | &(sys->J.un_p_row_reg.row),sys->tmp.vec); |
2427 | if (status_ok != 1) { |
2428 | return; |
2429 | } |
2430 | */ |
2431 | zero_vector(&(sys->grad_newton)); |
2432 | ngslv_calc_newton(sys,&factors); |
2433 | calc_un_p_rhs(sys,&factors,&outer_factors,varvalue); |
2434 | calc_un_p_grad(sys,&factors,&outer_factors,varvalue); |
2435 | set_grad_multiplier(sys,&outer_factors); |
2436 | zero_vector(&(sys->tmp)); |
2437 | calc_newton_adjustment(sys,&lsys,&factors,&outer_factors,varvalue); |
2438 | |
2439 | /* Here we will 'manualy' scale the step. This should eventually be a linesearch */ |
2440 | sys->line_search.full_grad_mult = |
2441 | sys->rarray[SP7_GMULT] * sys->line_search.full_grad_mult; |
2442 | /* |
2443 | FPRINTF(stderr,"ALPHA = %16.8g\n",sys->line_search.full_grad_mult); |
2444 | sys->line_search.newton_mult = 1; |
2445 | calc_ng_step(sys,&lsys,&factors,varvalue); |
2446 | ngslv_calc_varstep(sys,&factors); |
2447 | */ |
2448 | set_up_line_search(sys,&factors,&lsys); |
2449 | ngslv_line_search(sys,&lsys,&factors,&outer_factors,varvalue); |
2450 | } |
2451 | #endif /*NGSLV*/ |
2452 | |
2453 | /** |
2454 | *** OK. Here's where we compute the actual step to be taken. It will |
2455 | *** be some linear combination of the 1st order and 2nd order steps. |
2456 | **/ |
2457 | |
2458 | typedef real64 sym_2x2_t[3]; /* Stores symmetric 2x2 matrices */ |
2459 | |
2460 | struct parms_t { |
2461 | real64 low,high,guess; /* Used to search for parameter */ |
2462 | }; |
2463 | |
2464 | struct calc_step_vars { |
2465 | sym_2x2_t coef1, coef2; |
2466 | real64 rhs[2]; /* RHS for 2x2 system */ |
2467 | struct parms_t parms; |
2468 | real64 alpha1, alpha2; |
2469 | real64 error; /* Error between step norm and sys->maxstep */ |
2470 | }; |
2471 | |
2472 | static void calc_2x2_system(slv7_system_t sys, struct calc_step_vars *vars) |
2473 | /** |
2474 | *** Calculates 2x2 system (coef1,coef2,rhs). |
2475 | **/ |
2476 | { |
2477 | vars->coef1[0] = (2.0*sys->phi/sys->newton.norm2)* |
2478 | calc_sqrt_D0(sys->newton.norm2)/calc_sqrt_D0(sys->gamma.norm2); |
2479 | vars->coef1[1] = 1.0; |
2480 | vars->coef1[2] = (sys->Jgamma.norm2/sys->gamma.norm2)* |
2481 | calc_sqrt_D0(sys->newton.norm2)/calc_sqrt_D0(sys->gamma.norm2); |
2482 | |
2483 | vars->coef2[0] = 1.0; |
2484 | vars->coef2[1] = 2.0*sys->phi/ |
2485 | calc_sqrt_D0(sys->newton.norm2)/calc_sqrt_D0(sys->gamma.norm2); |
2486 | vars->coef2[2] = 1.0; |
2487 | |
2488 | vars->rhs[0] = 2.0*sys->phi/ |
2489 | sys->maxstep/calc_sqrt_D0(sys->gamma.norm2); |
2490 | vars->rhs[1] = calc_sqrt_D0(sys->newton.norm2)/sys->maxstep; |
2491 | } |
2492 | |
2493 | static void coefs_from_parm( slv7_system_t sys, struct calc_step_vars *vars) |
2494 | /** |
2495 | *** Determines alpha1 and alpha2 from the parameter (guess). |
2496 | **/ |
2497 | { |
2498 | |
2499 | sym_2x2_t coef; /* Actual coefficient matrix */ |
2500 | real64 det; /* Determinant of coefficient matrix */ |
2501 | int i; |
2502 | |
2503 | for( i=0 ; i<3 ; ++i ) coef[i] = |
2504 | vars->coef1[i] + vars->parms.guess * vars->coef2[i]; |
2505 | det = coef[0]*coef[2] - coef[1]*coef[1]; |
2506 | if( det < 0.0 ) |
2507 | FPRINTF(MIF(sys),"%-40s ---> %g\n", |
2508 | " Unexpected negative determinant!",det); |
2509 | if( det <= DETZERO ) { |
2510 | /** |
2511 | *** varstep2 and varstep1 are essentially parallel: |
2512 | *** adjust length of either |
2513 | **/ |
2514 | vars->alpha2 = 0.0; |
2515 | vars->alpha1 = 1.0; |
2516 | } else { |
2517 | vars->alpha2 = (vars->rhs[0]*coef[2] - vars->rhs[1]*coef[1])/det; |
2518 | vars->alpha1 = (vars->rhs[1]*coef[0] - vars->rhs[0]*coef[1])/det; |
2519 | } |
2520 | } |
2521 | |
2522 | static real64 step_norm2( slv7_system_t sys, struct calc_step_vars *vars) |
2523 | /** |
2524 | *** Computes step vector length based on 1st order and 2nd order |
2525 | *** vectors and their coefficients. |
2526 | **/ |
2527 | { |
2528 | return sys->maxstep*sys->maxstep* |
2529 | (vars->alpha2 * vars->alpha2 + |
2530 | vars->alpha2 * vars->alpha1 * sys->phi/ |
2531 | calc_sqrt_D0(sys->varstep2.norm2 + sys->mulstep2.norm2)/ |
2532 | calc_sqrt_D0(sys->varstep1.norm2 + sys->mulstep1.norm2) + |
2533 | vars->alpha1 * vars->alpha1); |
2534 | } |
2535 | |
2536 | static void adjust_parms( slv7_system_t sys, struct calc_step_vars *vars) |
2537 | /** |
2538 | *** Re-guesses the parameters based on |
2539 | *** step size vs. target value. |
2540 | **/ |
2541 | { |
2542 | vars->error = (calc_sqrt_D0(step_norm2(sys,vars))/sys->maxstep) - 1.0; |
2543 | if( vars->error > 0.0 ) { |
2544 | /* Increase parameter (to decrease step length) */ |
2545 | vars->parms.low = vars->parms.guess; |
2546 | vars->parms.guess = (vars->parms.high>3.0*vars->parms.guess) |
2547 | ? 2.0*vars->parms.guess |
2548 | : 0.5*(vars->parms.low + vars->parms.high); |
2549 | } else { |
2550 | /* Decrease parameter (to increase step norm) */ |
2551 | vars->parms.high = vars->parms.guess; |
2552 | vars->parms.guess = 0.5*(vars->parms.low + vars->parms.high); |
2553 | } |
2554 | } |
2555 | |
2556 | static void compute_step( slv7_system_t sys, struct calc_step_vars *vars) |
2557 | /** |
2558 | *** Computes the step based on the coefficients in vars. |
2559 | **/ |
2560 | { |
2561 | int32 row,col; |
2562 | real64 tot1_norm2, tot2_norm2; |
2563 | |
2564 | tot1_norm2 = sys->varstep1.norm2 + sys->mulstep1.norm2; |
2565 | tot2_norm2 = sys->varstep2.norm2 + sys->mulstep2.norm2; |
2566 | if( !sys->varstep.accurate ) { |
2567 | for( col=sys->varstep.rng->low ; col<=sys->varstep.rng->high ; ++col ) |
2568 | if( (vars->alpha2 == 1.0) && (vars->alpha1 == 0.0) ) { |
2569 | sys->varstep.vec[col] = sys->maxstep * |
2570 | sys->varstep2.vec[col]/calc_sqrt_D0(tot2_norm2); |
2571 | } else if( (vars->alpha2 == 0.0) && (vars->alpha1 == 1.0) ) { |
2572 | sys->varstep.vec[col] = sys->maxstep * |
2573 | sys->varstep1.vec[col]/calc_sqrt_D0(tot1_norm2); |
2574 | } else if( (vars->alpha2 != 0.0) && (vars->alpha1 != 0.0) ) { |
2575 | sys->varstep.vec[col] = sys->maxstep* |
2576 | ( |
2577 | vars->alpha2*sys->varstep2.vec[col]/calc_sqrt_D0(tot2_norm2) + |
2578 | vars->alpha1*sys->varstep1.vec[col]/calc_sqrt_D0(tot1_norm2) |
2579 | ); |
2580 | } |
2581 | sys->varstep.accurate = TRUE; |
2582 | } |
2583 | if( !sys->mulstep.accurate ) { |
2584 | for( row=sys->mulstep.rng->low ; row<=sys->mulstep.rng->high ; ++row ) |
2585 | if( (vars->alpha2 == 1.0) && (vars->alpha1 == 0.0) ) { |
2586 | sys->mulstep.vec[row] = sys->maxstep * |
2587 | sys->mulstep2.vec[row]/calc_sqrt_D0(tot2_norm2); |
2588 | } else if( (vars->alpha2 == 0.0) && (vars->alpha1 == 1.0) ) { |
2589 | sys->mulstep.vec[row] = sys->maxstep * |
2590 | sys->mulstep1.vec[row]/calc_sqrt_D0(tot1_norm2); |
2591 | } else if( (vars->alpha2 != 0.0) && (vars->alpha1 != 0.0) ) { |
2592 | sys->mulstep.vec[row] = sys->maxstep* |
2593 | ( |
2594 | vars->alpha2*sys->mulstep2.vec[row]/calc_sqrt_D0(tot2_norm2) + |
2595 | vars->alpha1*sys->mulstep1.vec[row]/calc_sqrt_D0(tot1_norm2) |
2596 | ); |
2597 | } |
2598 | sys->mulstep.accurate = TRUE; |
2599 | } |
2600 | #if DEBUG |
2601 | FPRINTF(LIF(sys),"Varstep: "); |
2602 | debug_out_vector(LIF(sys),sys,&(sys->varstep)); |
2603 | FPRINTF(LIF(sys),"Mulstep: "); |
2604 | debug_out_vector(LIF(sys),sys,&(sys->mulstep)); |
2605 | #endif |
2606 | } |
2607 | |
2608 | |
2609 | static void calc_step( slv7_system_t sys, int minor) |
2610 | /** |
2611 | *** Calculates step vector, based on sys->maxstep, and the varstep2/ |
2612 | *** varstep1 and mulstep2/mulstep1 vectors. Nothing is assumed to be |
2613 | *** calculated, except the weights and the jacobian (scaled). Also, |
2614 | *** the step is not checked for legitimacy. |
2615 | *** NOTE: the step is scaled. |
2616 | **/ |
2617 | { |
2618 | |
2619 | struct calc_step_vars vars; |
2620 | real64 tot1_norm2, tot2_norm2; |
2621 | |
2622 | if( sys->varstep.accurate && sys->mulstep.accurate ) |
2623 | return; |
2624 | if (sys->p.output.less_important) { |
2625 | FPRINTF(LIF(sys),"\n%-40s ---> %d\n", " Step trial",minor); |
2626 | } |
2627 | |
2628 | tot1_norm2 = sys->varstep1.norm2 + sys->mulstep1.norm2; |
2629 | tot2_norm2 = sys->varstep2.norm2 + sys->mulstep2.norm2; |
2630 | if( (tot1_norm2 == 0.0) && (tot2_norm2 == 0.0) ) { |
2631 | /* Take no step at all */ |
2632 | vars.alpha1 = 0.0; |
2633 | vars.alpha2 = 0.0; |
2634 | sys->maxstep = 0.0; |
2635 | sys->varstep.norm2 = 0.0; |
2636 | sys->mulstep.norm2 = 0.0; |
2637 | |
2638 | } else if( (tot2_norm2 > 0.0) && OPTIMIZING(sys) ) { |
2639 | /* Stay in varstep2 direction */ |
2640 | vars.alpha1 = 0.0; |
2641 | vars.alpha2 = 1.0; |
2642 | sys->maxstep = MIN(sys->maxstep,calc_sqrt_D0(tot2_norm2)); |
2643 | sys->varstep.norm2 = calc_sqr_D0(sys->maxstep)* |
2644 | sys->varstep2.norm2/tot2_norm2; |
2645 | sys->mulstep.norm2 = calc_sqr_D0(sys->maxstep)* |
2646 | sys->mulstep2.norm2/tot2_norm2; |
2647 | |
2648 | } else if( (tot2_norm2>0.0)&&(calc_sqrt_D0(tot2_norm2)<=sys->maxstep) ) { |
2649 | /* Attempt step in varstep2 direction */ |
2650 | vars.alpha1 = 0.0; |
2651 | vars.alpha2 = 1.0; |
2652 | sys->maxstep = calc_sqrt_D0(tot2_norm2); |
2653 | sys->varstep.norm2 = calc_sqr_D0(sys->maxstep)* |
2654 | sys->varstep2.norm2/tot2_norm2; |
2655 | sys->mulstep.norm2 = calc_sqr_D0(sys->maxstep)* |
2656 | sys->mulstep2.norm2/tot2_norm2; |
2657 | |
2658 | } else if( (tot2_norm2==0.0 || sys->s.block.current_size==1) && |
2659 | (tot1_norm2 > 0.0) ) { |
2660 | /* Attempt step in varstep1 direction */ |
2661 | vars.alpha1 = 1.0; |
2662 | vars.alpha2 = 0.0; |
2663 | if ( (sys->gamma.norm2/sys->Jgamma.norm2)* |
2664 | calc_sqrt_D0(sys->gamma.norm2) <= sys->maxstep ) |
2665 | sys->maxstep = (sys->gamma.norm2/sys->Jgamma.norm2)* |
2666 | calc_sqrt_D0(sys->gamma.norm2); |
2667 | sys->varstep.norm2 = calc_sqr_D0(sys->maxstep)* |
2668 | sys->varstep1.norm2/tot1_norm2; |
2669 | sys->mulstep.norm2 = calc_sqr_D0(sys->maxstep)* |
2670 | sys->mulstep1.norm2/tot1_norm2; |
2671 | |
2672 | } else { |
2673 | /* Attempt step in varstep1-varstep2 direction */ |
2674 | vars.parms.low = 0.0; |
2675 | vars.parms.high = MAXDOUBLE; |
2676 | vars.parms.guess = 1.0; |
2677 | calc_2x2_system(sys,&vars); |
2678 | do { |
2679 | coefs_from_parm(sys, &vars); |
2680 | adjust_parms(sys, &vars); |
2681 | } while( fabs(vars.error) > STEPSIZEERR_MAX && |
2682 | vars.parms.high - vars.parms.low > PARMRNG_MIN ); |
2683 | if (sys->p.output.less_important) { |
2684 | FPRINTF(LIF(sys),"%-40s ---> %g\n", |
2685 | " parameter high", vars.parms.high); |
2686 | FPRINTF(LIF(sys),"%-40s ---> %g\n", |
2687 | " parameter low", vars.parms.low); |
2688 | FPRINTF(LIF(sys),"%-40s ---> %g\n", |
2689 | " Error in step length", vars.error); |
2690 | } |
2691 | sys->varstep.norm2 = step_norm2(sys, &vars); |
2692 | sys->mulstep.norm2 = 0.0; |
2693 | } |
2694 | |
2695 | if (sys->p.output.less_important) { |
2696 | FPRINTF(LIF(sys),"%-40s ---> %g\n", " Alpha1 coefficient (normalized)", |
2697 | vars.alpha1); |
2698 | FPRINTF(LIF(sys),"%-40s ---> %g\n", " Alpha2 coefficient (normalized)", |
2699 | vars.alpha2); |
2700 | } |
2701 | compute_step(sys,&vars); |
2702 | return; |
2703 | |
2704 | } |
2705 | |
2706 | |
2707 | |
2708 | /** |
2709 | *** Variable values maintenance |
2710 | *** --------------------------- |
2711 | *** restore_variables(sys) |
2712 | *** coef = required_coef_to_stay_inbounds(sys) |
2713 | *** apply_step(sys,coef) |
2714 | *** step_accepted(sys) |
2715 | *** change_maxstep(sys,maxstep) |
2716 | **/ |
2717 | |
2718 | static void restore_variables( slv7_system_t sys) |
2719 | /** |
2720 | *** Restores the values of the variables before applying |
2721 | *** a step. |
2722 | **/ |
2723 | { |
2724 | int32 col; |
2725 | for( col = sys->J.reg.col.low; col <= sys->J.reg.col.high; col++ ) { |
2726 | struct var_variable *var = sys->vlist[mtx_col_to_org(sys->J.mtx,col)]; |
2727 | var_set_value(var,sys->variables.vec[col]*sys->nominals.vec[col]); |
2728 | } |
2729 | } |
2730 | |
2731 | |
2732 | static real64 required_coef_to_stay_inbounds( slv7_system_t sys) |
2733 | /** |
2734 | *** Calculates the maximum fraction of the step which can be |
2735 | *** taken without going out of bounds. If the entire step can be |
2736 | *** taken, 1.0 is returned. Otherwise a value less than 1 is |
2737 | *** returned. It is assumed that the current variable values |
2738 | *** are within their bounds. The step must be calculated. |
2739 | **/ |
2740 | { |
2741 | real64 mincoef; |
2742 | int32 col; |
2743 | |
2744 | if( sys->p.ignore_bounds ) |
2745 | return(1.0); |
2746 | |
2747 | mincoef = 1.0; |
2748 | for( col=sys->varstep.rng->low; col <= sys->varstep.rng->high; col++ ) { |
2749 | struct var_variable *var; |
2750 | real64 coef,dx,val,bnd; |
2751 | var = sys->vlist[mtx_col_to_org(sys->J.mtx,col)]; |
2752 | coef = 1.0; |
2753 | dx = sys->varstep.vec[col] * sys->nominals.vec[col]; |
2754 | bnd = var_upper_bound(var); |
2755 | if( (val=var_value(var)) + dx > bnd ) |
2756 | coef = MIN((bnd-val)/dx, 1.0); |
2757 | bnd = var_lower_bound(var); |
2758 | if( val + dx < bnd ) |
2759 | coef = MIN((bnd-val)/dx, 1.0); |
2760 | if( coef < mincoef ) |
2761 | mincoef = coef; |
2762 | } |
2763 | return(mincoef); |
2764 | } |
2765 | |
2766 | |
2767 | static void apply_step( slv7_system_t sys) |
2768 | /** |
2769 | *** Adds sys->varstep to the variable values in block: projecting |
2770 | *** near bounds. |
2771 | **/ |
2772 | { |
2773 | FILE *lif = LIF(sys); |
2774 | int nproj = 0; |
2775 | real64 bounds_coef = 1.0; |
2776 | int32 col; |
2777 | |
2778 | if (TRUNCATE && (!sys->p.ignore_bounds)) |
2779 | bounds_coef = required_coef_to_stay_inbounds(sys); |
2780 | |
2781 | for( col=sys->varstep.rng->low; col <= sys->varstep.rng->high; col++ ) { |
2782 | struct var_variable *var; |
2783 | real64 dx,val,bnd; |
2784 | var = sys->vlist[mtx_col_to_org(sys->J.mtx,col)]; |
2785 | dx = sys->nominals.vec[col]*sys->varstep.vec[col]; |
2786 | val = var_value(var); |
2787 | if (bounds_coef < 1.0) { |
2788 | dx = dx*TOWARD_BOUNDS*bounds_coef; |
2789 | sys->varstep.vec[col] = dx/sys->nominals.vec[col]; |
2790 | } else { |
2791 | if( !sys->p.ignore_bounds ) { |
2792 | if( val + dx > (bnd=var_upper_bound(var)) ) { |
2793 | dx = TOWARD_BOUNDS*(bnd-val); |
2794 | sys->varstep.vec[col] = dx/sys->nominals.vec[col]; |
2795 | if (sys->p.output.less_important) { |
2796 | FPRINTF(lif,"%-40s ---> ", |
2797 | " Variable projected to upper bound"); |
2798 | print_var_name(lif,sys,var); PUTC('\n',lif); |
2799 | } |
2800 | ++nproj; |
2801 | } else if( val + dx < (bnd=var_lower_bound(var)) ) { |
2802 | dx = TOWARD_BOUNDS*(bnd-val); |
2803 | sys->varstep.vec[col] = dx/sys->nominals.vec[col]; |
2804 | if (sys->p.output.less_important) { |
2805 | FPRINTF(lif,"%-40s ---> ", |
2806 | " Variable projected to lower bound"); |
2807 | print_var_name(lif,sys,var); PUTC('\n',lif); |
2808 | } |
2809 | ++nproj; |
2810 | } |
2811 | } |
2812 | } |
2813 | var_set_value(var,val+dx); |
2814 | } |
2815 | |
2816 | if( !sys->p.ignore_bounds ) { |
2817 | if (nproj > 0) { |
2818 | square_norm(&(sys->varstep)); |