xref: /petsc/src/snes/impls/ntrdc/ntrdc.c (revision e600fa544e2bb197ca2af9b6e65ea465976dec56) !
1 
2 #include <../src/snes/impls/ntrdc/ntrdcimpl.h>                /*I   "petscsnes.h"   I*/
3 
4 typedef struct {
5   SNES           snes;
6   /*  Information on the regular SNES convergence test; which may have been user provided
7       Copied from tr.c (maybe able to disposed, but this is a private function) - Heeho
8       Same with SNESTR_KSPConverged_Private, SNESTR_KSPConverged_Destroy, and SNESTR_Converged_Private
9  */
10 
11   PetscErrorCode (*convtest)(KSP,PetscInt,PetscReal,KSPConvergedReason*,void*);
12   PetscErrorCode (*convdestroy)(void*);
13   void           *convctx;
14 } SNES_TRDC_KSPConverged_Ctx;
15 
16 static PetscErrorCode SNESTRDC_KSPConverged_Private(KSP ksp,PetscInt n,PetscReal rnorm,KSPConvergedReason *reason,void *cctx)
17 {
18   SNES_TRDC_KSPConverged_Ctx  *ctx = (SNES_TRDC_KSPConverged_Ctx*)cctx;
19   SNES                        snes = ctx->snes;
20   SNES_NEWTONTRDC             *neP = (SNES_NEWTONTRDC*)snes->data;
21   Vec                         x;
22   PetscReal                   nrm;
23   PetscErrorCode              ierr;
24 
25   PetscFunctionBegin;
26   ierr = (*ctx->convtest)(ksp,n,rnorm,reason,ctx->convctx);CHKERRQ(ierr);
27   if (*reason) {
28     ierr = PetscInfo(snes,"Default or user provided convergence test KSP iterations=%" PetscInt_FMT ", rnorm=%g\n",n,(double)rnorm);CHKERRQ(ierr);
29   }
30   /* Determine norm of solution */
31   ierr = KSPBuildSolution(ksp,NULL,&x);CHKERRQ(ierr);
32   ierr = VecNorm(x,NORM_2,&nrm);CHKERRQ(ierr);
33   if (nrm >= neP->delta) {
34     ierr    = PetscInfo(snes,"Ending linear iteration early, delta=%g, length=%g\n",(double)neP->delta,(double)nrm);CHKERRQ(ierr);
35     *reason = KSP_CONVERGED_STEP_LENGTH;
36   }
37   PetscFunctionReturn(0);
38 }
39 
40 static PetscErrorCode SNESTRDC_KSPConverged_Destroy(void *cctx)
41 {
42   SNES_TRDC_KSPConverged_Ctx *ctx = (SNES_TRDC_KSPConverged_Ctx*)cctx;
43   PetscErrorCode             ierr;
44 
45   PetscFunctionBegin;
46   ierr = (*ctx->convdestroy)(ctx->convctx);CHKERRQ(ierr);
47   ierr = PetscFree(ctx);CHKERRQ(ierr);
48 
49   PetscFunctionReturn(0);
50 }
51 
52 /* ---------------------------------------------------------------- */
53 /*
54    SNESTRDC_Converged_Private -test convergence JUST for
55    the trust region tolerance.
56 
57 */
58 static PetscErrorCode SNESTRDC_Converged_Private(SNES snes,PetscInt it,PetscReal xnorm,PetscReal pnorm,PetscReal fnorm,SNESConvergedReason *reason,void *dummy)
59 {
60   SNES_NEWTONTRDC  *neP = (SNES_NEWTONTRDC*)snes->data;
61   PetscErrorCode   ierr;
62 
63   PetscFunctionBegin;
64   *reason = SNES_CONVERGED_ITERATING;
65   if (neP->delta < xnorm * snes->deltatol) {
66     ierr    = PetscInfo(snes,"Diverged due to too small a trust region %g<%g*%g\n",(double)neP->delta,(double)xnorm,(double)snes->deltatol);CHKERRQ(ierr);
67     *reason = SNES_DIVERGED_TR_DELTA;
68   } else if (snes->nfuncs >= snes->max_funcs && snes->max_funcs >= 0) {
69     ierr    = PetscInfo(snes,"Exceeded maximum number of function evaluations: %" PetscInt_FMT "\n",snes->max_funcs);CHKERRQ(ierr);
70     *reason = SNES_DIVERGED_FUNCTION_COUNT;
71   }
72   PetscFunctionReturn(0);
73 }
74 
75 /*@
76   SNESNewtonTRDCGetRhoFlag - Get whether the solution update is within the trust-region.
77 
78   Input Parameters:
79 . snes - the nonlinear solver object
80 
81   Output Parameters:
82 . rho_flag: PETSC_TRUE if the solution update is in the trust-region; otherwise, PETSC_FALSE
83 
84   Level: developer
85 
86 @*/
87 PetscErrorCode  SNESNewtonTRDCGetRhoFlag(SNES snes,PetscBool *rho_flag)
88 {
89   SNES_NEWTONTRDC  *tr = (SNES_NEWTONTRDC*)snes->data;
90 
91   PetscFunctionBegin;
92   PetscValidHeaderSpecific(snes,SNES_CLASSID,1);
93   PetscValidBoolPointer(rho_flag,2);
94   *rho_flag = tr->rho_satisfied;
95   PetscFunctionReturn(0);
96 }
97 
98 /*@C
99    SNESNewtonTRDCSetPreCheck - Sets a user function that is called before the search step has been determined.
100        Allows the user a chance to change or override the trust region decision.
101 
102    Logically Collective on snes
103 
104    Input Parameters:
105 +  snes - the nonlinear solver object
106 .  func - [optional] function evaluation routine, see SNESNewtonTRDCPreCheck()  for the calling sequence
107 -  ctx  - [optional] user-defined context for private data for the function evaluation routine (may be NULL)
108 
109    Level: intermediate
110 
111    Note: This function is called BEFORE the function evaluation within the SNESNEWTONTRDC solver.
112 
113 .seealso: SNESNewtonTRDCPreCheck(), SNESNewtonTRDCGetPreCheck(), SNESNewtonTRDCSetPostCheck(), SNESNewtonTRDCGetPostCheck()
114 @*/
115 PetscErrorCode  SNESNewtonTRDCSetPreCheck(SNES snes, PetscErrorCode (*func)(SNES,Vec,Vec,PetscBool*,void*),void *ctx)
116 {
117   SNES_NEWTONTRDC  *tr = (SNES_NEWTONTRDC*)snes->data;
118 
119   PetscFunctionBegin;
120   PetscValidHeaderSpecific(snes,SNES_CLASSID,1);
121   if (func) tr->precheck    = func;
122   if (ctx)  tr->precheckctx = ctx;
123   PetscFunctionReturn(0);
124 }
125 
126 /*@C
127    SNESNewtonTRDCGetPreCheck - Gets the pre-check function
128 
129    Not collective
130 
131    Input Parameter:
132 .  snes - the nonlinear solver context
133 
134    Output Parameters:
135 +  func - [optional] function evaluation routine, see for the calling sequence SNESNewtonTRDCPreCheck()
136 -  ctx  - [optional] user-defined context for private data for the function evaluation routine (may be NULL)
137 
138    Level: intermediate
139 
140 .seealso: SNESNewtonTRDCSetPreCheck(), SNESNewtonTRDCPreCheck()
141 @*/
142 PetscErrorCode  SNESNewtonTRDCGetPreCheck(SNES snes, PetscErrorCode (**func)(SNES,Vec,Vec,PetscBool*,void*),void **ctx)
143 {
144   SNES_NEWTONTRDC  *tr = (SNES_NEWTONTRDC*)snes->data;
145 
146   PetscFunctionBegin;
147   PetscValidHeaderSpecific(snes,SNES_CLASSID,1);
148   if (func) *func = tr->precheck;
149   if (ctx)  *ctx  = tr->precheckctx;
150   PetscFunctionReturn(0);
151 }
152 
153 /*@C
154    SNESNewtonTRDCSetPostCheck - Sets a user function that is called after the search step has been determined but before the next
155        function evaluation. Allows the user a chance to change or override the decision of the line search routine
156 
157    Logically Collective on snes
158 
159    Input Parameters:
160 +  snes - the nonlinear solver object
161 .  func - [optional] function evaluation routine, see SNESNewtonTRDCPostCheck()  for the calling sequence
162 -  ctx  - [optional] user-defined context for private data for the function evaluation routine (may be NULL)
163 
164    Level: intermediate
165 
166    Note: This function is called BEFORE the function evaluation within the SNESNEWTONTRDC solver while the function set in
167    SNESLineSearchSetPostCheck() is called AFTER the function evaluation.
168 
169 .seealso: SNESNewtonTRDCPostCheck(), SNESNewtonTRDCGetPostCheck()
170 @*/
171 PetscErrorCode  SNESNewtonTRDCSetPostCheck(SNES snes,PetscErrorCode (*func)(SNES,Vec,Vec,Vec,PetscBool*,PetscBool*,void*),void *ctx)
172 {
173   SNES_NEWTONTRDC  *tr = (SNES_NEWTONTRDC*)snes->data;
174 
175   PetscFunctionBegin;
176   PetscValidHeaderSpecific(snes,SNES_CLASSID,1);
177   if (func) tr->postcheck    = func;
178   if (ctx)  tr->postcheckctx = ctx;
179   PetscFunctionReturn(0);
180 }
181 
182 /*@C
183    SNESNewtonTRDCGetPostCheck - Gets the post-check function
184 
185    Not collective
186 
187    Input Parameter:
188 .  snes - the nonlinear solver context
189 
190    Output Parameters:
191 +  func - [optional] function evaluation routine, see for the calling sequence SNESNewtonTRDCPostCheck()
192 -  ctx  - [optional] user-defined context for private data for the function evaluation routine (may be NULL)
193 
194    Level: intermediate
195 
196 .seealso: SNESNewtonTRDCSetPostCheck(), SNESNewtonTRDCPostCheck()
197 @*/
198 PetscErrorCode  SNESNewtonTRDCGetPostCheck(SNES snes,PetscErrorCode (**func)(SNES,Vec,Vec,Vec,PetscBool*,PetscBool*,void*),void **ctx)
199 {
200   SNES_NEWTONTRDC  *tr = (SNES_NEWTONTRDC*)snes->data;
201 
202   PetscFunctionBegin;
203   PetscValidHeaderSpecific(snes,SNES_CLASSID,1);
204   if (func) *func = tr->postcheck;
205   if (ctx)  *ctx  = tr->postcheckctx;
206   PetscFunctionReturn(0);
207 }
208 
209 /*@C
210    SNESNewtonTRDCPreCheck - Called before the step has been determined in SNESNEWTONTRDC
211 
212    Logically Collective on snes
213 
214    Input Parameters:
215 +  snes - the solver
216 .  X - The last solution
217 -  Y - The step direction
218 
219    Output Parameters:
220 .  changed_Y - Indicator that the step direction Y has been changed.
221 
222    Level: developer
223 
224 .seealso: SNESNewtonTRDCSetPreCheck(), SNESNewtonTRDCGetPreCheck()
225 @*/
226 static PetscErrorCode SNESNewtonTRDCPreCheck(SNES snes,Vec X,Vec Y,PetscBool *changed_Y)
227 {
228   SNES_NEWTONTRDC  *tr = (SNES_NEWTONTRDC*)snes->data;
229   PetscErrorCode   ierr;
230 
231   PetscFunctionBegin;
232   *changed_Y = PETSC_FALSE;
233   if (tr->precheck) {
234     ierr = (*tr->precheck)(snes,X,Y,changed_Y,tr->precheckctx);CHKERRQ(ierr);
235     PetscValidLogicalCollectiveBool(snes,*changed_Y,4);
236   }
237   PetscFunctionReturn(0);
238 }
239 
240 /*@C
241    SNESNewtonTRDCPostCheck - Called after the step has been determined in SNESNEWTONTRDC but before the function evaluation
242 
243    Logically Collective on snes
244 
245    Input Parameters:
246 +  snes - the solver.  X - The last solution
247 .  Y - The full step direction
248 -  W - The updated solution, W = X - Y
249 
250    Output Parameters:
251 +  changed_Y - indicator if step has been changed
252 -  changed_W - Indicator if the new candidate solution W has been changed.
253 
254    Notes:
255      If Y is changed then W is recomputed as X - Y
256 
257    Level: developer
258 
259 .seealso: SNESNewtonTRDCSetPostCheck(), SNESNewtonTRDCGetPostCheck()
260 @*/
261 static PetscErrorCode SNESNewtonTRDCPostCheck(SNES snes,Vec X,Vec Y,Vec W,PetscBool *changed_Y,PetscBool *changed_W)
262 {
263   SNES_NEWTONTRDC  *tr = (SNES_NEWTONTRDC*)snes->data;
264   PetscErrorCode   ierr;
265 
266   PetscFunctionBegin;
267   *changed_Y = PETSC_FALSE;
268   *changed_W = PETSC_FALSE;
269   if (tr->postcheck) {
270     ierr = (*tr->postcheck)(snes,X,Y,W,changed_Y,changed_W,tr->postcheckctx);CHKERRQ(ierr);
271     PetscValidLogicalCollectiveBool(snes,*changed_Y,5);
272     PetscValidLogicalCollectiveBool(snes,*changed_W,6);
273   }
274   PetscFunctionReturn(0);
275 }
276 
277 /*
278    SNESSolve_NEWTONTRDC - Implements Newton's Method with trust-region subproblem and adds dogleg Cauchy
279    (Steepest Descent direction) step and direction if the trust region is not satisfied for solving system of
280    nonlinear equations
281 
282 */
283 static PetscErrorCode SNESSolve_NEWTONTRDC(SNES snes)
284 {
285   SNES_NEWTONTRDC            *neP = (SNES_NEWTONTRDC*)snes->data;
286   Vec                        X,F,Y,G,W,GradF,YNtmp;
287   Vec                        YCtmp;
288   Mat                        jac;
289   PetscErrorCode             ierr;
290   PetscInt                   maxits,i,j,lits,inner_count,bs;
291   PetscReal                  rho,fnorm,gnorm,xnorm=0,delta,ynorm,temp_xnorm,temp_ynorm;  /* TRDC inner iteration */
292   PetscReal                  inorms[99]; /* need to make it dynamic eventually, fixed max block size of 99 for now */
293   PetscReal                  deltaM,ynnorm,f0,mp,gTy,g,yTHy;  /* rho calculation */
294   PetscReal                  auk,gfnorm,ycnorm,c0,c1,c2,tau,tau_pos,tau_neg,gTBg;  /* Cauchy Point */
295   KSP                        ksp;
296   SNESConvergedReason        reason = SNES_CONVERGED_ITERATING;
297   PetscBool                  breakout = PETSC_FALSE;
298   SNES_TRDC_KSPConverged_Ctx *ctx;
299   PetscErrorCode             (*convtest)(KSP,PetscInt,PetscReal,KSPConvergedReason*,void*),(*convdestroy)(void*);
300   void                       *convctx;
301 
302   PetscFunctionBegin;
303   maxits = snes->max_its;               /* maximum number of iterations */
304   X      = snes->vec_sol;               /* solution vector */
305   F      = snes->vec_func;              /* residual vector */
306   Y      = snes->work[0];               /* update vector */
307   G      = snes->work[1];               /* updated residual */
308   W      = snes->work[2];               /* temporary vector */
309   GradF  = snes->work[3];               /* grad f = J^T F */
310   YNtmp  = snes->work[4];               /* Newton solution */
311   YCtmp  = snes->work[5];               /* Cauchy solution */
312 
313   PetscCheckFalse(snes->xl || snes->xu || snes->ops->computevariablebounds,PetscObjectComm((PetscObject)snes),PETSC_ERR_ARG_WRONGSTATE, "SNES solver %s does not support bounds", ((PetscObject)snes)->type_name);
314 
315   ierr = VecGetBlockSize(YNtmp,&bs);CHKERRQ(ierr);
316 
317   ierr       = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr);
318   snes->iter = 0;
319   ierr       = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr);
320 
321   /* Set the linear stopping criteria to use the More' trick. From tr.c */
322   ierr = SNESGetKSP(snes,&ksp);CHKERRQ(ierr);
323   ierr = KSPGetConvergenceTest(ksp,&convtest,&convctx,&convdestroy);CHKERRQ(ierr);
324   if (convtest != SNESTRDC_KSPConverged_Private) {
325     ierr                  = PetscNew(&ctx);CHKERRQ(ierr);
326     ctx->snes             = snes;
327     ierr                  = KSPGetAndClearConvergenceTest(ksp,&ctx->convtest,&ctx->convctx,&ctx->convdestroy);CHKERRQ(ierr);
328     ierr                  = KSPSetConvergenceTest(ksp,SNESTRDC_KSPConverged_Private,ctx,SNESTRDC_KSPConverged_Destroy);CHKERRQ(ierr);
329     ierr                  = PetscInfo(snes,"Using Krylov convergence test SNESTRDC_KSPConverged_Private\n");CHKERRQ(ierr);
330   }
331 
332   if (!snes->vec_func_init_set) {
333     ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr);          /* F(X) */
334   } else snes->vec_func_init_set = PETSC_FALSE;
335 
336   ierr = VecNorm(F,NORM_2,&fnorm);CHKERRQ(ierr);             /* fnorm <- || F || */
337   SNESCheckFunctionNorm(snes,fnorm);
338   ierr = VecNorm(X,NORM_2,&xnorm);CHKERRQ(ierr);             /* xnorm <- || X || */
339 
340   ierr       = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr);
341   snes->norm = fnorm;
342   ierr       = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr);
343   delta      = xnorm ? neP->delta0*xnorm : neP->delta0;  /* initial trust region size scaled by xnorm */
344   deltaM     = xnorm ? neP->deltaM*xnorm : neP->deltaM;  /* maximum trust region size scaled by xnorm */
345   neP->delta = delta;
346   ierr       = SNESLogConvergenceHistory(snes,fnorm,0);CHKERRQ(ierr);
347   ierr       = SNESMonitor(snes,0,fnorm);CHKERRQ(ierr);
348 
349   neP->rho_satisfied = PETSC_FALSE;
350 
351   /* test convergence */
352   ierr = (*snes->ops->converged)(snes,snes->iter,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr);
353   if (snes->reason) PetscFunctionReturn(0);
354 
355   for (i=0; i<maxits; i++) {
356     PetscBool changed_y;
357     PetscBool changed_w;
358 
359     /* dogleg method */
360     ierr = SNESComputeJacobian(snes,X,snes->jacobian,snes->jacobian_pre);CHKERRQ(ierr);
361     SNESCheckJacobianDomainerror(snes);
362     ierr = KSPSetOperators(snes->ksp,snes->jacobian,snes->jacobian);CHKERRQ(ierr);
363     ierr = KSPSolve(snes->ksp,F,YNtmp);CHKERRQ(ierr);   /* Quasi Newton Solution */
364     SNESCheckKSPSolve(snes);  /* this is necessary but old tr.c did not have it*/
365     ierr = KSPGetIterationNumber(snes->ksp,&lits);CHKERRQ(ierr);
366     ierr = SNESGetJacobian(snes,&jac,NULL,NULL,NULL);CHKERRQ(ierr);
367 
368     /* rescale Jacobian, Newton solution update, and re-calculate delta for multiphase (multivariable)
369        for inner iteration and Cauchy direction calculation
370     */
371     if (bs > 1 && neP->auto_scale_multiphase) {
372       ierr = VecStrideNormAll(YNtmp,NORM_INFINITY,inorms);CHKERRQ(ierr);
373       for (j=0; j<bs; j++) {
374         if (neP->auto_scale_max > 1.0) {
375           if (inorms[j] < 1.0/neP->auto_scale_max) {
376             inorms[j] = 1.0/neP->auto_scale_max;
377           }
378         }
379         ierr = VecStrideSet(W,j,inorms[j]);CHKERRQ(ierr);
380         ierr = VecStrideScale(YNtmp,j,1.0/inorms[j]);
381         ierr = VecStrideScale(X,j,1.0/inorms[j]);
382       }
383       ierr = VecNorm(X,NORM_2,&xnorm);CHKERRQ(ierr);
384       if (i == 0) {
385         delta = neP->delta0*xnorm;
386       } else {
387         delta = neP->delta*xnorm;
388       }
389       deltaM = neP->deltaM*xnorm;
390       ierr = MatDiagonalScale(jac,PETSC_NULL,W);CHKERRQ(ierr);
391     }
392 
393     /* calculating GradF of minimization function */
394     ierr = MatMultTranspose(jac,F,GradF);CHKERRQ(ierr);  /* grad f = J^T F */
395     ierr = VecNorm(YNtmp,NORM_2,&ynnorm);CHKERRQ(ierr);  /* ynnorm <- || Y_newton || */
396 
397     inner_count = 0;
398     neP->rho_satisfied = PETSC_FALSE;
399     while (1) {
400       if (ynnorm <= delta) {  /* see if the Newton solution is within the trust region */
401         ierr = VecCopy(YNtmp,Y);CHKERRQ(ierr);
402       } else if (neP->use_cauchy) { /* use Cauchy direction if enabled */
403         ierr = MatMult(jac,GradF,W);CHKERRQ(ierr);
404         ierr = VecDotRealPart(W,W,&gTBg);CHKERRQ(ierr);  /* completes GradF^T J^T J GradF */
405         ierr = VecNorm(GradF,NORM_2,&gfnorm);CHKERRQ(ierr);  /* grad f norm <- || grad f || */
406         if (gTBg <= 0.0) {
407           auk = PETSC_MAX_REAL;
408         } else {
409           auk = PetscSqr(gfnorm)/gTBg;
410         }
411         auk  = PetscMin(delta/gfnorm,auk);
412         ierr = VecCopy(GradF,YCtmp);CHKERRQ(ierr);  /* this could be improved */
413         ierr = VecScale(YCtmp,auk);CHKERRQ(ierr);  /* YCtmp, Cauchy solution*/
414         ierr = VecNorm(YCtmp,NORM_2,&ycnorm);CHKERRQ(ierr);  /* ycnorm <- || Y_cauchy || */
415         if (ycnorm >= delta) {  /* see if the Cauchy solution meets the criteria */
416             ierr = VecCopy(YCtmp,Y);CHKERRQ(ierr);
417             ierr = PetscInfo(snes,"DL evaluated. delta: %8.4e, ynnorm: %8.4e, ycnorm: %8.4e\n",(double)delta,(double)ynnorm,(double)ycnorm);CHKERRQ(ierr);
418         } else {  /* take ratio, tau, of Cauchy and Newton direction and step */
419           ierr    = VecAXPY(YNtmp,-1.0,YCtmp);CHKERRQ(ierr);  /* YCtmp = A, YNtmp = B */
420           ierr    = VecNorm(YNtmp,NORM_2,&c0);CHKERRQ(ierr);  /* this could be improved */
421           c0      = PetscSqr(c0);
422           ierr    = VecDotRealPart(YCtmp,YNtmp,&c1);CHKERRQ(ierr);
423           c1      = 2.0*c1;
424           ierr    = VecNorm(YCtmp,NORM_2,&c2);CHKERRQ(ierr);  /* this could be improved */
425           c2      = PetscSqr(c2) - PetscSqr(delta);
426           tau_pos = (c1 + PetscSqrtReal(PetscSqr(c1) - 4.*c0*c2))/(2.*c0); /* quadratic formula */
427           tau_neg = (c1 - PetscSqrtReal(PetscSqr(c1) - 4.*c0*c2))/(2.*c0);
428           tau     = PetscMax(tau_pos, tau_neg);  /* can tau_neg > tau_pos? I don't think so, but just in case. */
429           ierr    = PetscInfo(snes,"DL evaluated. tau: %8.4e, ynnorm: %8.4e, ycnorm: %8.4e\n",(double)tau,(double)ynnorm,(double)ycnorm);CHKERRQ(ierr);
430           ierr    = VecWAXPY(W,tau,YNtmp,YCtmp);CHKERRQ(ierr);
431           ierr    = VecAXPY(W,-tau,YCtmp);CHKERRQ(ierr);
432           ierr    = VecCopy(W, Y);CHKERRQ(ierr); /* this could be improved */
433         }
434       } else {
435         /* if Cauchy is disabled, only use Newton direction */
436         auk = delta/ynnorm;
437         ierr = VecScale(YNtmp,auk);CHKERRQ(ierr);
438         ierr = VecCopy(YNtmp,Y);CHKERRQ(ierr); /* this could be improved (many VecCopy, VecNorm)*/
439       }
440 
441       ierr = VecNorm(Y,NORM_2,&ynorm);CHKERRQ(ierr);  /* compute the final ynorm  */
442       f0 = 0.5*PetscSqr(fnorm);  /* minimizing function f(X) */
443       ierr = MatMult(jac,Y,W);CHKERRQ(ierr);
444       ierr = VecDotRealPart(W,W,&yTHy);CHKERRQ(ierr);  /* completes GradY^T J^T J GradY */
445       ierr = VecDotRealPart(GradF,Y,&gTy);CHKERRQ(ierr);
446       mp = f0 - gTy + 0.5*yTHy;  /* quadratic model to satisfy, -gTy because our update is X-Y*/
447 
448       /* scale back solution update */
449       if (bs > 1 && neP->auto_scale_multiphase) {
450         for (j=0; j<bs; j++) {
451           ierr = VecStrideScale(Y,j,inorms[j]);
452           if (inner_count == 0) {
453             /* TRDC inner algorithm does not need scaled X after calculating delta in the outer iteration */
454             /* need to scale back X to match Y and provide proper update to the external code */
455             ierr = VecStrideScale(X,j,inorms[j]);
456           }
457         }
458         if (inner_count == 0) {ierr = VecNorm(X,NORM_2,&temp_xnorm);CHKERRQ(ierr);}  /* only in the first iteration */
459         ierr = VecNorm(Y,NORM_2,&temp_ynorm);CHKERRQ(ierr);
460       } else {
461         temp_xnorm = xnorm;
462         temp_ynorm = ynorm;
463       }
464       inner_count++;
465 
466       /* Evaluate the solution to meet the improvement ratio criteria */
467       ierr = SNESNewtonTRDCPreCheck(snes,X,Y,&changed_y);CHKERRQ(ierr);
468       ierr = VecWAXPY(W,-1.0,Y,X);CHKERRQ(ierr);
469       ierr = SNESNewtonTRDCPostCheck(snes,X,Y,W,&changed_y,&changed_w);CHKERRQ(ierr);
470       if (changed_y) {ierr = VecWAXPY(W,-1.0,Y,X);CHKERRQ(ierr);}
471       ierr = VecCopy(Y,snes->vec_sol_update);CHKERRQ(ierr);
472       ierr = SNESComputeFunction(snes,W,G);CHKERRQ(ierr); /*  F(X-Y) = G */
473       ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr);      /* gnorm <- || g || */
474       SNESCheckFunctionNorm(snes,gnorm);
475       g = 0.5*PetscSqr(gnorm); /* minimizing function g(W) */
476       if (f0 == mp) rho = 0.0;
477       else rho = (f0 - g)/(f0 - mp);  /* actual improvement over predicted improvement */
478 
479       if (rho < neP->eta2) {
480         delta *= neP->t1;  /* shrink the region */
481       } else if (rho > neP->eta3) {
482         delta = PetscMin(neP->t2*delta,deltaM); /* expand the region, but not greater than deltaM */
483       }
484 
485       neP->delta = delta;
486       if (rho >= neP->eta1) {
487         /* unscale delta and xnorm before going to the next outer iteration */
488         if (bs > 1 && neP->auto_scale_multiphase) {
489           neP->delta = delta/xnorm;
490           xnorm      = temp_xnorm;
491           ynorm      = temp_ynorm;
492         }
493         neP->rho_satisfied = PETSC_TRUE;
494         break;  /* the improvement ratio is satisfactory */
495       }
496       ierr = PetscInfo(snes,"Trying again in smaller region\n");CHKERRQ(ierr);
497 
498       /* check to see if progress is hopeless */
499       neP->itflag = PETSC_FALSE;
500       /* both delta, ynorm, and xnorm are either scaled or unscaled */
501       ierr        = SNESTRDC_Converged_Private(snes,snes->iter,xnorm,ynorm,fnorm,&reason,snes->cnvP);CHKERRQ(ierr);
502       if (!reason) {
503          /* temp_xnorm, temp_ynorm is always unscaled */
504          /* also the inner iteration already calculated the Jacobian and solved the matrix */
505          /* therefore, it should be passing iteration number of iter+1 instead of iter+0 in the first iteration and after */
506          ierr = (*snes->ops->converged)(snes,snes->iter+1,temp_xnorm,temp_ynorm,fnorm,&reason,snes->cnvP);CHKERRQ(ierr);
507       }
508       /* if multiphase state changes, break out inner iteration */
509       if (reason == SNES_BREAKOUT_INNER_ITER) {
510         if (bs > 1 && neP->auto_scale_multiphase) {
511           /* unscale delta and xnorm before going to the next outer iteration */
512           neP->delta = delta/xnorm;
513           xnorm      = temp_xnorm;
514           ynorm      = temp_ynorm;
515         }
516         reason = SNES_CONVERGED_ITERATING;
517         break;
518       }
519       if (reason == SNES_CONVERGED_SNORM_RELATIVE) reason = SNES_DIVERGED_INNER;
520       if (reason) {
521         if (reason < 0) {
522             /* We're not progressing, so return with the current iterate */
523             ierr     = SNESMonitor(snes,i+1,fnorm);CHKERRQ(ierr);
524             breakout = PETSC_TRUE;
525             break;
526         } else if (reason > 0) {
527             /* We're converged, so return with the current iterate and update solution */
528             ierr     = SNESMonitor(snes,i+1,fnorm);CHKERRQ(ierr);
529             breakout = PETSC_FALSE;
530             break;
531         }
532       }
533       snes->numFailures++;
534     }
535     if (!breakout) {
536       /* Update function and solution vectors */
537       fnorm       = gnorm;
538       ierr        = VecCopy(G,F);CHKERRQ(ierr);
539       ierr        = VecCopy(W,X);CHKERRQ(ierr);
540       /* Monitor convergence */
541       ierr        = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr);
542       snes->iter  = i+1;
543       snes->norm  = fnorm;
544       snes->xnorm = xnorm;
545       snes->ynorm = ynorm;
546       ierr        = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr);
547       ierr        = SNESLogConvergenceHistory(snes,snes->norm,lits);CHKERRQ(ierr);
548       ierr        = SNESMonitor(snes,snes->iter,snes->norm);CHKERRQ(ierr);
549       /* Test for convergence, xnorm = || X || */
550       neP->itflag = PETSC_TRUE;
551       if (snes->ops->converged != SNESConvergedSkip) {ierr = VecNorm(X,NORM_2,&xnorm);CHKERRQ(ierr);}
552       ierr = (*snes->ops->converged)(snes,snes->iter,xnorm,ynorm,fnorm,&reason,snes->cnvP);CHKERRQ(ierr);
553       if (reason) break;
554     } else break;
555   }
556 
557   /* ierr         = PetscFree(inorms);CHKERRQ(ierr); */
558   if (i == maxits) {
559     ierr = PetscInfo(snes,"Maximum number of iterations has been reached: %" PetscInt_FMT "\n",maxits);CHKERRQ(ierr);
560     if (!reason) reason = SNES_DIVERGED_MAX_IT;
561   }
562   ierr         = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr);
563   snes->reason = reason;
564   ierr         = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr);
565   if (convtest != SNESTRDC_KSPConverged_Private) {
566     ierr       = KSPGetAndClearConvergenceTest(ksp,&ctx->convtest,&ctx->convctx,&ctx->convdestroy);CHKERRQ(ierr);
567     ierr       = PetscFree(ctx);CHKERRQ(ierr);
568     ierr       = KSPSetConvergenceTest(ksp,convtest,convctx,convdestroy);CHKERRQ(ierr);
569   }
570   PetscFunctionReturn(0);
571 }
572 
573 /*------------------------------------------------------------*/
574 static PetscErrorCode SNESSetUp_NEWTONTRDC(SNES snes)
575 {
576   PetscErrorCode ierr;
577 
578   PetscFunctionBegin;
579   ierr = SNESSetWorkVecs(snes,6);CHKERRQ(ierr);
580   ierr = SNESSetUpMatrices(snes);CHKERRQ(ierr);
581   PetscFunctionReturn(0);
582 }
583 
584 PetscErrorCode SNESReset_NEWTONTRDC(SNES snes)
585 {
586 
587   PetscFunctionBegin;
588   PetscFunctionReturn(0);
589 }
590 
591 static PetscErrorCode SNESDestroy_NEWTONTRDC(SNES snes)
592 {
593   PetscErrorCode ierr;
594 
595   PetscFunctionBegin;
596   ierr = SNESReset_NEWTONTRDC(snes);CHKERRQ(ierr);
597   ierr = PetscFree(snes->data);CHKERRQ(ierr);
598   PetscFunctionReturn(0);
599 }
600 /*------------------------------------------------------------*/
601 
602 static PetscErrorCode SNESSetFromOptions_NEWTONTRDC(PetscOptionItems *PetscOptionsObject,SNES snes)
603 {
604   SNES_NEWTONTRDC  *ctx = (SNES_NEWTONTRDC*)snes->data;
605   PetscErrorCode   ierr;
606 
607   PetscFunctionBegin;
608   ierr = PetscOptionsHead(PetscOptionsObject,"SNES trust region options for nonlinear equations");CHKERRQ(ierr);
609   ierr = PetscOptionsReal("-snes_trdc_tol","Trust region tolerance","SNESSetTrustRegionTolerance",snes->deltatol,&snes->deltatol,NULL);CHKERRQ(ierr);
610   ierr = PetscOptionsReal("-snes_trdc_eta1","eta1","None",ctx->eta1,&ctx->eta1,NULL);CHKERRQ(ierr);
611   ierr = PetscOptionsReal("-snes_trdc_eta2","eta2","None",ctx->eta2,&ctx->eta2,NULL);CHKERRQ(ierr);
612   ierr = PetscOptionsReal("-snes_trdc_eta3","eta3","None",ctx->eta3,&ctx->eta3,NULL);CHKERRQ(ierr);
613   ierr = PetscOptionsReal("-snes_trdc_t1","t1","None",ctx->t1,&ctx->t1,NULL);CHKERRQ(ierr);
614   ierr = PetscOptionsReal("-snes_trdc_t2","t2","None",ctx->t2,&ctx->t2,NULL);CHKERRQ(ierr);
615   ierr = PetscOptionsReal("-snes_trdc_deltaM","deltaM","None",ctx->deltaM,&ctx->deltaM,NULL);CHKERRQ(ierr);
616   ierr = PetscOptionsReal("-snes_trdc_delta0","delta0","None",ctx->delta0,&ctx->delta0,NULL);CHKERRQ(ierr);
617   ierr = PetscOptionsReal("-snes_trdc_auto_scale_max","auto_scale_max","None",ctx->auto_scale_max,&ctx->auto_scale_max,NULL);CHKERRQ(ierr);
618   ierr = PetscOptionsBool("-snes_trdc_use_cauchy","use_cauchy","use Cauchy step and direction",ctx->use_cauchy,&ctx->use_cauchy,NULL);CHKERRQ(ierr);
619   ierr = PetscOptionsBool("-snes_trdc_auto_scale_multiphase","auto_scale_multiphase","Auto scaling for proper cauchy direction",ctx->auto_scale_multiphase,&ctx->auto_scale_multiphase,NULL);CHKERRQ(ierr);
620   ierr = PetscOptionsTail();CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 static PetscErrorCode SNESView_NEWTONTRDC(SNES snes,PetscViewer viewer)
625 {
626   SNES_NEWTONTRDC  *tr = (SNES_NEWTONTRDC*)snes->data;
627   PetscErrorCode   ierr;
628   PetscBool        iascii;
629 
630   PetscFunctionBegin;
631   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
632   if (iascii) {
633     ierr = PetscViewerASCIIPrintf(viewer,"  Trust region tolerance (-snes_trtol)\n",(double)snes->deltatol);CHKERRQ(ierr);
634     ierr = PetscViewerASCIIPrintf(viewer,"  eta1=%g, eta2=%g, eta3=%g\n",(double)tr->eta1,(double)tr->eta2,(double)tr->eta3);CHKERRQ(ierr);
635     ierr = PetscViewerASCIIPrintf(viewer,"  delta0=%g, t1=%g, t2=%g, deltaM=%g\n",(double)tr->delta0,(double)tr->t1,(double)tr->t2,(double)tr->deltaM);CHKERRQ(ierr);
636   }
637   PetscFunctionReturn(0);
638 }
639 /* ------------------------------------------------------------ */
640 /*MC
641       SNESNEWTONTRDC - Newton based nonlinear solver that uses trust-region dogleg method with Cauchy direction
642 
643    Options Database:
644 +   -snes_trdc_tol <tol> - trust region tolerance
645 .   -snes_trdc_eta1 <eta1> - trust region parameter 0.0 <= eta1 <= eta2, rho >= eta1 breaks out of the inner iteration (default: eta1=0.001)
646 .   -snes_trdc_eta2 <eta2> - trust region parameter 0.0 <= eta1 <= eta2, rho <= eta2 shrinks the trust region (default: eta2=0.25)
647 .   -snes_trdc_eta3 <eta3> - trust region parameter eta3 > eta2, rho >= eta3 expands the trust region (default: eta3=0.75)
648 .   -snes_trdc_t1 <t1> - trust region parameter, shrinking factor of trust region (default: 0.25)
649 .   -snes_trdc_t2 <t2> - trust region parameter, expanding factor of trust region (default: 2.0)
650 .   -snes_trdc_deltaM <deltaM> - trust region parameter, max size of trust region, deltaM*norm2(x) (default: 0.5)
651 .   -snes_trdc_delta0 <delta0> - trust region parameter, initial size of trust region, delta0*norm2(x) (default: 0.1)
652 .   -snes_trdc_auto_scale_max <auto_scale_max> - used with auto_scale_multiphase, caps the maximum auto-scaling factor
653 .   -snes_trdc_use_cauchy <use_cauchy> - True uses dogleg Cauchy (Steepest Descent direction) step & direction in the trust region algorithm
654 -   -snes_trdc_auto_scale_multiphase <auto_scale_multiphase> - True turns on auto-scaling for multivariable block matrix for Cauchy and trust region
655 
656     Notes:
657     The algorithm is taken from "Linear and Nonlinear Solvers for Simulating Multiphase Flow
658     within Large-Scale Engineered Subsurface Systems" by Heeho D. Park, Glenn E. Hammond,
659     Albert J. Valocchi, Tara LaForce.
660 
661    Level: intermediate
662 
663 .seealso:  SNESCreate(), SNES, SNESSetType(), SNESNEWTONLS, SNESSetTrustRegionTolerance(), SNESNEWTONTRDC
664 
665 M*/
666 PETSC_EXTERN PetscErrorCode SNESCreate_NEWTONTRDC(SNES snes)
667 {
668   SNES_NEWTONTRDC  *neP;
669   PetscErrorCode   ierr;
670 
671   PetscFunctionBegin;
672   snes->ops->setup          = SNESSetUp_NEWTONTRDC;
673   snes->ops->solve          = SNESSolve_NEWTONTRDC;
674   snes->ops->destroy        = SNESDestroy_NEWTONTRDC;
675   snes->ops->setfromoptions = SNESSetFromOptions_NEWTONTRDC;
676   snes->ops->view           = SNESView_NEWTONTRDC;
677   snes->ops->reset          = SNESReset_NEWTONTRDC;
678 
679   snes->usesksp = PETSC_TRUE;
680   snes->usesnpc = PETSC_FALSE;
681 
682   snes->alwayscomputesfinalresidual = PETSC_TRUE;
683 
684   ierr        = PetscNewLog(snes,&neP);CHKERRQ(ierr);
685   snes->data  = (void*)neP;
686   neP->delta  = 0.0;
687   neP->delta0 = 0.1;
688   neP->eta1   = 0.001;
689   neP->eta2   = 0.25;
690   neP->eta3   = 0.75;
691   neP->t1     = 0.25;
692   neP->t2     = 2.0;
693   neP->deltaM = 0.5;
694   neP->sigma  = 0.0001;
695   neP->itflag = PETSC_FALSE;
696   neP->rnorm0 = 0.0;
697   neP->ttol   = 0.0;
698   neP->use_cauchy            = PETSC_TRUE;
699   neP->auto_scale_multiphase = PETSC_FALSE;
700   neP->auto_scale_max        = -1.0;
701   neP->rho_satisfied         = PETSC_FALSE;
702   snes->deltatol             = 1.e-12;
703 
704   /* for multiphase (multivariable) scaling */
705   /* may be used for dynamic allocation of inorms, but it fails snes_tutorials-ex3_13
706      on test forced DIVERGED_JACOBIAN_DOMAIN test. I will use static array for now.
707   ierr = VecGetBlockSize(snes->work[0],&neP->bs);CHKERRQ(ierr);
708   ierr = PetscCalloc1(neP->bs,&neP->inorms);CHKERRQ(ierr);
709   */
710 
711   PetscFunctionReturn(0);
712 }
713