xref: /petsc/src/tao/bound/impls/bncg/bncg.c (revision 37eeb8152ec6a2cf24186d3591c2c5de5dfd8fa5)
1 #include <petsctaolinesearch.h>
2 #include <../src/tao/bound/impls/bncg/bncg.h>
3 #include <petscksp.h>
4 
5 #define CG_GradientDescent      0
6 #define CG_HestenesStiefel      1
7 #define CG_FletcherReeves       2
8 #define CG_PolakRibierePolyak   3
9 #define CG_PolakRibierePlus     4
10 #define CG_DaiYuan              5
11 #define CG_HagerZhang           6
12 #define CG_DaiKou               7
13 #define CG_KouDai               8
14 #define CG_SSML_BFGS            9
15 #define CG_SSML_DFP             10
16 #define CG_SSML_BROYDEN         11
17 #define CG_PCGradientDescent    12
18 #define CGTypes                 13
19 
20 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"};
21 
22 #define CG_AS_NONE       0
23 #define CG_AS_BERTSEKAS  1
24 #define CG_AS_SIZE       2
25 
26 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};
27 
28 PetscErrorCode TaoBNCGSetRecycleFlag(Tao tao, PetscBool recycle)
29 {
30   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
31 
32   PetscFunctionBegin;
33   cg->recycle = recycle;
34   PetscFunctionReturn(0);
35 }
36 
37 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
38 {
39   PetscErrorCode               ierr;
40   TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;
41 
42   PetscFunctionBegin;
43   ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr);
44   if (cg->inactive_idx) {
45     ierr = ISDuplicate(cg->inactive_idx, &cg->inactive_old);CHKERRQ(ierr);
46     ierr = ISCopy(cg->inactive_idx, cg->inactive_old);CHKERRQ(ierr);
47   }
48   switch (asType) {
49   case CG_AS_NONE:
50     ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr);
51     ierr = VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);CHKERRQ(ierr);
52     ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr);
53     ierr = ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);CHKERRQ(ierr);
54     break;
55 
56   case CG_AS_BERTSEKAS:
57     /* Use gradient descent to estimate the active set */
58     ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr);
59     ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr);
60     ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol,
61                                    &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr);
62     break;
63 
64   default:
65     break;
66   }
67   PetscFunctionReturn(0);
68 }
69 
70 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
71 {
72   PetscErrorCode               ierr;
73   TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;
74 
75   PetscFunctionBegin;
76   switch (asType) {
77   case CG_AS_NONE:
78     ierr = VecISSet(step, cg->active_idx, 0.0);CHKERRQ(ierr);
79     break;
80 
81   case CG_AS_BERTSEKAS:
82     ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step);CHKERRQ(ierr);
83     break;
84 
85   default:
86     break;
87   }
88   PetscFunctionReturn(0);
89 }
90 
/* Main solver driver for TAOBNCG: projects the starting point onto the
   feasible set, performs the convergence test at the initial iterate, and
   then loops calling TaoBNCGConductIteration() until a termination reason
   is set. */
static PetscErrorCode TaoSolve_BNCG(Tao tao)
{
  TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode               ierr;
  PetscReal                    step=1.0,gnorm,gnorm2, resnorm;
  PetscInt                     nDiff;

  PetscFunctionBegin;
  /* Refresh the variable bounds and hand them to the line search */
  ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr);

  /* Project the initial point onto the feasible region; nDiff counts how
     many entries were moved by the projection */
  ierr = TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution);CHKERRQ(ierr);

  /* Recompute f and the (unprojected) gradient unless we are recycling a
     still-valid evaluation from a previous TaoSolve() */
  if (nDiff > 0 || !cg->recycle){
    ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);CHKERRQ(ierr);
  }
  ierr = VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");

  /* Estimate the active set at the starting point */
  ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr);

  /* Projected gradient: zero the active components, then take its norm */
  ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
  ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr);
  ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  gnorm2 = gnorm*gnorm;

  /* Initialize iteration and diagnostic counters; resets starts at -1 so
     the unconditional initial TaoBNCGResetUpdate() below is not counted */
  tao->niter = 0;
  cg->ls_fails = cg->descent_error = 0;
  cg->resets = -1;
  cg->skipped_updates = cg->pure_gd_steps = 0;
  cg->iter_quad = 0;

  /* Convergence test at the starting point. */
  tao->reason = TAO_CONTINUE_ITERATING;

  /* Residual for the bound-constrained problem is the norm of the
     Fischer-Burmeister function, not the raw gradient norm */
  ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr);
  ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
  ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
  ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr);
  ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  /* Calculate initial direction: a scaled gradient-descent step, unless we
     are recycling the direction/history from a previous TaoSolve() */
  if (!cg->recycle) {
    ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
  }
  /* Iterate until TaoBNCGConductIteration() sets a termination reason;
     the only exit from this loop is the PetscFunctionReturn() inside it */
  while(1) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr);
    }
    ierr = TaoBNCGConductIteration(tao, gnorm);CHKERRQ(ierr);
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  }
  /* Unreachable; kept so every path ends in PetscFunctionReturn() */
  PetscFunctionReturn(0);
}
154 
155 static PetscErrorCode TaoSetUp_BNCG(Tao tao)
156 {
157   TAO_BNCG         *cg = (TAO_BNCG*)tao->data;
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   if (!tao->gradient) {
162     ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr);
163   }
164   if (!tao->stepdirection) {
165     ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr);
166   }
167   if (!cg->W) {
168     ierr = VecDuplicate(tao->solution,&cg->W);CHKERRQ(ierr);
169   }
170   if (!cg->work) {
171     ierr = VecDuplicate(tao->solution,&cg->work);CHKERRQ(ierr);
172   }
173   if (!cg->sk) {
174     ierr = VecDuplicate(tao->solution,&cg->sk);CHKERRQ(ierr);
175   }
176   if (!cg->yk) {
177     ierr = VecDuplicate(tao->gradient,&cg->yk);CHKERRQ(ierr);
178   }
179   if (!cg->X_old) {
180     ierr = VecDuplicate(tao->solution,&cg->X_old);CHKERRQ(ierr);
181   }
182   if (!cg->G_old) {
183     ierr = VecDuplicate(tao->gradient,&cg->G_old);CHKERRQ(ierr);
184   }
185   if (cg->diag_scaling){
186     ierr = VecDuplicate(tao->solution,&cg->d_work);CHKERRQ(ierr);
187     ierr = VecDuplicate(tao->solution,&cg->y_work);CHKERRQ(ierr);
188     ierr = VecDuplicate(tao->solution,&cg->g_work);CHKERRQ(ierr);
189   }
190   if (!cg->unprojected_gradient) {
191     ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient);CHKERRQ(ierr);
192   }
193   if (!cg->unprojected_gradient_old) {
194     ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);CHKERRQ(ierr);
195   }
196   ierr = MatLMVMAllocate(cg->B, cg->sk, cg->yk);CHKERRQ(ierr);
197   if (cg->pc){
198     ierr = MatLMVMSetJ0(cg->B, cg->pc);CHKERRQ(ierr);
199   }
200   PetscFunctionReturn(0);
201 }
202 
203 static PetscErrorCode TaoDestroy_BNCG(Tao tao)
204 {
205   TAO_BNCG       *cg = (TAO_BNCG*) tao->data;
206   PetscErrorCode ierr;
207 
208   PetscFunctionBegin;
209   if (tao->setupcalled) {
210     ierr = VecDestroy(&cg->W);CHKERRQ(ierr);
211     ierr = VecDestroy(&cg->work);CHKERRQ(ierr);
212     ierr = VecDestroy(&cg->X_old);CHKERRQ(ierr);
213     ierr = VecDestroy(&cg->G_old);CHKERRQ(ierr);
214     ierr = VecDestroy(&cg->unprojected_gradient);CHKERRQ(ierr);
215     ierr = VecDestroy(&cg->unprojected_gradient_old);CHKERRQ(ierr);
216     ierr = VecDestroy(&cg->g_work);CHKERRQ(ierr);
217     ierr = VecDestroy(&cg->d_work);CHKERRQ(ierr);
218     ierr = VecDestroy(&cg->y_work);CHKERRQ(ierr);
219     ierr = VecDestroy(&cg->sk);CHKERRQ(ierr);
220     ierr = VecDestroy(&cg->yk);CHKERRQ(ierr);
221   }
222   ierr = ISDestroy(&cg->active_lower);CHKERRQ(ierr);
223   ierr = ISDestroy(&cg->active_upper);CHKERRQ(ierr);
224   ierr = ISDestroy(&cg->active_fixed);CHKERRQ(ierr);
225   ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr);
226   ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr);
227   ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr);
228   ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr);
229   ierr = MatDestroy(&cg->B);CHKERRQ(ierr);
230   if (cg->pc) {
231     ierr = MatDestroy(&cg->pc);CHKERRQ(ierr);
232   }
233   ierr = PetscFree(tao->data);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao)
238 {
239     TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
240     PetscErrorCode ierr;
241 
242     PetscFunctionBegin;
243     ierr = TaoLineSearchSetFromOptions(tao->linesearch);CHKERRQ(ierr);
244     ierr = PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization");CHKERRQ(ierr);
245     ierr = PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr);
246     if (cg->cg_type != CG_SSML_BFGS){
247       cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
248     }
249     if (CG_GradientDescent == cg->cg_type){
250       cg->cg_type = CG_PCGradientDescent;
251       /* Set scaling equal to none or, at best, scalar scaling. */
252       cg->unscaled_restart = PETSC_TRUE;
253       cg->diag_scaling = PETSC_FALSE;
254     }
255     ierr = PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr);
256 
257     ierr = PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL);CHKERRQ(ierr);
258     ierr = PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL);CHKERRQ(ierr);
259     ierr = PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL);CHKERRQ(ierr);
260     ierr = PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL);CHKERRQ(ierr);
261     ierr = PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL);CHKERRQ(ierr);
262     ierr = PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL);CHKERRQ(ierr);
263     ierr = PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL);CHKERRQ(ierr);
264     ierr = PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL);CHKERRQ(ierr);
265     ierr = PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL);CHKERRQ(ierr);
266     ierr = PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL);CHKERRQ(ierr);
267     ierr = PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL);CHKERRQ(ierr);
268     ierr = PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL);CHKERRQ(ierr);
269     ierr = PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL);CHKERRQ(ierr);
270     ierr = PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL);CHKERRQ(ierr);
271     ierr = PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL);CHKERRQ(ierr);
272     ierr = PetscOptionsBool("-tao_bncg_recycle","enable recycling the existing solution, gradient, and diagonal scaling vector at the start of a new TaoSolve() call","",cg->recycle,&cg->recycle,NULL);CHKERRQ(ierr);
273     ierr = PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL);CHKERRQ(ierr);
274     ierr = PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL);CHKERRQ(ierr);
275     if (cg->no_scaling){
276       cg->diag_scaling = PETSC_FALSE;
277       cg->alpha = -1.0;
278     }
279     if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling){ /* Some more default options that appear to be good. */
280       cg->neg_xi = PETSC_TRUE;
281     }
282     ierr = PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL);CHKERRQ(ierr);
283     ierr = PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL);CHKERRQ(ierr);
284     ierr = PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL);CHKERRQ(ierr);
285     ierr = PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL);CHKERRQ(ierr);
286     ierr = PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL);CHKERRQ(ierr);
287 
288    ierr = PetscOptionsTail();CHKERRQ(ierr);
289    ierr = MatSetFromOptions(cg->B);CHKERRQ(ierr);
290    PetscFunctionReturn(0);
291 }
292 
293 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
294 {
295   PetscBool      isascii;
296   TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
297   PetscErrorCode ierr;
298 
299   PetscFunctionBegin;
300   ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr);
301   if (isascii) {
302     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
303     ierr = PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);CHKERRQ(ierr);
304     ierr = PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %i\n", cg->skipped_updates);CHKERRQ(ierr);
305     ierr = PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %i\n", cg->resets);CHKERRQ(ierr);
306     ierr = PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %i\n", cg->pure_gd_steps);CHKERRQ(ierr);
307     ierr = PetscViewerASCIIPrintf(viewer, "Not a descent direction: %i\n", cg->descent_error);CHKERRQ(ierr);
308     ierr = PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails);CHKERRQ(ierr);
309     if (cg->diag_scaling){
310       ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
311       if (isascii) {
312         ierr = PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
313         ierr = MatView(cg->B, viewer);CHKERRQ(ierr);
314         ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
315       }
316     }
317     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
318   }
319   PetscFunctionReturn(0);
320 }
321 
322 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
323 {
324   PetscReal            a, b, c, sig1, sig2;
325 
326   PetscFunctionBegin;
327   *scale = 0.0;
328 
329   if (1.0 == alpha){
330     *scale = yts/yty;
331   } else if (0.0 == alpha){
332     *scale = sts/yts;
333   }
334   else if (-1.0 == alpha) *scale = 1.0;
335   else {
336     a = yty;
337     b = yts;
338     c = sts;
339     a *= alpha;
340     b *= -(2.0*alpha - 1.0);
341     c *= alpha - 1.0;
342     sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
343     sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
344     /* accept the positive root as the scalar */
345     if (sig1 > 0.0) {
346       *scale = sig1;
347     } else if (sig2 > 0.0) {
348       *scale = sig2;
349     } else {
350       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
351     }
352   }
353   PetscFunctionReturn(0);
354 }
355 
356 /*MC
357   TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.
358 
359   Options Database Keys:
360 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
361 . -tao_bncg_eta <r> - restart tolerance
362 . -tao_bncg_type <taocg_type> - cg formula
363 . -tao_bncg_as_type <none,bertsekas> - active set estimation method
364 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
365 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
366 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision.
367 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method
368 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
369 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
370 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method
371 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method
372 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method
373 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method
374 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True.
375 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods
376 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
377 . -tao_bncg_zeta <r> - Scaling parameter in the KD method
378 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps
379 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps
380 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart
381 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem
382 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
383 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults.
384 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions
385 
386   Notes:
387     CG formulas are:
388 + "gd" - Gradient Descent
389 . "fr" - Fletcher-Reeves
390 . "pr" - Polak-Ribiere-Polyak
391 . "prp" - Polak-Ribiere-Plus
392 . "hs" - Hestenes-Stiefel
393 . "dy" - Dai-Yuan
394 . "ssml_bfgs" - Self-Scaling Memoryless BFGS
395 . "ssml_dfp"  - Self-Scaling Memoryless DFP
396 . "ssml_brdn" - Self-Scaling Memoryless Broyden
397 . "hz" - Hager-Zhang (CG_DESCENT 5.3)
398 . "dk" - Dai-Kou (2013)
399 - "kd" - Kou-Dai (2015)
400 
401   Level: beginner
402 M*/
403 
/* Constructor for the TAOBNCG solver type: installs the method table,
   creates the line search and the LMVM diagonal-scaling matrix, and sets
   all default parameter values. */
PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao)
{
  TAO_BNCG       *cg;
  const char     *morethuente_type = TAOLINESEARCHMT;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Install the type-specific operations */
  tao->ops->setup = TaoSetUp_BNCG;
  tao->ops->solve = TaoSolve_BNCG;
  tao->ops->view = TaoView_BNCG;
  tao->ops->setfromoptions = TaoSetFromOptions_BNCG;
  tao->ops->destroy = TaoDestroy_BNCG;

  /* Override default settings (unless already changed) */
  if (!tao->max_it_changed) tao->max_it = 2000;
  if (!tao->max_funcs_changed) tao->max_funcs = 4000;

  /*  Note: nondefault values should be used for nonlinear conjugate gradient  */
  /*  method.  In particular, gtol should be less that 0.5; the value used in  */
  /*  Nocedal and Wright is 0.10.  We use the default values for the  */
  /*  linesearch because it seems to work better. */
  ierr = TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);CHKERRQ(ierr);
  ierr = TaoLineSearchSetType(tao->linesearch, morethuente_type);CHKERRQ(ierr);
  ierr = TaoLineSearchUseTaoRoutines(tao->linesearch, tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);CHKERRQ(ierr);

  /* Allocate the solver context and the LMVM matrix used for diagonal
     Broyden scaling/preconditioning */
  ierr = PetscNewLog(tao,&cg);CHKERRQ(ierr);
  tao->data = (void*)cg;
  ierr = KSPInitializePackage();CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)tao), &cg->B);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1);CHKERRQ(ierr);
  ierr = MatSetOptionsPrefix(cg->B, "tao_bncg_");CHKERRQ(ierr);
  ierr = MatSetType(cg->B, MATLMVMDIAGBRDN);CHKERRQ(ierr);

  /* Optional user preconditioner; installed as J0 of cg->B during setup */
  cg->pc = NULL;

  /* Default tuning parameters; each is overridable via the
     -tao_bncg_* options processed in TaoSetFromOptions_BNCG() */
  cg->dk_eta = 0.5;
  cg->hz_eta = 0.4;
  cg->dynamic_restart = PETSC_FALSE;
  cg->unscaled_restart = PETSC_FALSE;
  cg->no_scaling = PETSC_FALSE;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;
  cg->theta = 1.0;
  cg->hz_theta = 1.0;
  cg->dfp_scale = 1.0;
  cg->bfgs_scale = 1.0;
  cg->zeta = 0.1;
  cg->min_quad = 6;
  cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/
  cg->xi = 1.0;
  cg->neg_xi = PETSC_TRUE;
  cg->spaced_restart = PETSC_FALSE;
  cg->tol_quad = 1e-8;
  cg->as_step = 0.001;
  cg->as_tol = 0.001;
  cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/
  cg->as_type = CG_AS_BERTSEKAS;
  cg->cg_type = CG_SSML_BFGS;
  cg->recycle = PETSC_FALSE;
  cg->alpha = 1.0;
  cg->diag_scaling = PETSC_TRUE;
  PetscFunctionReturn(0);
}
469 
470 
471 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq)
472 {
473    TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
474    PetscErrorCode    ierr;
475    PetscReal         scaling;
476 
477    PetscFunctionBegin;
478    ++cg->resets;
479    scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23);
480    scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling));
481    if (cg->unscaled_restart) {
482      scaling = 1.0;
483      ++cg->pure_gd_steps;
484    }
485    ierr = VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient);CHKERRQ(ierr);
486    /* Also want to reset our diagonal scaling with each restart */
487    if (cg->diag_scaling) {
488      ierr = MatLMVMReset(cg->B, PETSC_FALSE);CHKERRQ(ierr);
489    }
490    PetscFunctionReturn(0);
491  }
492 
/* Decide whether to trigger a dynamic restart: if a quadratic interpolation
   of f along the last step has matched the actual decrease for min_quad
   consecutive iterations, the function looks locally quadratic and CG is
   restarted (as in the HZ/DK/KD strategies). */
PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold)
{
   TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
   PetscReal         quadinterp;

   PetscFunctionBegin;
   /* NOTE(review): min_quad is a PetscInt, so min_quad/10 is integer
      division (0 for the default of 6), making this test "cg->f < 0" —
      confirm whether min_quad/10.0 was intended. */
   if (cg->f < cg->min_quad/10) {
     *dynrestart = PETSC_FALSE;
     PetscFunctionReturn(0);
   } /* just skip this since this strategy doesn't work well for functions near zero */
   /* Ratio of the actual decrease to the quadratic-model prediction;
      a ratio of ~1 for several iterations signals quadratic behavior */
   quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old));
   if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
   else {
     cg->iter_quad = 0;
     *dynrestart = PETSC_FALSE;
   }
   /* NOTE(review): when the streak is alive but below min_quad, *dynrestart
      is left unmodified — callers appear responsible for initializing it. */
   if (cg->iter_quad >= cg->min_quad) {
     cg->iter_quad = 0;
     *dynrestart = PETSC_TRUE;
   }
   PetscFunctionReturn(0);
 }
515 
516 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
517 {
518   TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
519   PetscErrorCode    ierr;
520   PetscReal         gamma = 1.0, tau_k, beta;
521   PetscReal         tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd;
522   PetscReal         gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
523   PetscInt          dim;
524   PetscBool         cg_restart = PETSC_FALSE;
525   PetscFunctionBegin;
526 
527   /* Local curvature check to see if we need to restart */
528   if (tao->niter >= 1 || cg->recycle){
529     ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
530     ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
531     ynorm2 = ynorm*ynorm;
532     ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
533     if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON){
534       cg_restart = PETSC_TRUE;
535       ++cg->skipped_updates;
536     }
537     if (cg->spaced_restart) {
538       ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr);
539       if (tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE;
540     }
541   }
542   /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */
543   if (cg->spaced_restart){
544     ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr);
545     if (0 == tao->niter % (6*dim)) cg_restart = PETSC_TRUE;
546   }
547   /* Compute the diagonal scaling vector if applicable */
548   if (cg->diag_scaling) {
549     ierr = MatLMVMUpdate(cg->B, tao->solution, tao->gradient);CHKERRQ(ierr);
550   }
551 
552   /* A note on diagonal scaling (to be added to paper):
553    For the FR, PR, PRP, and DY methods, the diagonally scaled versions
554    must be derived as a preconditioned CG method rather than as
555    a Hessian initialization like in the Broyden methods. */
556 
557   /* In that case, one writes the objective function as
558    f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k).
559    Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
560    HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
561    same under preconditioning. Note that A is diagonal, such that A^T = A. */
562 
563   /* This yields questions like what the dot product d_k^T y_k
564    should look like. HZ mistakenly treats that as the same under
565    preconditioning, but that is not necessarily true. */
566 
567   /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
568    we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
569    yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
570    NOT the same if our preconditioning matrix is updated between iterations.
571    This same issue is found when considering dot products of the form g_{k+1}^T y_k. */
572 
573   /* Compute CG step direction */
574   if (cg_restart) {
575     ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
576   } else if (pcgd_fallback) {
577     /* Just like preconditioned CG */
578     ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
579     ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr);
580   } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
581     switch (cg->cg_type) {
582     case CG_PCGradientDescent:
583       if (!cg->diag_scaling){
584         if (!cg->no_scaling){
585         cg->sts = step*step*dnorm*dnorm;
586         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
587         } else {
588           tau_k = 1.0;
589           ++cg->pure_gd_steps;
590         }
591         ierr = VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient);CHKERRQ(ierr);
592       } else {
593         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
594         ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr);
595       }
596       break;
597 
598     case CG_HestenesStiefel:
599       /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
600       if (!cg->diag_scaling){
601         cg->sts = step*step*dnorm*dnorm;
602         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
603         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
604         beta = tau_k*gkp1_yk/dk_yk;
605         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
606       } else {
607         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
608         ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
609         beta = gkp1_yk/dk_yk;
610         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
611       }
612       break;
613 
614     case CG_FletcherReeves:
615       ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
616       ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
617       ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
618       ynorm2 = ynorm*ynorm;
619       ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
620       if (!cg->diag_scaling){
621         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha);CHKERRQ(ierr);
622         beta = tau_k*gnorm2/gnorm2_old;
623         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
624       } else {
625         ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Before it's updated */
626         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
627         ierr = VecDot(tao->gradient, cg->g_work, &tmp);CHKERRQ(ierr);
628         beta = tmp/gnorm2_old;
629         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
630       }
631       break;
632 
633     case CG_PolakRibierePolyak:
634       snorm = step*dnorm;
635       if (!cg->diag_scaling){
636         ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
637         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
638         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
639         beta = tau_k*gkp1_yk/gnorm2_old;
640         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
641       } else {
642         ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr);
643         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
644         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
645         beta = gkp1_yk/gnorm2_old;
646         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
647       }
648       break;
649 
650     case CG_PolakRibierePlus:
651       ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
652       ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
653       ynorm2 = ynorm*ynorm;
654       if (!cg->diag_scaling){
655         ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
656         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
657         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
658         beta = tau_k*gkp1_yk/gnorm2_old;
659         beta = PetscMax(beta, 0.0);
660         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
661       } else {
662         ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Old gtDg */
663         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
664         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
665         beta = gkp1_yk/gnorm2_old;
666         beta = PetscMax(beta, 0.0);
667         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
668       }
669       break;
670 
671     case CG_DaiYuan:
672       /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
673          SIAM Journal on optimization 10, no. 1 (1999): 177-182. */
674       if (!cg->diag_scaling){
675         ierr = VecDot(tao->stepdirection, tao->gradient, &gd);CHKERRQ(ierr);
676         ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
677         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha);CHKERRQ(ierr);
678         beta = tau_k*gnorm2/(gd - gd_old);
679         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
680       } else {
681         ierr = MatMult(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
682         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
683         ierr = VecDot(cg->g_work, tao->gradient, &gtDg);CHKERRQ(ierr);
684         ierr = VecDot(tao->stepdirection, cg->G_old, &gd_old);CHKERRQ(ierr);
685         ierr = VecDot(cg->d_work, cg->g_work, &dk_yk);CHKERRQ(ierr);
686         dk_yk = dk_yk - gd_old;
687         beta = gtDg/dk_yk;
688         ierr = VecScale(cg->d_work, beta);CHKERRQ(ierr);
689         ierr = VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work);CHKERRQ(ierr);
690       }
691       break;
692 
693     case CG_HagerZhang:
694       /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
695          ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
696       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
697       ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
698       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
699       snorm = dnorm*step;
700       cg->yts = step*dk_yk;
701       if (cg->use_dynamic_restart){
702         ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr);
703       }
704       if (cg->dynamic_restart){
705         ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
706       } else {
707         if (!cg->diag_scaling){
708           ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
709           ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
710           /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
711           tmp = gd/dk_yk;
712           beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk));
713           /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
714           beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
715           /* d <- -t*g + beta*t*d */
716           ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
717         } else {
718           /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
719           cg->yty = ynorm2;
720           cg->sts = snorm*snorm;
721           /* Apply the diagonal scaling to all my vectors */
722           ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
723           ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
724           ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
725           /* Construct the constant ytDgkp1 */
726           ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
727           /* Construct the constant for scaling Dkyk in the update */
728           tmp = gd/dk_yk;
729           ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
730           tau_k = -tau_k*gd/(dk_yk*dk_yk);
731           /* beta is the constant which adds the dk contribution */
732           beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */
733           /* From HZ2013, modified to account for diagonal scaling*/
734           ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr);
735           ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr);
736           beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
737           /* Do the update */
738           ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
739         }
740       }
741       break;
742 
743     case CG_DaiKou:
744       /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
745          SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
746       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
747       ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
748       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
749       snorm = step*dnorm;
750       cg->yts = dk_yk*step;
751       if (!cg->diag_scaling){
752         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
753         ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
754         /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
755         tmp = gd/dk_yk;
756         beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk;
757         beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
758         /* d <- -t*g + beta*t*d */
759         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
760       } else {
761         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
762         cg->yty = ynorm2;
763         cg->sts = snorm*snorm;
764         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
765         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
766         ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
767         /* Construct the constant ytDgkp1 */
768         ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
769         ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
770         tau_k = tau_k*gd/(dk_yk*dk_yk);
771         tmp = gd/dk_yk;
772         /* beta is the constant which adds the dk contribution */
773         beta = gkp1_yk/dk_yk - step*tmp - tau_k;
774         /* Update this for the last term in beta */
775         ierr = VecDot(cg->y_work, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
776         beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */
777         ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr);
778         ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr);
779         beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
780         /* Do the update */
781         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
782       }
783       break;
784 
785     case CG_KouDai:
786       /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden–Fletcher–Goldfarb–Shanno method for unconstrained optimization."
787          Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
788       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
789       ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
790       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
791       snorm = step*dnorm;
792       cg->yts = dk_yk*step;
793       if (cg->use_dynamic_restart){
794         ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr);
795       }
796       if (cg->dynamic_restart){
797         ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
798       } else {
799         if (!cg->diag_scaling){
800           ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
801           ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
802           beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
803           if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */
804           {
805             beta = cg->zeta*tau_k*gd/(dnorm*dnorm);
806             gamma = 0.0;
807           } else {
808             if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk;
809             /* This seems to be very effective when there's no tau_k scaling.
810                This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */
811             else {
812               gamma = cg->xi*gd/dk_yk;
813             }
814           }
815           /* d <- -t*g + beta*t*d + t*tmp*yk */
816           ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
817         } else {
818           /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
819           cg->yty = ynorm2;
820           cg->sts = snorm*snorm;
821           ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
822           ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
823           /* Construct the constant ytDgkp1 */
824           ierr = VecDot(cg->yk, cg->g_work, &gkp1D_yk);CHKERRQ(ierr);
825           /* Construct the constant for scaling Dkyk in the update */
826           gamma = gd/dk_yk;
827           /* tau_k = -ytDy/(ytd)^2 * gd */
828           ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
829           tau_k = tau_k*gd/(dk_yk*dk_yk);
830           /* beta is the constant which adds the d_k contribution */
831           beta = gkp1D_yk/dk_yk - step*gamma - tau_k;
832           /* Here is the requisite check */
833           ierr = VecDot(tao->stepdirection, cg->g_work, &tmp);CHKERRQ(ierr);
834           if (cg->neg_xi){
835             /* modified KD implementation */
836             if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk;
837             else {
838               gamma = cg->xi*gd/dk_yk;
839             }
840             if (beta < cg->zeta*tmp/(dnorm*dnorm)){
841               beta = cg->zeta*tmp/(dnorm*dnorm);
842               gamma = 0.0;
843             }
844           } else { /* original KD 2015 implementation */
845             if (beta < cg->zeta*tmp/(dnorm*dnorm)) {
846               beta = cg->zeta*tmp/(dnorm*dnorm);
847               gamma = 0.0;
848             } else {
849               gamma = cg->xi*gd/dk_yk;
850             }
851           }
852           /* Do the update in two steps */
853           ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
854           ierr = VecAXPY(tao->stepdirection, gamma, cg->y_work);CHKERRQ(ierr);
855         }
856       }
857       break;
858 
859     case CG_SSML_BFGS:
860       /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
861          Discussion Papers 269 (1977). */
862       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
863       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
864       snorm = step*dnorm;
865       cg->yts = dk_yk*step;
866       cg->yty = ynorm2;
867       cg->sts = snorm*snorm;
868       if (!cg->diag_scaling){
869         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
870         ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
871         tmp = gd/dk_yk;
872         beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp;
873         /* d <- -t*g + beta*t*d + t*tmp*yk */
874         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
875       } else {
876         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
877         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
878         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
879         /* compute scalar gamma */
880         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
881         ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
882         gamma = gd/dk_yk;
883         /* Compute scalar beta */
884         beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
885         /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
886         ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
887       }
888       break;
889 
890     case CG_SSML_DFP:
891       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
892       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
893       snorm = step*dnorm;
894       cg->yts = dk_yk*step;
895       cg->yty = ynorm2;
896       cg->sts = snorm*snorm;
897       if (!cg->diag_scaling){
898         /* Instead of a regular convex combination, we will solve a quadratic formula. */
899         ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
900         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
901         tau_k = cg->dfp_scale*tau_k;
902         tmp = tau_k*gkp1_yk/cg->yty;
903         beta = -step*gd/dk_yk;
904         /* d <- -t*g + beta*d + tmp*yk */
905         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
906       } else {
907         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
908         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
909         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
910         /* compute scalar gamma */
911         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
912         ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
913         gamma = (gkp1_yk/tmp);
914         /* Compute scalar beta */
915         beta = -step*gd/dk_yk;
916         /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
917         ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
918       }
919       break;
920 
921     case CG_SSML_BROYDEN:
922       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
923       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
924       snorm = step*dnorm;
925       cg->yts = step*dk_yk;
926       cg->yty = ynorm2;
927       cg->sts = snorm*snorm;
928       if (!cg->diag_scaling){
929         /* Instead of a regular convex combination, we will solve a quadratic formula. */
930         ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale);CHKERRQ(ierr);
931         ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale);CHKERRQ(ierr);
932         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
933         tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp;
934         /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0,
935            it should reproduce the tau_dfp scaling. Same with dfp_scale.   */
936         tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty;
937         beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
938         /* d <- -t*g + beta*d + tmp*yk */
939         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
940       } else {
941         /* We have diagonal scaling enabled */
942         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
943         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
944         /* compute scalar gamma */
945         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
946         ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
947         gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp);
948         /* Compute scalar beta */
949         beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
950         /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
951         ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
952       }
953       break;
954 
955     default:
956       break;
957 
958     }
959   }
960   PetscFunctionReturn(0);
961 }
962 
963 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
964 {
965   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
966   PetscErrorCode               ierr;
967   TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
968   PetscReal                    step=1.0,gnorm2,gd,dnorm=0.0;
969   PetscReal                    gnorm2_old,f_old,resnorm, gnorm_old;
970   PetscBool                    pcgd_fallback = PETSC_FALSE;
971 
972   PetscFunctionBegin;
973   /* We are now going to perform a line search along the direction. */
974   /* Store solution and gradient info before it changes */
975   ierr = VecCopy(tao->solution, cg->X_old);CHKERRQ(ierr);
976   ierr = VecCopy(tao->gradient, cg->G_old);CHKERRQ(ierr);
977   ierr = VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old);CHKERRQ(ierr);
978 
979   gnorm_old = gnorm;
980   gnorm2_old = gnorm_old*gnorm_old;
981   f_old = cg->f;
982   /* Perform bounded line search. If we are recycling a solution from a previous */
983   /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */
984   if (!(cg->recycle && 0 == tao->niter)){
985     /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */
986     ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
987     ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
988     ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
989 
990     /*  Check linesearch failure */
991     if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
992       ++cg->ls_fails;
993       if (cg->cg_type == CG_GradientDescent){
994         /* Nothing left to do but fail out of the optimization */
995         step = 0.0;
996         tao->reason = TAO_DIVERGED_LS_FAILURE;
997       } else {
998         /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */
999         ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr);
1000         ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr);
1001         ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr);
1002         gnorm = gnorm_old;
1003         gnorm2 = gnorm2_old;
1004         cg->f = f_old;
1005 
1006         /* Fall back on preconditioned CG (so long as you're not already using it) */
1007         if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling){
1008           pcgd_fallback = PETSC_TRUE;
1009           ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr);
1010 
1011           ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
1012           ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1013 
1014           ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
1015           ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
1016           ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
1017 
1018           pcgd_fallback = PETSC_FALSE;
1019           if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
1020             /* Going to perform a regular gradient descent step. */
1021             ++cg->ls_fails;
1022             step = 0.0;
1023           }
1024         }
1025         /* Fall back on the scaled gradient step */
1026         if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
1027           ++cg->ls_fails;
1028           ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
1029           ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1030           ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
1031           ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
1032           ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
1033         }
1034 
1035         if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
1036           /* Nothing left to do but fail out of the optimization */
1037           ++cg->ls_fails;
1038           step = 0.0;
1039           tao->reason = TAO_DIVERGED_LS_FAILURE;
1040         } else {
1041           /* One of the fallbacks worked. Set them both back equal to false. */
1042           pcgd_fallback = PETSC_FALSE;
1043         }
1044       }
1045     }
1046     /* Convergence test for line search failure */
1047     if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
1048 
1049     /* Standard convergence test */
1050     ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr);
1051     ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr);
1052     if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
1053     ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
1054     ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr);
1055     ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
1056     if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
1057   }
1058   /* Assert we have an updated step and we need at least one more iteration. */
1059   /* Calculate the next direction */
1060   /* Estimate the active set at the new solution */
1061   ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr);
1062   /* Compute the projected gradient and its norm */
1063   ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
1064   ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr);
1065   ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
1066   gnorm2 = gnorm*gnorm;
1067 
1068   /* Calculate some quantities used in the StepDirectionUpdate. */
1069   ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr);
1070   /* Update the step direction. */
1071   ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr);
1072   ++tao->niter;
1073   ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1074 
1075   if (cg->cg_type != CG_GradientDescent) {
1076     /* Figure out which previously active variables became inactive this iteration */
1077     ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr);
1078     if (cg->inactive_idx && cg->inactive_old) {
1079       ierr = ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives);CHKERRQ(ierr);
1080     }
1081     /* Selectively reset the CG step those freshly inactive variables */
1082     if (cg->new_inactives) {
1083       ierr = VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr);
1084       ierr = VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr);
1085       ierr = VecCopy(cg->inactive_grad, cg->inactive_step);CHKERRQ(ierr);
1086       ierr = VecScale(cg->inactive_step, -1.0);CHKERRQ(ierr);
1087       ierr = VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr);
1088       ierr = VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr);
1089     }
1090     /* Verify that this is a descent direction */
1091     ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
1092     ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr);
1093     if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) {
1094       /* Not a descent direction, so we reset back to projected gradient descent */
1095       ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
1096       ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1097       ++cg->descent_error;
1098     } else {
1099     }
1100   }
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
1105 {
1106   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
1107   PetscErrorCode               ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = PetscObjectReference((PetscObject)H0);CHKERRQ(ierr);
1111   cg->pc = H0;
1112   PetscFunctionReturn(0);
1113 }
1114