#include <petsctaolinesearch.h>
#include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/
#include <petscksp.h>

const char *const TaoBNCGTypes[] = {"gd", "pcgd", "hs", "fr", "prp", "prp_plus", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "TAOBNCGType", "TAO_BNCG_", NULL};

#define CG_AS_NONE      0
#define CG_AS_BERTSEKAS 1
#define CG_AS_SIZE      2

static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};

PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscCall(ISDestroy(&cg->inactive_old));
  if (cg->inactive_idx) {
    PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old));
    PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old));
  }
  switch (asType) {
  case CG_AS_NONE:
    PetscCall(ISDestroy(&cg->inactive_idx));
    PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx));
    PetscCall(ISDestroy(&cg->active_idx));
    PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx));
    break;
  case CG_AS_BERTSEKAS:
    /* Use gradient descent to estimate the active set */
    PetscCall(VecCopy(cg->unprojected_gradient, cg->W));
    PetscCall(VecScale(cg->W, -1.0));
    PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  switch (asType) {
  case CG_AS_NONE:
    if (cg->active_idx) PetscCall(VecISSet(step, cg->active_idx, 0.0));
    break;
  case CG_AS_BERTSEKAS:
    PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoSolve_BNCG(Tao tao)
{
  TAO_BNCG *cg   = (TAO_BNCG *)tao->data;
  PetscReal step = 1.0, gnorm, gnorm2, resnorm;
  PetscInt  nDiff;

  PetscFunctionBegin;
  /* Compute the variable bounds and pass them to the line search */
  PetscCall(TaoComputeVariableBounds(tao));
  PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch, tao->XL, tao->XU));

  /* Project the initial point onto the feasible region */
  PetscCall(TaoBoundSolution(tao->solution, tao->XL, tao->XU, 0.0, &nDiff, tao->solution));

  if (nDiff > 0 || !tao->recycle) PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient));
  PetscCall(VecNorm(cg->unprojected_gradient, NORM_2, &gnorm));
  PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated infinity or NaN");

  /* Estimate the active set and compute the projected gradient */
  PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type));

  /* Project the gradient and calculate the norm */
  PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient));
  if (cg->active_idx) PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0));
  PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
  gnorm2 = gnorm * gnorm;

  /* Initialize counters */
  tao->niter   = 0;
  cg->ls_fails = cg->descent_error = 0;
  cg->resets                       = -1;
  cg->skipped_updates = cg->pure_gd_steps = 0;
  cg->iter_quad                           = 0;

  /* Convergence test at the starting point. */
  tao->reason = TAO_CONTINUE_ITERATING;

  PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
  PetscCall(VecNorm(cg->W, NORM_2, &resnorm));
  PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated infinity or NaN");
  PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
  PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
  PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
  /* Calculate initial direction. */
  if (!tao->recycle) {
    /* We are not recycling a solution/history from a past TaoSolve */
    PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
  }
  /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
  while (1) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      PetscUseTypeMethod(tao, update, tao->niter, tao->user_update);
      PetscCall(TaoComputeObjective(tao, tao->solution, &cg->f));
    }
    PetscCall(TaoBNCGConductIteration(tao, gnorm));
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
  }
}

static PetscErrorCode TaoSetUp_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
  if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection));
  if (!cg->W) PetscCall(VecDuplicate(tao->solution, &cg->W));
  if (!cg->work) PetscCall(VecDuplicate(tao->solution, &cg->work));
  if (!cg->sk) PetscCall(VecDuplicate(tao->solution, &cg->sk));
  if (!cg->yk) PetscCall(VecDuplicate(tao->gradient, &cg->yk));
  if (!cg->X_old) PetscCall(VecDuplicate(tao->solution, &cg->X_old));
  if (!cg->G_old) PetscCall(VecDuplicate(tao->gradient, &cg->G_old));
  if (cg->diag_scaling) {
    PetscCall(VecDuplicate(tao->solution, &cg->d_work));
    PetscCall(VecDuplicate(tao->solution, &cg->y_work));
    PetscCall(VecDuplicate(tao->solution, &cg->g_work));
  }
  if (!cg->unprojected_gradient) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient));
  if (!cg->unprojected_gradient_old) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient_old));
  PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk));
  if (cg->pc) PetscCall(MatLMVMSetJ0(cg->B, cg->pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoDestroy_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  if (tao->setupcalled) {
    PetscCall(VecDestroy(&cg->W));
    PetscCall(VecDestroy(&cg->work));
    PetscCall(VecDestroy(&cg->X_old));
    PetscCall(VecDestroy(&cg->G_old));
    PetscCall(VecDestroy(&cg->unprojected_gradient));
    PetscCall(VecDestroy(&cg->unprojected_gradient_old));
    PetscCall(VecDestroy(&cg->g_work));
    PetscCall(VecDestroy(&cg->d_work));
    PetscCall(VecDestroy(&cg->y_work));
    PetscCall(VecDestroy(&cg->sk));
    PetscCall(VecDestroy(&cg->yk));
  }
  PetscCall(ISDestroy(&cg->active_lower));
  PetscCall(ISDestroy(&cg->active_upper));
  PetscCall(ISDestroy(&cg->active_fixed));
  PetscCall(ISDestroy(&cg->active_idx));
  PetscCall(ISDestroy(&cg->inactive_idx));
  PetscCall(ISDestroy(&cg->inactive_old));
  PetscCall(ISDestroy(&cg->new_inactives));
  PetscCall(MatDestroy(&cg->B));
  if (cg->pc) PetscCall(MatDestroy(&cg->pc));
  PetscCall(PetscFree(tao->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoSetFromOptions_BNCG(Tao tao, PetscOptionItems PetscOptionsObject)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "Nonlinear Conjugate Gradient method for bound constrained optimization");
  PetscCall(PetscOptionsEnum("-tao_bncg_type", "CG update formula", "TaoBNCGTypes", TaoBNCGTypes, (PetscEnum)cg->cg_type, (PetscEnum *)&cg->cg_type, NULL));
  if (cg->cg_type != TAO_BNCG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
  if (TAO_BNCG_GD == cg->cg_type) {
    cg->cg_type = TAO_BNCG_PCGD;
    /* Set scaling equal to none or, at best, scalar scaling. */
    cg->unscaled_restart = PETSC_TRUE;
    cg->diag_scaling     = PETSC_FALSE;
  }
  PetscCall(PetscOptionsReal("-tao_bncg_hz_eta", "(developer) cutoff tolerance for HZ", "", cg->hz_eta, &cg->hz_eta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_eps", "(developer) cutoff value for restarts", "", cg->epsilon, &cg->epsilon, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_dk_eta", "(developer) cutoff tolerance for DK", "", cg->dk_eta, &cg->dk_eta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_xi", "(developer) Parameter in the KD method", "", cg->xi, &cg->xi, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_alpha", "(developer) parameter for the scalar scaling", "", cg->alpha, &cg->alpha, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL));
199   PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling", "Enable diagonal Broyden-like preconditioning", "", cg->diag_scaling, &cg->diag_scaling, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart", "(developer) use dynamic restarts as in HZ, DK, KD", "", cg->use_dynamic_restart, &cg->use_dynamic_restart, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart", "(developer) use unscaled gradient restarts", "", cg->unscaled_restart, &cg->unscaled_restart, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL));
  PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL));
  PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart", "(developer) Enable regular steepest descent restarting every fixed number of iterations", "", cg->spaced_restart, &cg->spaced_restart, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_no_scaling", "Disable all scaling except in restarts", "", cg->no_scaling, &cg->no_scaling, NULL));
  if (cg->no_scaling) {
    cg->diag_scaling = PETSC_FALSE;
    cg->alpha        = -1.0;
  }
  if (cg->alpha == -1.0 && cg->cg_type == TAO_BNCG_KD && !cg->diag_scaling) { /* Some more default options that appear to be good. */
    cg->neg_xi = PETSC_TRUE;
  }
  PetscCall(PetscOptionsBool("-tao_bncg_neg_xi", "(developer) Use negative xi when it might be a smaller descent direction than necessary", "", cg->neg_xi, &cg->neg_xi, NULL));
  PetscCall(PetscOptionsEList("-tao_bncg_as_type", "active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->as_type], &cg->as_type, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables", "", cg->as_tol, &cg->as_tol, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables", "", cg->as_step, &cg->as_step, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts", "", cg->delta_min, &cg->delta_min, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts", "", cg->delta_max, &cg->delta_max, NULL));

  PetscOptionsHeadEnd();
  PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix));
  PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_"));
  PetscCall(MatSetFromOptions(cg->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
{
  PetscBool isascii;
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii) {
    PetscCall(PetscViewerASCIIPushTab(viewer));
    PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", TaoBNCGTypes[cg->cg_type]));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %" PetscInt_FMT "\n", cg->skipped_updates));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %" PetscInt_FMT "\n", cg->resets));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %" PetscInt_FMT "\n", cg->pure_gd_steps));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %" PetscInt_FMT "\n", cg->descent_error));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %" PetscInt_FMT "\n", cg->ls_fails));
    if (cg->diag_scaling) {
      PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
      PetscCall(MatView(cg->B, viewer));
      PetscCall(PetscViewerPopFormat(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
{
  PetscReal a, b, c, sig1, sig2;

  PetscFunctionBegin;
  *scale = 0.0;
  if (1.0 == alpha) *scale = yts / yty;
  else if (0.0 == alpha) *scale = sts / yts;
  else if (-1.0 == alpha) *scale = 1.0;
  else {
    a = yty;
    b = yts;
    c = sts;
    a *= alpha;
    b *= -(2.0 * alpha - 1.0);
    c *= alpha - 1.0;
    sig1 = (-b + PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a);
    sig2 = (-b - PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a);
    /* accept the positive root as the scalar */
    if (sig1 > 0.0) *scale = sig1;
    else if (sig2 > 0.0) *scale = sig2;
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
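
/* A worked sketch of the scaling quadratic solved above (illustrative only, not
   part of the API): for alpha outside {-1, 0, 1}, the scalar tau solves
       alpha*yty*tau^2 - (2*alpha - 1)*yts*tau + (alpha - 1)*sts = 0,
   which interpolates between the BFGS-like scaling yts/yty (alpha = 1) and the
   DFP-like scaling sts/yts (alpha = 0). For example, assumed values yty = 4,
   yts = 2, sts = 2 with alpha = 0.5 give 2*tau^2 - 1 = 0, so tau = 1/sqrt(2)
   ~ 0.707, which indeed lies between yts/yty = 0.5 and sts/yts = 1. */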

/*MC
  TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.

  Options Database Keys:
+ -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
. -tao_bncg_eta <r> - restart tolerance
. -tao_bncg_type <taocg_type> - cg formula
. -tao_bncg_as_type <none,bertsekas> - active set estimation method
. -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
. -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
. -tao_bncg_eps <r> - cutoff used to decide whether to restart based on the step length each iteration and whether to keep using the last step direction; defaults to machine precision
. -tao_bncg_theta <r> - convex combination parameter for the Broyden method
. -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
. -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
. -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method
. -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method
. -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method
. -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method
. -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True.
. -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods
. -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
. -tao_bncg_zeta <r> - Scaling parameter in the KD method
. -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps
. -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps
. -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart
. -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem
. -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
. -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults.
- -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions

  Notes:
    CG formulas are:
+ "gd" - Gradient Descent
. "fr" - Fletcher-Reeves
. "prp" - Polak-Ribiere-Polyak
. "prp_plus" - Polak-Ribiere-Polyak plus
. "hs" - Hestenes-Stiefel
. "dy" - Dai-Yuan
. "ssml_bfgs" - Self-Scaling Memoryless BFGS
. "ssml_dfp"  - Self-Scaling Memoryless DFP
. "ssml_brdn" - Self-Scaling Memoryless Broyden
. "hz" - Hager-Zhang (CG_DESCENT 5.3)
. "dk" - Dai-Kou (2013)
- "kd" - Kou-Dai (2015)

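  Example Usage:
  A minimal sketch of driving this solver (assumes a user-provided FormFunctionGradient() callback and already-created vectors x, xl, xu and context user):
.vb
  Tao tao;

  PetscCall(TaoCreate(PETSC_COMM_WORLD, &tao));
  PetscCall(TaoSetType(tao, TAOBNCG));
  PetscCall(TaoSetSolution(tao, x));
  PetscCall(TaoSetVariableBounds(tao, xl, xu));
  PetscCall(TaoSetObjectiveAndGradient(tao, NULL, FormFunctionGradient, &user));
  PetscCall(TaoSetFromOptions(tao));
  PetscCall(TaoSolve(tao));
  PetscCall(TaoDestroy(&tao));
.ve
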
  Level: beginner
M*/

PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao)
{
  TAO_BNCG   *cg;
  const char *morethuente_type = TAOLINESEARCHMT;

  PetscFunctionBegin;
  tao->ops->setup          = TaoSetUp_BNCG;
  tao->ops->solve          = TaoSolve_BNCG;
  tao->ops->view           = TaoView_BNCG;
  tao->ops->setfromoptions = TaoSetFromOptions_BNCG;
  tao->ops->destroy        = TaoDestroy_BNCG;

  /* Override default settings (unless already changed) */
  PetscCall(TaoParametersInitialize(tao));
  PetscObjectParameterSetDefault(tao, max_it, 2000);
  PetscObjectParameterSetDefault(tao, max_funcs, 4000);

  /*  Note: nondefault values should be used for nonlinear conjugate gradient  */
  /*  method.  In particular, gtol should be less than 0.5; the value used in  */
  /*  Nocedal and Wright is 0.10.  We use the default values for the  */
  /*  linesearch because it seems to work better. */
  PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1));
  PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type));
  PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao));

  PetscCall(PetscNew(&cg));
  tao->data = (void *)cg;
  PetscCall(KSPInitializePackage());
  PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1));
  PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN));

  cg->pc = NULL;

  cg->dk_eta           = 0.5;
  cg->hz_eta           = 0.4;
  cg->dynamic_restart  = PETSC_FALSE;
  cg->unscaled_restart = PETSC_FALSE;
  cg->no_scaling       = PETSC_FALSE;
  cg->delta_min        = 1e-7;
  cg->delta_max        = 100;
  cg->theta            = 1.0;
  cg->hz_theta         = 1.0;
  cg->dfp_scale        = 1.0;
  cg->bfgs_scale       = 1.0;
  cg->zeta             = 0.1;
  cg->min_quad         = 6;
  cg->min_restart_num  = 6; /* As in CG_DESCENT and KD2015 */
  cg->xi               = 1.0;
  cg->neg_xi           = PETSC_TRUE;
  cg->spaced_restart   = PETSC_FALSE;
  cg->tol_quad         = 1e-8;
  cg->as_step          = 0.001;
  cg->as_tol           = 0.001;
  cg->eps_23           = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0 / 3.0); /* Just a little tighter */
  cg->as_type          = CG_AS_BERTSEKAS;
  cg->cg_type          = TAO_BNCG_SSML_BFGS;
  cg->alpha            = 1.0;
  cg->diag_scaling     = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal scaling;

  PetscFunctionBegin;
  ++cg->resets;
  scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23);
  scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling));
  if (cg->unscaled_restart) {
    scaling = 1.0;
    ++cg->pure_gd_steps;
  }
  PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient));
  /* Also want to reset our diagonal scaling with each restart */
  if (cg->diag_scaling) PetscCall(MatLMVMReset(cg->B, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
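
/* Numeric sketch of the restart scaling above (assumed, illustrative values
   only): with cg->f = 10 and gnormsq = 400, scaling = 2*max(1, 10)/max(400,
   eps_23) = 0.05, which already lies inside [delta_min, delta_max] =
   [1e-7, 100] and is used as-is; a near-zero gradient norm would instead be
   clamped to delta_max. */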

PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal quadinterp;

  PetscFunctionBegin;
  if (cg->f < cg->min_quad / 10) {
    *dynrestart = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  } /* just skip this since this strategy doesn't work well for functions near zero */
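  /* For an exactly quadratic function phi(t) along the step, the trapezoid rule
     is exact: phi(step) - phi(0) = (step/2)*(phi'(0) + phi'(step)), so the
     ratio below equals 1 on quadratic objectives; its drift from 1 measures
     departure from quadratic behavior. */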
  quadinterp = 2.0 * (cg->f - fold) / (stepsize * (gd + gd_old));
  if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
  else {
    cg->iter_quad = 0;
    *dynrestart   = PETSC_FALSE;
  }
  if (cg->iter_quad >= cg->min_quad) {
    cg->iter_quad = 0;
    *dynrestart   = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
{
  TAO_BNCG *cg    = (TAO_BNCG *)tao->data;
  PetscReal gamma = 1.0, tau_k, beta;
  PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk = 1.0, gd;
  PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
  PetscInt  dim;
  PetscBool cg_restart = PETSC_FALSE;

  PetscFunctionBegin;
  /* Local curvature check to see if we need to restart */
  if (tao->niter >= 1 || tao->recycle) {
    PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
    PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
    ynorm2 = ynorm * ynorm;
    PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk));
    if (step * dnorm < PETSC_MACHINE_EPSILON || step * dk_yk < PETSC_MACHINE_EPSILON) {
      cg_restart = PETSC_TRUE;
      ++cg->skipped_updates;
    }
    if (cg->spaced_restart) {
      PetscCall(VecGetSize(tao->gradient, &dim));
      if (0 == tao->niter % (dim * cg->min_restart_num)) cg_restart = PETSC_TRUE;
    }
  }
  /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */
  if (cg->spaced_restart) {
    PetscCall(VecGetSize(tao->gradient, &dim));
    if (0 == tao->niter % (6 * dim)) cg_restart = PETSC_TRUE;
  }
  /* Compute the diagonal scaling vector if applicable */
  if (cg->diag_scaling) PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient));

  /* A note on diagonal scaling (to be added to paper):
   For the FR, PR, PRP, and DY methods, the diagonally scaled versions
   must be derived as a preconditioned CG method rather than as
   a Hessian initialization like in the Broyden methods. */

  /* In that case, one writes the objective function as
   f(x) \equiv f(Ay). Gradient evaluations then yield grad_y f(y_k) = A grad_x f(A y_k) = A g(x_k).
   Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
   HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
   same under preconditioning. Note that A is diagonal, such that A^T = A. */

  /* This yields questions like what the dot product d_k^T y_k
   should look like. HZ mistakenly treats that as the same under
   preconditioning, but that is not necessarily true. */

  /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
   we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
   yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
   NOT the same if our matrix used to construct the preconditioner is updated between iterations.
   This same issue is found when considering dot products of the form g_{k+1}^T y_k. */

  /* Compute CG step direction */
  if (cg_restart) {
    PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
  } else if (pcgd_fallback) {
    /* Just like preconditioned CG */
    PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
    PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
  } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
    switch (cg->cg_type) {
    case TAO_BNCG_PCGD:
      if (!cg->diag_scaling) {
        if (!cg->no_scaling) {
          cg->sts = step * step * dnorm * dnorm;
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha));
        } else {
          tau_k = 1.0;
          ++cg->pure_gd_steps;
        }
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient));
      } else {
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
      }
      break;

    case TAO_BNCG_HS:
      /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
      if (!cg->diag_scaling) {
        cg->sts = step * step * dnorm * dnorm;
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / dk_yk;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
        beta = gkp1_yk / dk_yk;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_FR:
      PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
      PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
      PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
      ynorm2 = ynorm * ynorm;
      PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk));
      if (!cg->diag_scaling) {
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, step * step * dnorm * dnorm, &tau_k, cg->alpha));
        beta = tau_k * gnorm2 / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(tao->gradient, cg->g_work, &tmp));
        beta = tmp / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_PRP:
      snorm = step * dnorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old));
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        beta = gkp1_yk / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_PRP_PLUS:
      PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
      PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
      ynorm2 = ynorm * ynorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / gnorm2_old;
        beta = PetscMax(beta, 0.0);
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        beta = gkp1_yk / gnorm2_old;
        beta = PetscMax(beta, 0.0);
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_DY:
      /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
         SIAM Journal on Optimization 10, no. 1 (1999): 177-182. */
      if (!cg->diag_scaling) {
        PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd));
        PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->yts, &tau_k, cg->alpha));
        beta = tau_k * gnorm2 / (gd - gd_old);
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work));
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, tao->gradient, &gtDg));
        PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old));
        PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk));
        dk_yk = dk_yk - gd_old;
        beta  = gtDg / dk_yk;
        PetscCall(VecScale(cg->d_work, beta));
        PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work));
      }
      break;

    case TAO_BNCG_HZ:
      /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
         ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = dnorm * step;
      cg->yts = step * dk_yk;
      if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
      if (cg->dynamic_restart) {
        PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      } else {
        if (!cg->diag_scaling) {
          PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
          /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
          tmp  = gd / dk_yk;
          beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk));
          /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
          beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm));
          /* d <- -t*g + beta*t*d */
          PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm * snorm;
          /* Apply the diagonal scaling to all my vectors */
          PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
          PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
          PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work));
          /* Construct the constant ytDgkp1 */
          PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
          /* Construct the constant for scaling Dkyk in the update */
          tmp = gd / dk_yk;
          PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
          tau_k = -tau_k * gd / (dk_yk * dk_yk);
          /* beta is the constant which adds the dk contribution */
          beta = gkp1_yk / dk_yk + cg->hz_theta * tau_k; /* HZ; (1.15) from DK 2013 */
          /* From HZ2013, modified to account for diagonal scaling */
          PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old));
          PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd));
          beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm));
          /* Do the update */
          PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
        }
      }
      break;

    case TAO_BNCG_DK:
      /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
         SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
        /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
        tmp  = gd / dk_yk;
        beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk) + gd / (dnorm * dnorm)) - step * gd / dk_yk;
        beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm));
        /* d <- -t*g + beta*t*d */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        cg->yty = ynorm2;
        cg->sts = snorm * snorm;
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work));
        /* Construct the constant ytDgkp1 */
        PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
        PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
        tau_k = tau_k * gd / (dk_yk * dk_yk);
        tmp   = gd / dk_yk;
        /* beta is the constant which adds the dk contribution */
        beta = gkp1_yk / dk_yk - step * tmp - tau_k;
        /* Update this for the last term in beta */
        PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk));
        beta += tmp * dk_yk / (dnorm * dnorm); /* projection of y_work onto dk */
        PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd));
        PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old));
        beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm));
        /* Do the update */
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_KD:
      /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization."
         Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
      if (cg->dynamic_restart) {
        PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      } else {
        if (!cg->diag_scaling) {
          PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
          beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)) - step * gd / dk_yk;
          if (beta < cg->zeta * tau_k * gd / (dnorm * dnorm)) /* 0.1 is KD's zeta parameter */
          {
            beta  = cg->zeta * tau_k * gd / (dnorm * dnorm);
            gamma = 0.0;
          } else {
            if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0 * gd / dk_yk;
            /* This seems to be very effective when there's no tau_k scaling.
               This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */
            else gamma = cg->xi * gd / dk_yk;
          }
          /* d <- -t*g + beta*t*d + t*tmp*yk */
          PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma * tau_k, beta, tao->gradient, cg->yk));
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm * snorm;
          PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
          PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
          /* Construct the constant ytDgkp1 */
          PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk));
          /* Construct the constant for scaling Dkyk in the update */
          gamma = gd / dk_yk;
          /* tau_k = -ytDy/(ytd)^2 * gd */
          PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
          tau_k = tau_k * gd / (dk_yk * dk_yk);
          /* beta is the constant which adds the d_k contribution */
          beta = gkp1D_yk / dk_yk - step * gamma - tau_k;
          /* Here is the requisite check */
          PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp));
          if (cg->neg_xi) {
            /* modified KD implementation */
            if (gkp1D_yk / dk_yk < 0) gamma = -1.0 * gd / dk_yk;
            else gamma = cg->xi * gd / dk_yk;
            if (beta < cg->zeta * tmp / (dnorm * dnorm)) {
              beta  = cg->zeta * tmp / (dnorm * dnorm);
              gamma = 0.0;
            }
          } else { /* original KD 2015 implementation */
            if (beta < cg->zeta * tmp / (dnorm * dnorm)) {
              beta  = cg->zeta * tmp / (dnorm * dnorm);
              gamma = 0.0;
            } else gamma = cg->xi * gd / dk_yk;
          }
          /* Do the update in two steps */
          PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
          PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work));
        }
      }
      break;

    case TAO_BNCG_SSML_BFGS:
      /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
         Discussion Papers 269 (1977). */
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
        tmp  = gd / dk_yk;
        beta = tau_k * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * tmp;
        /* d <- -t*g + beta*t*d + t*tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp * tau_k, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* compute scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = gd / dk_yk;
        /* Compute scalar beta */
        beta = (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    case TAO_BNCG_SSML_DFP:
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        tau_k = cg->dfp_scale * tau_k;
        tmp   = tau_k * gkp1_yk / cg->yty;
        beta  = -step * gd / dk_yk;
        /* d <- -t*g + beta*d + tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* compute scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = (gkp1_yk / tmp);
        /* Compute scalar beta */
        beta = -step * gd / dk_yk;
        /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    case TAO_BNCG_SSML_BRDN:
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = step * dk_yk;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_bfgs, cg->bfgs_scale));
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_dfp, cg->dfp_scale));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        tau_k = cg->theta * tau_bfgs + (1.0 - cg->theta) * tau_dfp;
        /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0,
           it should reproduce the tau_dfp scaling. Same with dfp_scale.   */
        tmp  = cg->theta * tau_bfgs * gd / dk_yk + (1 - cg->theta) * tau_dfp * gkp1_yk / cg->yty;
        beta = cg->theta * tau_bfgs * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* d <- -t*g + beta*d + tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* compute scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = cg->theta * gd / dk_yk + (1 - cg->theta) * (gkp1_yk / tmp);
        /* Compute scalar beta */
        beta = cg->theta * (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
{
  TAO_BNCG                    *cg        = (TAO_BNCG *)tao->data;
  TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
  PetscReal                    step = 1.0, gnorm2, gd, dnorm = 0.0;
  PetscReal                    gnorm2_old, f_old, resnorm, gnorm_old;
  PetscBool                    pcgd_fallback = PETSC_FALSE;

  PetscFunctionBegin;
  /* We are now going to perform a line search along the direction. */
  /* Store solution and gradient info before it changes */
  PetscCall(VecCopy(tao->solution, cg->X_old));
  PetscCall(VecCopy(tao->gradient, cg->G_old));
  PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old));

  gnorm_old  = gnorm;
  gnorm2_old = gnorm_old * gnorm_old;
  f_old      = cg->f;
  /* Perform bounded line search. If we are recycling a solution from a previous */
  /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */
  if (!(tao->recycle && 0 == tao->niter)) {
    /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */
    PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
    PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
    PetscCall(TaoAddLineSearchCounts(tao));

    /*  Check linesearch failure */
    if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
      ++cg->ls_fails;
      if (cg->cg_type == TAO_BNCG_GD) {
        /* Nothing left to do but fail out of the optimization */
        step        = 0.0;
        tao->reason = TAO_DIVERGED_LS_FAILURE;
      } else {
        /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */
        PetscCall(VecCopy(cg->X_old, tao->solution));
        PetscCall(VecCopy(cg->G_old, tao->gradient));
        PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient));
        gnorm  = gnorm_old;
        gnorm2 = gnorm2_old;
        cg->f  = f_old;

        /* Fall back on preconditioned CG (so long as you're not already using it) */
        if (cg->cg_type != TAO_BNCG_PCGD && cg->diag_scaling) {
          pcgd_fallback = PETSC_TRUE;
          PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));

          PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
          PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));

          PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
          PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
          PetscCall(TaoAddLineSearchCounts(tao));

          pcgd_fallback = PETSC_FALSE;
          if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
            /* Going to perform a regular gradient descent step. */
            ++cg->ls_fails;
            step = 0.0;
          }
        }
        /* Fall back on the scaled gradient step */
        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
          ++cg->ls_fails;
          PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
          PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
          PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
          PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
          PetscCall(TaoAddLineSearchCounts(tao));
        }

        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
          /* Nothing left to do but fail out of the optimization */
          ++cg->ls_fails;
          step        = 0.0;
          tao->reason = TAO_DIVERGED_LS_FAILURE;
        } else {
          /* One of the fallbacks worked. Set them both back equal to false. */
          pcgd_fallback = PETSC_FALSE;
        }
      }
    }
    /* Convergence test for line search failure */
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);

    /* Standard convergence test */
    PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
    PetscCall(VecNorm(cg->W, NORM_2, &resnorm));
    PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated infinity or NaN");
    PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
    PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
    PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Assert we have an updated step and we need at least one more iteration. */
  /* Calculate the next direction */
  /* Estimate the active set at the new solution */
  PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type));
  /* Compute the projected gradient and its norm */
  PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient));
  if (cg->active_idx) PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0));
  PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
  gnorm2 = gnorm * gnorm;

  /* Calculate some quantities used in the StepDirectionUpdate. */
  PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm));
  /* Update the step direction. */
  PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));
  ++tao->niter;
  PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));

  if (cg->cg_type != TAO_BNCG_GD) {
    /* Figure out which previously active variables became inactive this iteration */
    PetscCall(ISDestroy(&cg->new_inactives));
    if (cg->inactive_idx && cg->inactive_old) PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives));
    /* Selectively reset the CG step for those freshly inactive variables */
    if (cg->new_inactives) {
      PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
      PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
      PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step));
      PetscCall(VecScale(cg->inactive_step, -1.0));
      PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
      PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
    }
    /* Verify that this is a descent direction */
    PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
    PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm));
    if (PetscIsInfOrNanReal(gd) || (gd / (dnorm * dnorm) <= -1e10 || gd / (dnorm * dnorm) >= -1e-10)) {
      /* Not a descent direction, so we reset back to projected gradient descent */
      PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
      ++cg->descent_error;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

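/* Developer note: TaoBNCGSetH0() stores a reference to H0 in cg->pc; during
   TaoSetUp_BNCG() that matrix is installed as the initial Jacobian of the
   LMVM scaling matrix via MatLMVMSetJ0(cg->B, cg->pc). A hypothetical call
   site would look like TaoBNCGSetH0(tao, H0) with a previously assembled
   symmetric positive-definite matrix H0. */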
PETSC_INTERN PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscBool same;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
  if (same) {
    PetscCall(PetscObjectReference((PetscObject)H0));
    cg->pc = H0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  TaoBNCGGetType - Return the type for the `TAOBNCG` solver

  Input Parameter:
. tao - the `Tao` solver context

  Output Parameter:
. type - `TAOBNCG` type

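  Example Usage:
  A minimal sketch (assumes `tao` was already set to type `TAOBNCG`):
.vb
  TaoBNCGType type;

  PetscCall(TaoBNCGGetType(tao, &type));
.ve
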
  Level: advanced

.seealso: `Tao`, `TAOBNCG`, `TaoBNCGSetType()`, `TaoBNCGType`
@*/
PetscErrorCode TaoBNCGGetType(Tao tao, TaoBNCGType *type)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscBool same;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
  PetscCheck(same, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_INCOMP, "TAO solver is not BNCG type");
  *type = cg->cg_type;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  TaoBNCGSetType - Set the type for the `TAOBNCG` solver

  Input Parameters:
+ tao  - the `Tao` solver context
- type - `TAOBNCG` type

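  Example Usage:
  A minimal sketch, selecting the Kou-Dai update (assumes `tao` was created with type `TAOBNCG`):
.vb
  PetscCall(TaoBNCGSetType(tao, TAO_BNCG_KD));
.ve
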
  Level: advanced

.seealso: `Tao`, `TAOBNCG`, `TaoBNCGGetType()`, `TaoBNCGType`
@*/
PetscErrorCode TaoBNCGSetType(Tao tao, TaoBNCGType type)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscBool same;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
  if (same) cg->cg_type = type;
  PetscFunctionReturn(PETSC_SUCCESS);
}