#include <petsctaolinesearch.h>
#include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/
#include <petscksp.h>

const char *const TaoBNCGTypes[] = {"gd", "pcgd", "hs", "fr", "prp", "prp_plus", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "TAOBNCGType", "TAO_BNCG_", NULL};
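/* The trailing "TAOBNCGType", "TAO_BNCG_", and NULL entries are the sentinels that
   PetscOptionsEnum() expects in a PetscEnum string array: the name of the enum type,
   the common prefix of its values, and a terminator. */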

#define CG_AS_NONE      0
#define CG_AS_BERTSEKAS 1
#define CG_AS_SIZE      2

static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};

PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscCall(ISDestroy(&cg->inactive_old));
  if (cg->inactive_idx) {
    PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old));
    PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old));
  }
  switch (asType) {
  case CG_AS_NONE:
    PetscCall(ISDestroy(&cg->inactive_idx));
    PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx));
    PetscCall(ISDestroy(&cg->active_idx));
    PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx));
    break;
  case CG_AS_BERTSEKAS:
    /* Use gradient descent to estimate the active set */
    PetscCall(VecCopy(cg->unprojected_gradient, cg->W));
    PetscCall(VecScale(cg->W, -1.0));
    PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  switch (asType) {
  case CG_AS_NONE:
    if (cg->active_idx) PetscCall(VecISSet(step, cg->active_idx, 0.0));
    break;
  case CG_AS_BERTSEKAS:
    PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoSolve_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal step = 1.0, gnorm, gnorm2, resnorm;
  PetscInt  nDiff;

  PetscFunctionBegin;
  /* Project the current point onto the feasible set */
  PetscCall(TaoComputeVariableBounds(tao));
  PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch, tao->XL, tao->XU));

  /* Project the initial point onto the feasible region */
  PetscCall(TaoBoundSolution(tao->solution, tao->XL, tao->XU, 0.0, &nDiff, tao->solution));

  if (nDiff > 0 || !tao->recycle) PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient));
  PetscCall(VecNorm(cg->unprojected_gradient, NORM_2, &gnorm));
  PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated infinity or NaN");

  /* Estimate the active set and compute the projected gradient */
  PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type));

  /* Project the gradient and calculate the norm */
  PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient));
  if (cg->active_idx) PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0));
  PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
  gnorm2 = gnorm * gnorm;

  /* Initialize counters */
  tao->niter = 0;
  cg->ls_fails = cg->descent_error = 0;
  cg->resets = -1;
  cg->skipped_updates = cg->pure_gd_steps = 0;
  cg->iter_quad = 0;

  /* Convergence test at the starting point. */
  tao->reason = TAO_CONTINUE_ITERATING;
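  /* VecFischer applies the Fischer-Burmeister function phi(a, b) = sqrt(a^2 + b^2) - a - b,
     which vanishes iff a >= 0, b >= 0, and a*b = 0, to the bound constraints; the norm of
     the result is therefore a residual for the first-order optimality conditions of the
     bound-constrained problem. */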
  PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
  PetscCall(VecNorm(cg->W, NORM_2, &resnorm));
  PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated infinity or NaN");
  PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
  PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
  PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
  /* Calculate initial direction. */
  if (!tao->recycle) {
    /* We are not recycling a solution/history from a past TaoSolve */
    PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
  }
  /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
  while (1) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      PetscUseTypeMethod(tao, update, tao->niter, tao->user_update);
      PetscCall(TaoComputeObjective(tao, tao->solution, &cg->f));
    }
    PetscCall(TaoBNCGConductIteration(tao, gnorm));
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
  }
}

static PetscErrorCode TaoSetUp_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
  if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection));
  if (!cg->W) PetscCall(VecDuplicate(tao->solution, &cg->W));
  if (!cg->work) PetscCall(VecDuplicate(tao->solution, &cg->work));
  if (!cg->sk) PetscCall(VecDuplicate(tao->solution, &cg->sk));
  if (!cg->yk) PetscCall(VecDuplicate(tao->gradient, &cg->yk));
  if (!cg->X_old) PetscCall(VecDuplicate(tao->solution, &cg->X_old));
  if (!cg->G_old) PetscCall(VecDuplicate(tao->gradient, &cg->G_old));
  if (cg->diag_scaling) {
    PetscCall(VecDuplicate(tao->solution, &cg->d_work));
    PetscCall(VecDuplicate(tao->solution, &cg->y_work));
    PetscCall(VecDuplicate(tao->solution, &cg->g_work));
  }
  if (!cg->unprojected_gradient) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient));
  if (!cg->unprojected_gradient_old) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient_old));
  PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk));
  if (cg->pc) PetscCall(MatLMVMSetJ0(cg->B, cg->pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoDestroy_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  if (tao->setupcalled) {
    PetscCall(VecDestroy(&cg->W));
    PetscCall(VecDestroy(&cg->work));
    PetscCall(VecDestroy(&cg->X_old));
    PetscCall(VecDestroy(&cg->G_old));
    PetscCall(VecDestroy(&cg->unprojected_gradient));
    PetscCall(VecDestroy(&cg->unprojected_gradient_old));
    PetscCall(VecDestroy(&cg->g_work));
    PetscCall(VecDestroy(&cg->d_work));
    PetscCall(VecDestroy(&cg->y_work));
    PetscCall(VecDestroy(&cg->sk));
    PetscCall(VecDestroy(&cg->yk));
  }
  PetscCall(ISDestroy(&cg->active_lower));
  PetscCall(ISDestroy(&cg->active_upper));
  PetscCall(ISDestroy(&cg->active_fixed));
  PetscCall(ISDestroy(&cg->active_idx));
  PetscCall(ISDestroy(&cg->inactive_idx));
  PetscCall(ISDestroy(&cg->inactive_old));
  PetscCall(ISDestroy(&cg->new_inactives));
  PetscCall(MatDestroy(&cg->B));
  if (cg->pc) PetscCall(MatDestroy(&cg->pc));
  PetscCall(PetscFree(tao->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoSetFromOptions_BNCG(Tao tao, PetscOptionItems PetscOptionsObject)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "Nonlinear Conjugate Gradient method for bound constrained optimization");
  PetscCall(PetscOptionsEnum("-tao_bncg_type", "CG update formula", "TaoBNCGTypes", TaoBNCGTypes, (PetscEnum)cg->cg_type, (PetscEnum *)&cg->cg_type, NULL));
  if (cg->cg_type != TAO_BNCG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
  if (TAO_BNCG_GD == cg->cg_type) {
    cg->cg_type = TAO_BNCG_PCGD;
    /* Set scaling equal to none or, at best, scalar scaling. */
    cg->unscaled_restart = PETSC_TRUE;
    cg->diag_scaling     = PETSC_FALSE;
  }
  PetscCall(PetscOptionsReal("-tao_bncg_hz_eta", "(developer) cutoff tolerance for HZ", "", cg->hz_eta, &cg->hz_eta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_eps", "(developer) cutoff value for restarts", "", cg->epsilon, &cg->epsilon, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_dk_eta", "(developer) cutoff tolerance for DK", "", cg->dk_eta, &cg->dk_eta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_xi", "(developer) parameter in the KD method", "", cg->xi, &cg->xi, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_alpha", "(developer) parameter for the scalar scaling", "", cg->alpha, &cg->alpha, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for dfp/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling", "Enable diagonal Broyden-like preconditioning", "", cg->diag_scaling, &cg->diag_scaling, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart", "(developer) use dynamic restarts as in HZ, DK, KD", "", cg->use_dynamic_restart, &cg->use_dynamic_restart, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart", "(developer) use unscaled gradient restarts", "", cg->unscaled_restart, &cg->unscaled_restart, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL));
  PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL));
  PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart", "(developer) enable regular steepest descent restarting every fixed number of iterations", "", cg->spaced_restart, &cg->spaced_restart, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_no_scaling", "Disable all scaling except in restarts", "", cg->no_scaling, &cg->no_scaling, NULL));
  if (cg->no_scaling) {
    cg->diag_scaling = PETSC_FALSE;
    cg->alpha        = -1.0;
  }
  if (cg->alpha == -1.0 && cg->cg_type == TAO_BNCG_KD && !cg->diag_scaling) { /* Some more default options that appear to be good. */
    cg->neg_xi = PETSC_TRUE;
  }
  PetscCall(PetscOptionsBool("-tao_bncg_neg_xi", "(developer) use negative xi when it might be a smaller descent direction than necessary", "", cg->neg_xi, &cg->neg_xi, NULL));
  PetscCall(PetscOptionsEList("-tao_bncg_as_type", "active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->as_type], &cg->as_type, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables", "", cg->as_tol, &cg->as_tol, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables", "", cg->as_step, &cg->as_step, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts", "", cg->delta_min, &cg->delta_min, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts", "", cg->delta_max, &cg->delta_max, NULL));

  PetscOptionsHeadEnd();
  PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix));
  PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_"));
  PetscCall(MatSetFromOptions(cg->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
{
  PetscBool isascii;
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii) {
    PetscCall(PetscViewerASCIIPushTab(viewer));
    PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", TaoBNCGTypes[cg->cg_type]));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %" PetscInt_FMT "\n", cg->skipped_updates));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %" PetscInt_FMT "\n", cg->resets));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %" PetscInt_FMT "\n", cg->pure_gd_steps));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %" PetscInt_FMT "\n", cg->descent_error));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %" PetscInt_FMT "\n", cg->ls_fails));
    if (cg->diag_scaling) {
      /* The viewer is already known to be ASCII here, so view the LMVM matrix directly */
      PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
      PetscCall(MatView(cg->B, viewer));
      PetscCall(PetscViewerPopFormat(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

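/* The scalar scaling tau returned below is the positive root of the quadratic

     alpha*yty*tau^2 - (2*alpha - 1)*yts*tau + (alpha - 1)*sts = 0,

   which interpolates between the classic self-scaling choices handled as special
   cases: alpha = 1 gives the BFGS scaling tau = yts/yty, alpha = 0 gives the DFP
   scaling tau = sts/yts, and, for example, alpha = 0.5 gives their geometric mean
   tau = sqrt(sts/yty), which lies between the two by the Cauchy-Schwarz inequality
   yts^2 <= yty*sts. Passing alpha = -1 disables the scaling (tau = 1). */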
PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
{
  PetscReal a, b, c, sig1, sig2;

  PetscFunctionBegin;
  *scale = 0.0;
  if (1.0 == alpha) *scale = yts / yty;
  else if (0.0 == alpha) *scale = sts / yts;
  else if (-1.0 == alpha) *scale = 1.0;
  else {
    a = yty;
    b = yts;
    c = sts;
    a *= alpha;
    b *= -(2.0 * alpha - 1.0);
    c *= alpha - 1.0;
    sig1 = (-b + PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a);
    sig2 = (-b - PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a);
    /* accept the positive root as the scalar */
    if (sig1 > 0.0) *scale = sig1;
    else if (sig2 > 0.0) *scale = sig2;
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.

  Options Database Keys:
+ -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
. -tao_bncg_eta <r> - restart tolerance
. -tao_bncg_type <taocg_type> - cg formula
. -tao_bncg_as_type <none,bertsekas> - active set estimation method
. -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
. -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
. -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision.
. -tao_bncg_theta <r> - convex combination parameter for the Broyden method
. -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
. -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
. -tao_bncg_xi <r> - multiplicative constant of the gamma term in the KD method
. -tao_bncg_hz_theta <r> - multiplicative constant of the theta term for the HZ method
. -tao_bncg_bfgs_scale <r> - scaling parameter of the BFGS contribution to the scalar Broyden method
. -tao_bncg_dfp_scale <r> - scaling parameter of the DFP contribution to the scalar Broyden method
. -tao_bncg_diag_scaling <b> - whether or not to use diagonal initialization/preconditioning for the CG methods. Default true.
. -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods
. -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
. -tao_bncg_zeta <r> - scaling parameter in the KD method
. -tao_bncg_delta_min <r> - minimum bound for rescaling during restarted gradient descent steps
. -tao_bncg_delta_max <r> - maximum bound for rescaling during restarted gradient descent steps
. -tao_bncg_min_quad <i> - number of quadratic-like steps in a row necessary to do a dynamic restart
. -tao_bncg_min_restart_num <i> - the number x such that a gradient descent step is forced every x*n iterations, where n is the dimension of the problem
. -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
. -tao_bncg_no_scaling <b> - if true, eliminates all scaling, including defaults
- -tao_bncg_neg_xi <b> - whether or not to use negative xi in the KD method under certain conditions

  Notes:
  CG formulas are:
+ "gd" - Gradient Descent
. "pcgd" - Preconditioned Gradient Descent
. "fr" - Fletcher-Reeves
. "prp" - Polak-Ribiere-Polyak
. "prp_plus" - Polak-Ribiere-Polyak Plus
. "hs" - Hestenes-Stiefel
. "dy" - Dai-Yuan
. "ssml_bfgs" - Self-Scaling Memoryless BFGS
. "ssml_dfp" - Self-Scaling Memoryless DFP
. "ssml_brdn" - Self-Scaling Memoryless Broyden
. "hz" - Hager-Zhang (CG_DESCENT 5.3)
. "dk" - Dai-Kou (2013)
- "kd" - Kou-Dai (2015)

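  Example usage (a minimal sketch; `FormFunctionGradient()`, the solution vector `x`, the
  bound vectors `xl` and `xu`, and the user context `user` are assumed to be set up by the
  caller; TaoSetFromOptions() picks up -tao_bncg_type and the options above):
.vb
  Tao tao;

  PetscCall(TaoCreate(PETSC_COMM_WORLD, &tao));
  PetscCall(TaoSetType(tao, TAOBNCG));
  PetscCall(TaoSetSolution(tao, x));
  PetscCall(TaoSetVariableBounds(tao, xl, xu));
  PetscCall(TaoSetObjectiveAndGradient(tao, NULL, FormFunctionGradient, &user));
  PetscCall(TaoSetFromOptions(tao));
  PetscCall(TaoSolve(tao));
  PetscCall(TaoDestroy(&tao));
.ve
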
  Level: beginner
M*/

PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao)
{
  TAO_BNCG   *cg;
  const char *morethuente_type = TAOLINESEARCHMT;

  PetscFunctionBegin;
  tao->ops->setup          = TaoSetUp_BNCG;
  tao->ops->solve          = TaoSolve_BNCG;
  tao->ops->view           = TaoView_BNCG;
  tao->ops->setfromoptions = TaoSetFromOptions_BNCG;
  tao->ops->destroy        = TaoDestroy_BNCG;

  /* Override default settings (unless already changed) */
  PetscCall(TaoParametersInitialize(tao));
  PetscObjectParameterSetDefault(tao, max_it, 2000);
  PetscObjectParameterSetDefault(tao, max_funcs, 4000);

  /* Note: nondefault values should be used for nonlinear conjugate gradient
     method. In particular, gtol should be less than 0.5; the value used in
     Nocedal and Wright is 0.10. We use the default values for the
     linesearch because it seems to work better. */
  PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1));
  PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type));
  PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao));

  PetscCall(PetscNew(&cg));
  tao->data = (void *)cg;
  PetscCall(KSPInitializePackage());
  PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1));
  PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN));

  cg->pc = NULL;

  cg->dk_eta           = 0.5;
  cg->hz_eta           = 0.4;
  cg->dynamic_restart  = PETSC_FALSE;
  cg->unscaled_restart = PETSC_FALSE;
  cg->no_scaling       = PETSC_FALSE;
  cg->delta_min        = 1e-7;
  cg->delta_max        = 100;
  cg->theta            = 1.0;
  cg->hz_theta         = 1.0;
  cg->dfp_scale        = 1.0;
  cg->bfgs_scale       = 1.0;
  cg->zeta             = 0.1;
  cg->min_quad         = 6;
  cg->min_restart_num  = 6; /* As in CG_DESCENT and KD2015 */
  cg->xi               = 1.0;
  cg->neg_xi           = PETSC_TRUE;
  cg->spaced_restart   = PETSC_FALSE;
  cg->tol_quad         = 1e-8;
  cg->as_step          = 0.001;
  cg->as_tol           = 0.001;
  cg->eps_23           = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0 / 3.0); /* Just a little tighter */
  cg->as_type          = CG_AS_BERTSEKAS;
  cg->cg_type          = TAO_BNCG_SSML_BFGS;
  cg->alpha            = 1.0;
  cg->diag_scaling     = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal scaling;

  PetscFunctionBegin;
  ++cg->resets;
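  /* A common first-step heuristic (cf. Nocedal & Wright's initial step length choice):
     for a model whose optimal value is near zero, the minimizing step along -g is
     roughly 2*|f| / ||g||^2; the max() and min() guards below keep the scaling well
     defined and inside [delta_min, delta_max]. */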
  scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23);
  scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling));
  if (cg->unscaled_restart) {
    scaling = 1.0;
    ++cg->pure_gd_steps;
  }
  PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient));
  /* Also want to reset our diagonal scaling with each restart */
  if (cg->diag_scaling) PetscCall(MatLMVMReset(cg->B, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal quadinterp;

  PetscFunctionBegin;
  if (cg->f < cg->min_quad / 10) {
    *dynrestart = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  } /* just skip this since this strategy doesn't work well for functions near zero */
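  /* For a quadratic objective restricted to the search line, the directional derivative
     is linear in the step, so the trapezoid rule is exact:
       f - fold = (stepsize / 2) * (gd + gd_old),
     making quadinterp exactly 1. Counting consecutive iterations with quadinterp near 1
     therefore detects locally quadratic behavior. */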
  quadinterp = 2.0 * (cg->f - fold) / (stepsize * (gd + gd_old));
  if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
  else {
    cg->iter_quad = 0;
    *dynrestart   = PETSC_FALSE;
  }
  if (cg->iter_quad >= cg->min_quad) {
    cg->iter_quad = 0;
    *dynrestart   = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal gamma = 1.0, tau_k, beta;
  PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk = 1.0, gd;
  PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
  PetscInt  dim;
  PetscBool cg_restart = PETSC_FALSE;

  PetscFunctionBegin;
  /* Local curvature check to see if we need to restart */
  if (tao->niter >= 1 || tao->recycle) {
    PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
    PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
    ynorm2 = ynorm * ynorm;
    PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk));
    if (step * dnorm < PETSC_MACHINE_EPSILON || step * dk_yk < PETSC_MACHINE_EPSILON) {
      cg_restart = PETSC_TRUE;
      ++cg->skipped_updates;
    }
    /* If the user wants regular restarts, do a gradient descent step every
       min_restart_num * n iterations, where n is the dimension of the problem */
    if (cg->spaced_restart) {
      PetscCall(VecGetSize(tao->gradient, &dim));
      if (0 == tao->niter % (dim * cg->min_restart_num)) cg_restart = PETSC_TRUE;
    }
  }
  /* Compute the diagonal scaling vector if applicable */
  if (cg->diag_scaling) PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient));

  /* A note on diagonal scaling (to be added to paper):
     For the FR, PR, PRP, and DY methods, the diagonally scaled versions
     must be derived as a preconditioned CG method rather than as
     a Hessian initialization like in the Broyden methods. */

  /* In that case, one writes the objective function in the transformed variables as
     fhat(y) \equiv f(Ay). Gradient evaluations then yield ghat(y_k) = A g(Ay_k) = A g(x_k).
     Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
     HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
     same under preconditioning. Note that A is diagonal, such that A^T = A. */

  /* This yields questions like what the dot product d_k^T y_k
     should look like. HZ mistakenly treats that as the same under
     preconditioning, but that is not necessarily true. */

  /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
     we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
     yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
     NOT the same if the matrix used to construct the preconditioner is updated between iterations.
     The same issue is found when considering dot products of the form g_{k+1}^T y_k. */

  /* Compute CG step direction */
  if (cg_restart) {
    PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
  } else if (pcgd_fallback) {
    /* Just like preconditioned CG */
    PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
    PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
  } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
    switch (cg->cg_type) {
    case TAO_BNCG_PCGD:
      if (!cg->diag_scaling) {
        if (!cg->no_scaling) {
          cg->sts = step * step * dnorm * dnorm;
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha));
        } else {
          tau_k = 1.0;
          ++cg->pure_gd_steps;
        }
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient));
      } else {
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
      }
      break;

    case TAO_BNCG_HS:
      /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
      if (!cg->diag_scaling) {
        cg->sts = step * step * dnorm * dnorm;
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / dk_yk;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
        beta = gkp1_yk / dk_yk;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_FR:
      PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
      PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
      PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
      ynorm2 = ynorm * ynorm;
      PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk));
      if (!cg->diag_scaling) {
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, step * step * dnorm * dnorm, &tau_k, cg->alpha));
        beta = tau_k * gnorm2 / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(tao->gradient, cg->g_work, &tmp));
        beta = tmp / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_PRP:
      snorm = step * dnorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old));
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        beta = gkp1_yk / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_PRP_PLUS:
      PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
      PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
      ynorm2 = ynorm * ynorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / gnorm2_old;
        beta = PetscMax(beta, 0.0);
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        beta = gkp1_yk / gnorm2_old;
        beta = PetscMax(beta, 0.0);
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_DY:
      /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
         SIAM Journal on Optimization 10, no. 1 (1999): 177-182. */
      if (!cg->diag_scaling) {
        PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd));
        PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->yts, &tau_k, cg->alpha));
        beta = tau_k * gnorm2 / (gd - gd_old);
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work));
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, tao->gradient, &gtDg));
        PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old));
        PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk));
        dk_yk = dk_yk - gd_old;
        beta  = gtDg / dk_yk;
        PetscCall(VecScale(cg->d_work, beta));
        PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work));
      }
      break;

    case TAO_BNCG_HZ:
      /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
         ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = dnorm * step;
      cg->yts = step * dk_yk;
      if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
      if (cg->dynamic_restart) {
        PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      } else {
        if (!cg->diag_scaling) {
          PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
          /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
          tmp  = gd / dk_yk;
          beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk));
          /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
          beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm));
          /* d <- -t*g + beta*t*d */
          PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm * snorm;
          /* Apply the diagonal scaling to all my vectors */
          PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
          PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
          PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work));
          /* Construct the constant ytDgkp1 */
          PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
          /* Construct the constant for scaling Dkyk in the update */
          tmp = gd / dk_yk;
          PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
          tau_k = -tau_k * gd / (dk_yk * dk_yk);
          /* beta is the constant which adds the dk contribution */
          beta = gkp1_yk / dk_yk + cg->hz_theta * tau_k; /* HZ; (1.15) from DK 2013 */
          /* From HZ2013, modified to account for diagonal scaling */
          PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old));
          PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd));
          beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm));
          /* Do the update */
          PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
        }
      }
      break;

    case TAO_BNCG_DK:
      /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
         SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
        /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
        tmp  = gd / dk_yk;
        beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk) + gd / (dnorm * dnorm)) - step * gd / dk_yk;
        beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm));
        /* d <- -t*g + beta*t*d */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        cg->yty = ynorm2;
        cg->sts = snorm * snorm;
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work));
        /* Construct the constant ytDgkp1 */
        PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
        PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
        tau_k = tau_k * gd / (dk_yk * dk_yk);
        tmp   = gd / dk_yk;
        /* beta is the constant which adds the dk contribution */
        beta = gkp1_yk / dk_yk - step * tmp - tau_k;
        /* Update this for the last term in beta */
        PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk));
        beta += tmp * dk_yk / (dnorm * dnorm); /* projection of y_work onto dk */
        PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd));
        PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old));
        beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm));
        /* Do the update */
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_KD:
      /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization."
         Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
      if (cg->dynamic_restart) {
        PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      } else {
        if (!cg->diag_scaling) {
          PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
          beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)) - step * gd / dk_yk;
          if (beta < cg->zeta * tau_k * gd / (dnorm * dnorm)) { /* 0.1 is KD's zeta parameter */
            beta  = cg->zeta * tau_k * gd / (dnorm * dnorm);
            gamma = 0.0;
          } else {
            if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0 * gd / dk_yk;
            /* This seems to be very effective when there's no tau_k scaling.
               This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */
            else gamma = cg->xi * gd / dk_yk;
          }
          /* d <- -t*g + beta*t*d + t*tmp*yk */
          PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma * tau_k, beta, tao->gradient, cg->yk));
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm * snorm;
          PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
          PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
          /* Construct the constant ytDgkp1 */
          PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk));
          /* Construct the constant for scaling Dkyk in the update */
          gamma = gd / dk_yk;
          /* tau_k = -ytDy/(ytd)^2 * gd */
          PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
          tau_k = tau_k * gd / (dk_yk * dk_yk);
          /* beta is the constant which adds the d_k contribution */
          beta = gkp1D_yk / dk_yk - step * gamma - tau_k;
          /* Here is the requisite check */
          PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp));
          if (cg->neg_xi) {
            /* modified KD implementation */
            if (gkp1D_yk / dk_yk < 0) gamma = -1.0 * gd / dk_yk;
            else gamma = cg->xi * gd / dk_yk;
            if (beta < cg->zeta * tmp / (dnorm * dnorm)) {
              beta  = cg->zeta * tmp / (dnorm * dnorm);
              gamma = 0.0;
            }
          } else { /* original KD 2015 implementation */
            if (beta < cg->zeta * tmp / (dnorm * dnorm)) {
              beta  = cg->zeta * tmp / (dnorm * dnorm);
              gamma = 0.0;
            } else gamma = cg->xi * gd / dk_yk;
          }
          /* Do the update in two steps */
          PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
          PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work));
        }
      }
      break;

    case TAO_BNCG_SSML_BFGS:
      /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
         Discussion Papers 269 (1977). */
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
        tmp  = gd / dk_yk;
        beta = tau_k * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * tmp;
        /* d <- -t*g + beta*t*d + t*tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp * tau_k, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* compute scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = gd / dk_yk;
        /* Compute scalar beta */
        beta = (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    case TAO_BNCG_SSML_DFP:
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        tau_k = cg->dfp_scale * tau_k;
        tmp   = tau_k * gkp1_yk / cg->yty;
        beta  = -step * gd / dk_yk;
        /* d <- -t*g + beta*d + tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* compute scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = gkp1_yk / tmp;
        /* Compute scalar beta */
        beta = -step * gd / dk_yk;
        /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    case TAO_BNCG_SSML_BRDN:
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = step * dk_yk;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_bfgs, cg->bfgs_scale));
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_dfp, cg->dfp_scale));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        tau_k = cg->theta * tau_bfgs + (1.0 - cg->theta) * tau_dfp;
        /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0,
           it should reproduce the tau_dfp scaling. Same with dfp_scale. */
        tmp  = cg->theta * tau_bfgs * gd / dk_yk + (1 - cg->theta) * tau_dfp * gkp1_yk / cg->yty;
        beta = cg->theta * tau_bfgs * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* d <- -t*g + beta*d + tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* compute scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = cg->theta * gd / dk_yk + (1 - cg->theta) * (gkp1_yk / tmp);
        /* Compute scalar beta */
        beta = cg->theta * (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
{
  TAO_BNCG                    *cg        = (TAO_BNCG *)tao->data;
  TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
  PetscReal                    step = 1.0, gnorm2, gd, dnorm = 0.0;
  PetscReal                    gnorm2_old, f_old, resnorm, gnorm_old;
  PetscBool                    pcgd_fallback = PETSC_FALSE;

  PetscFunctionBegin;
  /* We are now going to perform a line search along the direction. */
  /* Store solution and gradient info before it changes */
  PetscCall(VecCopy(tao->solution, cg->X_old));
  PetscCall(VecCopy(tao->gradient, cg->G_old));
  PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old));

  gnorm_old  = gnorm;
  gnorm2_old = gnorm_old * gnorm_old;
  f_old      = cg->f;
  /* Perform bounded line search. If we are recycling a solution from a previous
     TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */
  if (!(tao->recycle && 0 == tao->niter)) {
    /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */
    PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
    PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
    PetscCall(TaoAddLineSearchCounts(tao));

    /* Check linesearch failure */
    if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
      ++cg->ls_fails;
      if (cg->cg_type == TAO_BNCG_GD) {
        /* Nothing left to do but fail out of the optimization */
        step        = 0.0;
        tao->reason = TAO_DIVERGED_LS_FAILURE;
      } else {
        /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */
        PetscCall(VecCopy(cg->X_old, tao->solution));
        PetscCall(VecCopy(cg->G_old, tao->gradient));
        PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient));
        gnorm  = gnorm_old;
        gnorm2 = gnorm2_old;
        cg->f  = f_old;

        /* Fall back on preconditioned CG (so long as you're not already using it) */
        if (cg->cg_type != TAO_BNCG_PCGD && cg->diag_scaling) {
          pcgd_fallback = PETSC_TRUE;
          PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));
          PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));

          PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
          PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
          PetscCall(TaoAddLineSearchCounts(tao));

          pcgd_fallback = PETSC_FALSE;
          if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
            /* Going to perform a regular gradient descent step. */
            ++cg->ls_fails;
            step = 0.0;
          }
        }
        /* Fall back on the scaled gradient step */
        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
          ++cg->ls_fails;
          PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
          PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
          PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
          PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
          PetscCall(TaoAddLineSearchCounts(tao));
        }

        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
          /* Nothing left to do but fail out of the optimization */
          ++cg->ls_fails;
          step        = 0.0;
          tao->reason = TAO_DIVERGED_LS_FAILURE;
        } else {
          /* One of the fallbacks worked. Set them both back equal to false. */
          pcgd_fallback = PETSC_FALSE;
        }
      }
    }
    /* Convergence test for line search failure */
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);

    /* Standard convergence test */
    PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
    PetscCall(VecNorm(cg->W, NORM_2, &resnorm));
    PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated infinity or NaN");
    PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
    PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
    PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Assert we have an updated step and we need at least one more iteration. */
  /* Calculate the next direction */
  /* Estimate the active set at the new solution */
  PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type));
  /* Compute the projected gradient and its norm */
  PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient));
  if (cg->active_idx) PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0));
  PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
  gnorm2 = gnorm * gnorm;

  /* Calculate some quantities used in the StepDirectionUpdate. */
  PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm));
  /* Update the step direction. */
  PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));
  ++tao->niter;
  PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));

  if (cg->cg_type != TAO_BNCG_GD) {
    /* Figure out which previously active variables became inactive this iteration */
    PetscCall(ISDestroy(&cg->new_inactives));
    if (cg->inactive_idx && cg->inactive_old) PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives));
    /* Selectively reset the CG step for those freshly inactive variables */
    if (cg->new_inactives) {
      PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
      PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
      PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step));
      PetscCall(VecScale(cg->inactive_step, -1.0));
      PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
      PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
    }
    /* Verify that this is a descent direction */
    PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
    PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm));
    if (PetscIsInfOrNanReal(gd) || (gd / (dnorm * dnorm) <= -1e10 || gd / (dnorm * dnorm) >= -1e-10)) {
      /* Not a descent direction, so we reset back to projected gradient descent */
      PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
      ++cg->descent_error;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscBool same;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
  if (same) {
    PetscCall(PetscObjectReference((PetscObject)H0));
    cg->pc = H0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  TaoBNCGGetType - Return the type for the `TAOBNCG` solver

  Input Parameter:
. tao - the `Tao` solver context

  Output Parameter:
. type - `TAOBNCG` type

  Level: advanced

.seealso: `Tao`, `TAOBNCG`, `TaoBNCGSetType()`, `TaoBNCGType`
@*/
PetscErrorCode TaoBNCGGetType(Tao tao, TaoBNCGType *type)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscBool same;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
  PetscCheck(same, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_INCOMP, "TAO solver is not BNCG type");
  *type = cg->cg_type;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  TaoBNCGSetType - Set the type for the `TAOBNCG` solver

  Input Parameters:
+ tao - the `Tao` solver context
- type - `TAOBNCG` type

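  Note:
  This only takes effect if `tao` is of type `TAOBNCG`. A typical call sequence (sketch):
.vb
  PetscCall(TaoSetType(tao, TAOBNCG));
  PetscCall(TaoBNCGSetType(tao, TAO_BNCG_HZ));
.ve
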
  Level: advanced

.seealso: `Tao`, `TAOBNCG`, `TaoBNCGGetType()`, `TaoBNCGType`
@*/
PetscErrorCode TaoBNCGSetType(Tao tao, TaoBNCGType type)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscBool same;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
  if (same) cg->cg_type = type;
  PetscFunctionReturn(PETSC_SUCCESS);
}