xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision 37eeb8152ec6a2cf24186d3591c2c5de5dfd8fa5)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
21 {
22   PetscErrorCode ierr;
23 
24   PetscFunctionBegin;
25   ierr = PetscNew(p);CHKERRQ(ierr);
26   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
27   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
28   (*p)->next = NULL;
29   PetscFunctionReturn(0);
30 }
31 
32 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
33 {
34   PetscErrorCode ierr;
35   Vec_Chain      *p = head->next, *q;
36 
37   PetscFunctionBegin;
38   while(p) {
39     q = p->next;
40     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
41     ierr = PetscFree(p);CHKERRQ(ierr);
42     p = q;
43   }
44   head->next = NULL;
45   PetscFunctionReturn(0);
46 }
47 
48 
49 static PetscErrorCode TaoSolve_BMRM(Tao tao)
50 {
51   PetscErrorCode     ierr;
52   TAO_DF             df;
53   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
54 
55   /* Values and pointers to parts of the optimization problem */
56   PetscReal          f = 0.0;
57   Vec                W = tao->solution;
58   Vec                G = tao->gradient;
59   PetscReal          lambda;
60   PetscReal          bt;
61   Vec_Chain          grad_list, *tail_glist, *pgrad;
62   PetscInt           i;
63   PetscMPIInt        rank;
64 
65   /* Used in converged criteria check */
66   PetscReal          reg;
67   PetscReal          jtwt = 0.0, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
68   PetscReal          innerSolverTol;
69   MPI_Comm           comm;
70 
71   PetscFunctionBegin;
72   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
73   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
74   lambda = bmrm->lambda;
75 
76   /* Check Stopping Condition */
77   tao->step = 1.0;
78   max_jtwt = -BMRM_INFTY;
79   min_jw = BMRM_INFTY;
80   innerSolverTol = 1.0;
81   epsilon = 0.0;
82 
83   if (!rank) {
84     ierr = init_df_solver(&df);CHKERRQ(ierr);
85     grad_list.next = NULL;
86     tail_glist = &grad_list;
87   }
88 
89   df.tol = 1e-6;
90   tao->reason = TAO_CONTINUE_ITERATING;
91 
92   /*-----------------Algorithm Begins------------------------*/
93   /* make the scatter */
94   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
95   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
96   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
97 
98   /* NOTE: In application pass the sub-gradient of Remp(W) */
99   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
100   ierr = TaoLogConvergenceHistory(tao,f,1.0,0.0,tao->ksp_its);CHKERRQ(ierr);
101   ierr = TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step);CHKERRQ(ierr);
102   ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
103 
104   while (tao->reason == TAO_CONTINUE_ITERATING) {
105     /* Call general purpose update function */
106     if (tao->ops->update) {
107       ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr);
108     }
109 
110     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
111     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
112     bt = f - bt;
113 
114     /* First gather the gradient to the master node */
115     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
116     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
117 
118     /* Bring up the inner solver */
119     if (!rank) {
120       ierr = ensure_df_space(tao->niter+1, &df);CHKERRQ(ierr);
121       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
122       tail_glist->next = pgrad;
123       tail_glist = pgrad;
124 
125       df.a[tao->niter] = 1.0;
126       df.f[tao->niter] = -bt;
127       df.u[tao->niter] = 1.0;
128       df.l[tao->niter] = 0.0;
129 
130       /* set up the Q */
131       pgrad = grad_list.next;
132       for (i=0; i<=tao->niter; i++) {
133         if (!pgrad) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Assert that there are at least tao->niter+1 pgrad available");
134         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
135         df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
136         pgrad = pgrad->next;
137       }
138 
139       if (tao->niter > 0) {
140         df.x[tao->niter] = 0.0;
141         ierr = solve(&df);CHKERRQ(ierr);
142       } else
143         df.x[0] = 1.0;
144 
145       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
146       jtwt = 0.0;
147       ierr = VecSet(bmrm->local_w, 0.0);CHKERRQ(ierr);
148       pgrad = grad_list.next;
149       for (i=0; i<=tao->niter; i++) {
150         jtwt -= df.x[i] * df.f[i];
151         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V);CHKERRQ(ierr);
152         pgrad = pgrad->next;
153       }
154 
155       ierr = VecNorm(bmrm->local_w, NORM_2, &reg);CHKERRQ(ierr);
156       reg = 0.5*lambda*reg*reg;
157       jtwt -= reg;
158     } /* end if rank == 0 */
159 
160     /* scatter the new W to all nodes */
161     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
162     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
163 
164     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
165 
166     ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
167     ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
168 
169     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
170     if (jw < min_jw) min_jw = jw;
171     if (jtwt > max_jtwt) max_jtwt = jtwt;
172 
173     pre_epsilon = epsilon;
174     epsilon = min_jw - jtwt;
175 
176     if (!rank) {
177       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
178       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
179 
180       /* if the annealing doesn't work well, lower the inner solver tolerance */
181       if(pre_epsilon < epsilon) innerSolverTol *= 0.2;
182 
183       df.tol = innerSolverTol*0.5;
184     }
185 
186     tao->niter++;
187     ierr = TaoLogConvergenceHistory(tao,min_jw,epsilon,0.0,tao->ksp_its);CHKERRQ(ierr);
188     ierr = TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step);CHKERRQ(ierr);
189     ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
190   }
191 
192   /* free all the memory */
193   if (!rank) {
194     ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
195     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
196   }
197 
198   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
199   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
200   PetscFunctionReturn(0);
201 }
202 
203 
204 /* ---------------------------------------------------------- */
205 
206 static PetscErrorCode TaoSetup_BMRM(Tao tao)
207 {
208 
209   PetscErrorCode ierr;
210 
211   PetscFunctionBegin;
212   /* Allocate some arrays */
213   if (!tao->gradient) {
214     ierr = VecDuplicate(tao->solution, &tao->gradient);CHKERRQ(ierr);
215   }
216   PetscFunctionReturn(0);
217 }
218 
219 /*------------------------------------------------------------*/
220 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
221 {
222   PetscErrorCode ierr;
223 
224   PetscFunctionBegin;
225   ierr = PetscFree(tao->data);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 static PetscErrorCode TaoSetFromOptions_BMRM(PetscOptionItems *PetscOptionsObject,Tao tao)
230 {
231   PetscErrorCode ierr;
232   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
233 
234   PetscFunctionBegin;
235   ierr = PetscOptionsHead(PetscOptionsObject,"BMRM for regularized risk minimization");CHKERRQ(ierr);
236   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL);CHKERRQ(ierr);
237   ierr = PetscOptionsTail();CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240 
241 /*------------------------------------------------------------*/
242 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
243 {
244   PetscBool      isascii;
245   PetscErrorCode ierr;
246 
247   PetscFunctionBegin;
248   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
249   if (isascii) {
250     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
251     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
252   }
253   PetscFunctionReturn(0);
254 }
255 
256 /*------------------------------------------------------------*/
257 /*MC
258   TAOBMRM - bundle method for regularized risk minimization
259 
260   Options Database Keys:
261 . - tao_bmrm_lambda - regulariser weight
262 
263   Level: beginner
264 M*/
265 
266 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
267 {
268   TAO_BMRM       *bmrm;
269   PetscErrorCode ierr;
270 
271   PetscFunctionBegin;
272   tao->ops->setup = TaoSetup_BMRM;
273   tao->ops->solve = TaoSolve_BMRM;
274   tao->ops->view  = TaoView_BMRM;
275   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
276   tao->ops->destroy = TaoDestroy_BMRM;
277 
278   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
279   bmrm->lambda = 1.0;
280   tao->data = (void*)bmrm;
281 
282   /* Override default settings (unless already changed) */
283   if (!tao->max_it_changed) tao->max_it = 2000;
284   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
285   if (!tao->gatol_changed) tao->gatol = 1.0e-12;
286   if (!tao->grtol_changed) tao->grtol = 1.0e-12;
287 
288   PetscFunctionReturn(0);
289 }
290 
291 PetscErrorCode init_df_solver(TAO_DF *df)
292 {
293   PetscInt       i, n = INCRE_DIM;
294   PetscErrorCode ierr;
295 
296   PetscFunctionBegin;
297   /* default values */
298   df->maxProjIter = 200;
299   df->maxPGMIter = 300000;
300   df->b = 1.0;
301 
302   /* memory space required by Dai-Fletcher */
303   df->cur_num_cp = n;
304   ierr = PetscMalloc1(n, &df->f);CHKERRQ(ierr);
305   ierr = PetscMalloc1(n, &df->a);CHKERRQ(ierr);
306   ierr = PetscMalloc1(n, &df->l);CHKERRQ(ierr);
307   ierr = PetscMalloc1(n, &df->u);CHKERRQ(ierr);
308   ierr = PetscMalloc1(n, &df->x);CHKERRQ(ierr);
309   ierr = PetscMalloc1(n, &df->Q);CHKERRQ(ierr);
310 
311   for (i = 0; i < n; i ++) {
312     ierr = PetscMalloc1(n, &df->Q[i]);CHKERRQ(ierr);
313   }
314 
315   ierr = PetscMalloc1(n, &df->g);CHKERRQ(ierr);
316   ierr = PetscMalloc1(n, &df->y);CHKERRQ(ierr);
317   ierr = PetscMalloc1(n, &df->tempv);CHKERRQ(ierr);
318   ierr = PetscMalloc1(n, &df->d);CHKERRQ(ierr);
319   ierr = PetscMalloc1(n, &df->Qd);CHKERRQ(ierr);
320   ierr = PetscMalloc1(n, &df->t);CHKERRQ(ierr);
321   ierr = PetscMalloc1(n, &df->xplus);CHKERRQ(ierr);
322   ierr = PetscMalloc1(n, &df->tplus);CHKERRQ(ierr);
323   ierr = PetscMalloc1(n, &df->sk);CHKERRQ(ierr);
324   ierr = PetscMalloc1(n, &df->yk);CHKERRQ(ierr);
325 
326   ierr = PetscMalloc1(n, &df->ipt);CHKERRQ(ierr);
327   ierr = PetscMalloc1(n, &df->ipt2);CHKERRQ(ierr);
328   ierr = PetscMalloc1(n, &df->uv);CHKERRQ(ierr);
329   PetscFunctionReturn(0);
330 }
331 
332 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
333 {
334   PetscErrorCode ierr;
335   PetscReal      *tmp, **tmp_Q;
336   PetscInt       i, n, old_n;
337 
338   PetscFunctionBegin;
339   df->dim = dim;
340   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
341 
342   old_n = df->cur_num_cp;
343   df->cur_num_cp += INCRE_DIM;
344   n = df->cur_num_cp;
345 
346   /* memory space required by dai-fletcher */
347   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
348   ierr = PetscArraycpy(tmp, df->f, old_n);CHKERRQ(ierr);
349   ierr = PetscFree(df->f);CHKERRQ(ierr);
350   df->f = tmp;
351 
352   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
353   ierr = PetscArraycpy(tmp, df->a, old_n);CHKERRQ(ierr);
354   ierr = PetscFree(df->a);CHKERRQ(ierr);
355   df->a = tmp;
356 
357   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
358   ierr = PetscArraycpy(tmp, df->l, old_n);CHKERRQ(ierr);
359   ierr = PetscFree(df->l);CHKERRQ(ierr);
360   df->l = tmp;
361 
362   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
363   ierr = PetscArraycpy(tmp, df->u, old_n);CHKERRQ(ierr);
364   ierr = PetscFree(df->u);CHKERRQ(ierr);
365   df->u = tmp;
366 
367   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
368   ierr = PetscArraycpy(tmp, df->x, old_n);CHKERRQ(ierr);
369   ierr = PetscFree(df->x);CHKERRQ(ierr);
370   df->x = tmp;
371 
372   ierr = PetscMalloc1(n, &tmp_Q);CHKERRQ(ierr);
373   for (i = 0; i < n; i ++) {
374     ierr = PetscMalloc1(n, &tmp_Q[i]);CHKERRQ(ierr);
375     if (i < old_n) {
376       ierr = PetscArraycpy(tmp_Q[i], df->Q[i], old_n);CHKERRQ(ierr);
377       ierr = PetscFree(df->Q[i]);CHKERRQ(ierr);
378     }
379   }
380 
381   ierr = PetscFree(df->Q);CHKERRQ(ierr);
382   df->Q = tmp_Q;
383 
384   ierr = PetscFree(df->g);CHKERRQ(ierr);
385   ierr = PetscMalloc1(n, &df->g);CHKERRQ(ierr);
386 
387   ierr = PetscFree(df->y);CHKERRQ(ierr);
388   ierr = PetscMalloc1(n, &df->y);CHKERRQ(ierr);
389 
390   ierr = PetscFree(df->tempv);CHKERRQ(ierr);
391   ierr = PetscMalloc1(n, &df->tempv);CHKERRQ(ierr);
392 
393   ierr = PetscFree(df->d);CHKERRQ(ierr);
394   ierr = PetscMalloc1(n, &df->d);CHKERRQ(ierr);
395 
396   ierr = PetscFree(df->Qd);CHKERRQ(ierr);
397   ierr = PetscMalloc1(n, &df->Qd);CHKERRQ(ierr);
398 
399   ierr = PetscFree(df->t);CHKERRQ(ierr);
400   ierr = PetscMalloc1(n, &df->t);CHKERRQ(ierr);
401 
402   ierr = PetscFree(df->xplus);CHKERRQ(ierr);
403   ierr = PetscMalloc1(n, &df->xplus);CHKERRQ(ierr);
404 
405   ierr = PetscFree(df->tplus);CHKERRQ(ierr);
406   ierr = PetscMalloc1(n, &df->tplus);CHKERRQ(ierr);
407 
408   ierr = PetscFree(df->sk);CHKERRQ(ierr);
409   ierr = PetscMalloc1(n, &df->sk);CHKERRQ(ierr);
410 
411   ierr = PetscFree(df->yk);CHKERRQ(ierr);
412   ierr = PetscMalloc1(n, &df->yk);CHKERRQ(ierr);
413 
414   ierr = PetscFree(df->ipt);CHKERRQ(ierr);
415   ierr = PetscMalloc1(n, &df->ipt);CHKERRQ(ierr);
416 
417   ierr = PetscFree(df->ipt2);CHKERRQ(ierr);
418   ierr = PetscMalloc1(n, &df->ipt2);CHKERRQ(ierr);
419 
420   ierr = PetscFree(df->uv);CHKERRQ(ierr);
421   ierr = PetscMalloc1(n, &df->uv);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 PetscErrorCode destroy_df_solver(TAO_DF *df)
426 {
427   PetscErrorCode ierr;
428   PetscInt       i;
429 
430   PetscFunctionBegin;
431   ierr = PetscFree(df->f);CHKERRQ(ierr);
432   ierr = PetscFree(df->a);CHKERRQ(ierr);
433   ierr = PetscFree(df->l);CHKERRQ(ierr);
434   ierr = PetscFree(df->u);CHKERRQ(ierr);
435   ierr = PetscFree(df->x);CHKERRQ(ierr);
436 
437   for (i = 0; i < df->cur_num_cp; i ++) {
438     ierr = PetscFree(df->Q[i]);CHKERRQ(ierr);
439   }
440   ierr = PetscFree(df->Q);CHKERRQ(ierr);
441   ierr = PetscFree(df->ipt);CHKERRQ(ierr);
442   ierr = PetscFree(df->ipt2);CHKERRQ(ierr);
443   ierr = PetscFree(df->uv);CHKERRQ(ierr);
444   ierr = PetscFree(df->g);CHKERRQ(ierr);
445   ierr = PetscFree(df->y);CHKERRQ(ierr);
446   ierr = PetscFree(df->tempv);CHKERRQ(ierr);
447   ierr = PetscFree(df->d);CHKERRQ(ierr);
448   ierr = PetscFree(df->Qd);CHKERRQ(ierr);
449   ierr = PetscFree(df->t);CHKERRQ(ierr);
450   ierr = PetscFree(df->xplus);CHKERRQ(ierr);
451   ierr = PetscFree(df->tplus);CHKERRQ(ierr);
452   ierr = PetscFree(df->sk);CHKERRQ(ierr);
453   ierr = PetscFree(df->yk);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 
457 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
458 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
459 {
460   PetscReal r = 0.0;
461   PetscInt  i;
462 
463   for (i = 0; i < n; i++){
464     x[i] = -c[i] + lambda*a[i];
465     if (x[i] > u[i])     x[i] = u[i];
466     else if(x[i] < l[i]) x[i] = l[i];
467     r += a[i]*x[i];
468   }
469   return r - b;
470 }
471 
472 /** Modified Dai-Fletcher QP projector solves the problem:
473  *
474  *      minimise  0.5*x'*x - c'*x
475  *      subj to   a'*x = b
476  *                l \leq x \leq u
477  *
478  *  \param c The point to be projected onto feasible set
479  */
480 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
481 {
482   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
483   PetscReal      r, rl, ru, s;
484   PetscInt       innerIter;
485   PetscBool      nonNegativeSlack = PETSC_FALSE;
486   PetscErrorCode ierr;
487 
488   *lam_ext = 0;
489   lambda  = 0;
490   dlambda = 0.5;
491   innerIter = 1;
492 
493   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
494    *
495    *  Optimality conditions for \phi:
496    *
497    *  1. lambda   <= 0
498    *  2. r        <= 0
499    *  3. r*lambda == 0
500    */
501 
502   /* Bracketing Phase */
503   r = phi(x, n, lambda, a, b, c, l, u);
504 
505   if(nonNegativeSlack) {
506     /* inequality constraint, i.e., with \xi >= 0 constraint */
507     if (r < TOL_R) return 0;
508   } else  {
509     /* equality constraint ,i.e., without \xi >= 0 constraint */
510     if (PetscAbsReal(r) < TOL_R) return 0;
511   }
512 
513   if (r < 0.0){
514     lambdal = lambda;
515     rl      = r;
516     lambda  = lambda + dlambda;
517     r       = phi(x, n, lambda, a, b, c, l, u);
518     while (r < 0.0 && dlambda < BMRM_INFTY)  {
519       lambdal = lambda;
520       s       = rl/r - 1.0;
521       if (s < 0.1) s = 0.1;
522       dlambda = dlambda + dlambda/s;
523       lambda  = lambda + dlambda;
524       rl      = r;
525       r       = phi(x, n, lambda, a, b, c, l, u);
526     }
527     lambdau = lambda;
528     ru      = r;
529   } else {
530     lambdau = lambda;
531     ru      = r;
532     lambda  = lambda - dlambda;
533     r       = phi(x, n, lambda, a, b, c, l, u);
534     while (r > 0.0 && dlambda > -BMRM_INFTY) {
535       lambdau = lambda;
536       s       = ru/r - 1.0;
537       if (s < 0.1) s = 0.1;
538       dlambda = dlambda + dlambda/s;
539       lambda  = lambda - dlambda;
540       ru      = r;
541       r       = phi(x, n, lambda, a, b, c, l, u);
542     }
543     lambdal = lambda;
544     rl      = r;
545   }
546 
547   if(PetscAbsReal(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
548 
549   if(ru == 0){
550     return innerIter;
551   }
552 
553   /* Secant Phase */
554   s       = 1.0 - rl/ru;
555   dlambda = dlambda/s;
556   lambda  = lambdau - dlambda;
557   r       = phi(x, n, lambda, a, b, c, l, u);
558 
559   while (PetscAbsReal(r) > TOL_R
560          && dlambda > TOL_LAM * (1.0 + PetscAbsReal(lambda))
561          && innerIter < df->maxProjIter){
562     innerIter++;
563     if (r > 0.0){
564       if (s <= 2.0){
565         lambdau = lambda;
566         ru      = r;
567         s       = 1.0 - rl/ru;
568         dlambda = (lambdau - lambdal) / s;
569         lambda  = lambdau - dlambda;
570       } else {
571         s          = ru/r-1.0;
572         if (s < 0.1) s = 0.1;
573         dlambda    = (lambdau - lambda) / s;
574         lambda_new = 0.75*lambdal + 0.25*lambda;
575         if (lambda_new < (lambda - dlambda))
576           lambda_new = lambda - dlambda;
577         lambdau    = lambda;
578         ru         = r;
579         lambda     = lambda_new;
580         s          = (lambdau - lambdal) / (lambdau - lambda);
581       }
582     } else {
583       if (s >= 2.0){
584         lambdal = lambda;
585         rl      = r;
586         s       = 1.0 - rl/ru;
587         dlambda = (lambdau - lambdal) / s;
588         lambda  = lambdau - dlambda;
589       } else {
590         s          = rl/r - 1.0;
591         if (s < 0.1) s = 0.1;
592         dlambda    = (lambda-lambdal) / s;
593         lambda_new = 0.75*lambdau + 0.25*lambda;
594         if (lambda_new > (lambda + dlambda))
595           lambda_new = lambda + dlambda;
596         lambdal    = lambda;
597         rl         = r;
598         lambda     = lambda_new;
599         s          = (lambdau - lambdal) / (lambdau-lambda);
600       }
601     }
602     r = phi(x, n, lambda, a, b, c, l, u);
603   }
604 
605   *lam_ext = lambda;
606   if(innerIter >= df->maxProjIter) {
607     ierr = PetscInfo(NULL,"WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
608   }
609   return innerIter;
610 }
611 
612 
613 PetscErrorCode solve(TAO_DF *df)
614 {
615   PetscErrorCode ierr;
616   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
617   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
618   PetscReal      DELTAsv, ProdDELTAsv;
619   PetscReal      c, *tempQ;
620   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
621   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
622   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
623   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
624   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
625 
626   /*** variables for the adaptive nonmonotone linesearch ***/
627   PetscInt    L, llast;
628   PetscReal   fr, fbest, fv, fc, fv0;
629 
630   c = BMRM_INFTY;
631 
632   DELTAsv = EPS_SV;
633   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
634   else  ProdDELTAsv = EPS_SV;
635 
636   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
637 
638   lam_ext = 0.0;
639 
640   /* Project the initial solution */
641   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
642 
643   /* Compute gradient
644      g = Q*x + f; */
645 
646   it = 0;
647   for (i = 0; i < dim; i++) {
648     if (PetscAbsReal(x[i]) > ProdDELTAsv) ipt[it++] = i;
649   }
650 
651   ierr = PetscArrayzero(t, dim);CHKERRQ(ierr);
652   for (i = 0; i < it; i++){
653     tempQ = Q[ipt[i]];
654     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
655   }
656   for (i = 0; i < dim; i++){
657     g[i] = t[i] + f[i];
658   }
659 
660 
661   /* y = -(x_{k} - g_{k}) */
662   for (i = 0; i < dim; i++){
663     y[i] = g[i] - x[i];
664   }
665 
666   /* Project x_{k} - g_{k} */
667   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
668 
669   /* y = P(x_{k} - g_{k}) - x_{k} */
670   max = ALPHA_MIN;
671   for (i = 0; i < dim; i++){
672     y[i] = tempv[i] - x[i];
673     if (PetscAbsReal(y[i]) > max) max = PetscAbsReal(y[i]);
674   }
675 
676   if (max < tol*1e-3){
677     return 0;
678   }
679 
680   alpha = 1.0 / max;
681 
682   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
683   fv0   = 0.0;
684   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
685 
686   /*** adaptive nonmonotone linesearch ***/
687   L     = 2;
688   fr    = ALPHA_MAX;
689   fbest = fv0;
690   fc    = fv0;
691   llast = 0;
692   akold = bkold = 0.0;
693 
694   /***      Iterator begins     ***/
695   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
696 
697     /* tempv = -(x_{k} - alpha*g_{k}) */
698     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
699 
700     /* Project x_{k} - alpha*g_{k} */
701     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
702 
703 
704     /* gd = \inner{d_{k}}{g_{k}}
705         d = P(x_{k} - alpha*g_{k}) - x_{k}
706     */
707     gd = 0.0;
708     for (i = 0; i < dim; i++){
709       d[i] = y[i] - x[i];
710       gd  += d[i] * g[i];
711     }
712 
713     /* Gradient computation  */
714 
715     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
716 
717     it = it2 = 0;
718     for (i = 0; i < dim; i++){
719       if (PetscAbsReal(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
720     }
721     for (i = 0; i < dim; i++) {
722       if (PetscAbsReal(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
723     }
724 
725     ierr = PetscArrayzero(Qd, dim);CHKERRQ(ierr);
726     /* compute Qd = Q*d */
727     if (it < it2){
728       for (i = 0; i < it; i++){
729         tempQ = Q[ipt[i]];
730         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
731       }
732     } else { /* compute Qd = Q*y-t */
733       for (i = 0; i < it2; i++){
734         tempQ = Q[ipt2[i]];
735         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
736       }
737       for (j = 0; j < dim; j++) Qd[j] -= t[j];
738     }
739 
740     /* ak = inner{d_{k}}{d_{k}} */
741     ak = 0.0;
742     for (i = 0; i < dim; i++) ak += d[i] * d[i];
743 
744     bk = 0.0;
745     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
746 
747     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
748     else lamnew = 1.0;
749 
750     /* fv is computing f(x_{k} + d_{k}) */
751     fv = 0.0;
752     for (i = 0; i < dim; i++){
753       xplus[i] = x[i] + d[i];
754       tplus[i] = t[i] + Qd[i];
755       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
756     }
757 
758     /* fr is fref */
759     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
760       lscount++;
761       fv = 0.0;
762       for (i = 0; i < dim; i++){
763         xplus[i] = x[i] + lamnew*d[i];
764         tplus[i] = t[i] + lamnew*Qd[i];
765         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
766       }
767     }
768 
769     for (i = 0; i < dim; i++){
770       sk[i] = xplus[i] - x[i];
771       yk[i] = tplus[i] - t[i];
772       x[i]  = xplus[i];
773       t[i]  = tplus[i];
774       g[i]  = t[i] + f[i];
775     }
776 
777     /* update the line search control parameters */
778     if (fv < fbest){
779       fbest = fv;
780       fc    = fv;
781       llast = 0;
782     } else {
783       fc = (fc > fv ? fc : fv);
784       llast++;
785       if (llast == L){
786         fr    = fc;
787         fc    = fv;
788         llast = 0;
789       }
790     }
791 
792     ak = bk = 0.0;
793     for (i = 0; i < dim; i++){
794       ak += sk[i] * sk[i];
795       bk += sk[i] * yk[i];
796     }
797 
798     if (bk <= EPS*ak) alpha = ALPHA_MAX;
799     else {
800       if (bkold < EPS*akold) alpha = ak/bk;
801       else alpha = (akold+ak)/(bkold+bk);
802 
803       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
804       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
805     }
806 
807     akold = ak;
808     bkold = bk;
809 
810     /*** stopping criterion based on KKT conditions ***/
811     /* at optimal, gradient of lagrangian w.r.t. x is zero */
812 
813     bk = 0.0;
814     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
815 
816     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
817       it     = 0;
818       luv    = 0;
819       kktlam = 0.0;
820       for (i = 0; i < dim; i++){
821         /* x[i] is active hence lagrange multipliers for box constraints
822                 are zero. The lagrange multiplier for ineq. const. is then
823                 defined as below
824         */
825         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
826           ipt[it++] = i;
827           kktlam    = kktlam - a[i]*g[i];
828         } else  uv[luv++] = i;
829       }
830 
831       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
832       else {
833         kktlam = kktlam/it;
834         info   = 1;
835         for (i = 0; i < it; i++) {
836           if (PetscAbsReal(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
837             info = 0;
838             break;
839           }
840         }
841         if (info == 1)  {
842           for (i = 0; i < luv; i++)  {
843             if (x[uv[i]] <= DELTAsv){
844               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
845                      not be zero. So, the gradient without beta is > 0
846               */
847               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
848                 info = 0;
849                 break;
850               }
851             } else {
852               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
853                      not be zero. So, the gradient without eta is < 0
854               */
855               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
856                 info = 0;
857                 break;
858               }
859             }
860           }
861         }
862 
863         if (info == 1) return 0;
864       }
865     }
866   }
867   return 0;
868 }
869 
870 
871