xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision e75c7bc61fc538cbc84223efcf615e7634e8ca4e)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 /*------------------------------------------------------------*/
11 /* The main solver function
12 
13    f = Remp(W)          This is what the user provides us from the application layer
14    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
15 
16    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
17 */
18 
19 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
20 {
21   PetscErrorCode ierr;
22 
23   PetscFunctionBegin;
24   ierr = PetscNew(p);CHKERRQ(ierr);
25   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
26   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
27   (*p)->next = NULL;
28   PetscFunctionReturn(0);
29 }
30 
31 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
32 {
33   PetscErrorCode ierr;
34   Vec_Chain      *p = head->next, *q;
35 
36   PetscFunctionBegin;
37   while (p) {
38     q = p->next;
39     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
40     ierr = PetscFree(p);CHKERRQ(ierr);
41     p = q;
42   }
43   head->next = NULL;
44   PetscFunctionReturn(0);
45 }
46 
47 static PetscErrorCode TaoSolve_BMRM(Tao tao)
48 {
49   PetscErrorCode     ierr;
50   TAO_DF             df;
51   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
52 
53   /* Values and pointers to parts of the optimization problem */
54   PetscReal          f = 0.0;
55   Vec                W = tao->solution;
56   Vec                G = tao->gradient;
57   PetscReal          lambda;
58   PetscReal          bt;
59   Vec_Chain          grad_list, *tail_glist, *pgrad;
60   PetscInt           i;
61   PetscMPIInt        rank;
62 
63   /* Used in converged criteria check */
64   PetscReal          reg;
65   PetscReal          jtwt = 0.0, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
66   PetscReal          innerSolverTol;
67   MPI_Comm           comm;
68 
69   PetscFunctionBegin;
70   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
71   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
72   lambda = bmrm->lambda;
73 
74   /* Check Stopping Condition */
75   tao->step = 1.0;
76   max_jtwt = -BMRM_INFTY;
77   min_jw = BMRM_INFTY;
78   innerSolverTol = 1.0;
79   epsilon = 0.0;
80 
81   if (rank == 0) {
82     ierr = init_df_solver(&df);CHKERRQ(ierr);
83     grad_list.next = NULL;
84     tail_glist = &grad_list;
85   }
86 
87   df.tol = 1e-6;
88   tao->reason = TAO_CONTINUE_ITERATING;
89 
90   /*-----------------Algorithm Begins------------------------*/
91   /* make the scatter */
92   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
93   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
94   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
95 
96   /* NOTE: In application pass the sub-gradient of Remp(W) */
97   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
98   ierr = TaoLogConvergenceHistory(tao,f,1.0,0.0,tao->ksp_its);CHKERRQ(ierr);
99   ierr = TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step);CHKERRQ(ierr);
100   ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
101 
102   while (tao->reason == TAO_CONTINUE_ITERATING) {
103     /* Call general purpose update function */
104     if (tao->ops->update) {
105       ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr);
106     }
107 
108     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
109     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
110     bt = f - bt;
111 
112     /* First gather the gradient to the rank-0 node */
113     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
114     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
115 
116     /* Bring up the inner solver */
117     if (rank == 0) {
118       ierr = ensure_df_space(tao->niter+1, &df);CHKERRQ(ierr);
119       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
120       tail_glist->next = pgrad;
121       tail_glist = pgrad;
122 
123       df.a[tao->niter] = 1.0;
124       df.f[tao->niter] = -bt;
125       df.u[tao->niter] = 1.0;
126       df.l[tao->niter] = 0.0;
127 
128       /* set up the Q */
129       pgrad = grad_list.next;
130       for (i=0; i<=tao->niter; i++) {
131         if (!pgrad) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Assert that there are at least tao->niter+1 pgrad available");
132         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
133         df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
134         pgrad = pgrad->next;
135       }
136 
137       if (tao->niter > 0) {
138         df.x[tao->niter] = 0.0;
139         ierr = solve(&df);CHKERRQ(ierr);
140       } else
141         df.x[0] = 1.0;
142 
143       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
144       jtwt = 0.0;
145       ierr = VecSet(bmrm->local_w, 0.0);CHKERRQ(ierr);
146       pgrad = grad_list.next;
147       for (i=0; i<=tao->niter; i++) {
148         jtwt -= df.x[i] * df.f[i];
149         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V);CHKERRQ(ierr);
150         pgrad = pgrad->next;
151       }
152 
153       ierr = VecNorm(bmrm->local_w, NORM_2, &reg);CHKERRQ(ierr);
154       reg = 0.5*lambda*reg*reg;
155       jtwt -= reg;
156     } /* end if rank == 0 */
157 
158     /* scatter the new W to all nodes */
159     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
160     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
161 
162     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
163 
164     ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRMPI(ierr);
165     ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRMPI(ierr);
166 
167     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
168     if (jw < min_jw) min_jw = jw;
169     if (jtwt > max_jtwt) max_jtwt = jtwt;
170 
171     pre_epsilon = epsilon;
172     epsilon = min_jw - jtwt;
173 
174     if (rank == 0) {
175       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
176       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
177 
178       /* if the annealing doesn't work well, lower the inner solver tolerance */
179       if (pre_epsilon < epsilon) innerSolverTol *= 0.2;
180 
181       df.tol = innerSolverTol*0.5;
182     }
183 
184     tao->niter++;
185     ierr = TaoLogConvergenceHistory(tao,min_jw,epsilon,0.0,tao->ksp_its);CHKERRQ(ierr);
186     ierr = TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step);CHKERRQ(ierr);
187     ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
188   }
189 
190   /* free all the memory */
191   if (rank == 0) {
192     ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
193     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
194   }
195 
196   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
197   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
198   PetscFunctionReturn(0);
199 }
200 
201 /* ---------------------------------------------------------- */
202 
203 static PetscErrorCode TaoSetup_BMRM(Tao tao)
204 {
205 
206   PetscErrorCode ierr;
207 
208   PetscFunctionBegin;
209   /* Allocate some arrays */
210   if (!tao->gradient) {
211     ierr = VecDuplicate(tao->solution, &tao->gradient);CHKERRQ(ierr);
212   }
213   PetscFunctionReturn(0);
214 }
215 
216 /*------------------------------------------------------------*/
217 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
218 {
219   PetscErrorCode ierr;
220 
221   PetscFunctionBegin;
222   ierr = PetscFree(tao->data);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 static PetscErrorCode TaoSetFromOptions_BMRM(PetscOptionItems *PetscOptionsObject,Tao tao)
227 {
228   PetscErrorCode ierr;
229   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
230 
231   PetscFunctionBegin;
232   ierr = PetscOptionsHead(PetscOptionsObject,"BMRM for regularized risk minimization");CHKERRQ(ierr);
233   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL);CHKERRQ(ierr);
234   ierr = PetscOptionsTail();CHKERRQ(ierr);
235   PetscFunctionReturn(0);
236 }
237 
238 /*------------------------------------------------------------*/
239 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
240 {
241   PetscBool      isascii;
242   PetscErrorCode ierr;
243 
244   PetscFunctionBegin;
245   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
246   if (isascii) {
247     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
248     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
249   }
250   PetscFunctionReturn(0);
251 }
252 
253 /*------------------------------------------------------------*/
254 /*MC
255   TAOBMRM - bundle method for regularized risk minimization
256 
257   Options Database Keys:
258 . - tao_bmrm_lambda - regulariser weight
259 
260   Level: beginner
261 M*/
262 
263 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
264 {
265   TAO_BMRM       *bmrm;
266   PetscErrorCode ierr;
267 
268   PetscFunctionBegin;
269   tao->ops->setup = TaoSetup_BMRM;
270   tao->ops->solve = TaoSolve_BMRM;
271   tao->ops->view  = TaoView_BMRM;
272   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
273   tao->ops->destroy = TaoDestroy_BMRM;
274 
275   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
276   bmrm->lambda = 1.0;
277   tao->data = (void*)bmrm;
278 
279   /* Override default settings (unless already changed) */
280   if (!tao->max_it_changed) tao->max_it = 2000;
281   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
282   if (!tao->gatol_changed) tao->gatol = 1.0e-12;
283   if (!tao->grtol_changed) tao->grtol = 1.0e-12;
284 
285   PetscFunctionReturn(0);
286 }
287 
288 PetscErrorCode init_df_solver(TAO_DF *df)
289 {
290   PetscInt       i, n = INCRE_DIM;
291   PetscErrorCode ierr;
292 
293   PetscFunctionBegin;
294   /* default values */
295   df->maxProjIter = 200;
296   df->maxPGMIter = 300000;
297   df->b = 1.0;
298 
299   /* memory space required by Dai-Fletcher */
300   df->cur_num_cp = n;
301   ierr = PetscMalloc1(n, &df->f);CHKERRQ(ierr);
302   ierr = PetscMalloc1(n, &df->a);CHKERRQ(ierr);
303   ierr = PetscMalloc1(n, &df->l);CHKERRQ(ierr);
304   ierr = PetscMalloc1(n, &df->u);CHKERRQ(ierr);
305   ierr = PetscMalloc1(n, &df->x);CHKERRQ(ierr);
306   ierr = PetscMalloc1(n, &df->Q);CHKERRQ(ierr);
307 
308   for (i = 0; i < n; i ++) {
309     ierr = PetscMalloc1(n, &df->Q[i]);CHKERRQ(ierr);
310   }
311 
312   ierr = PetscMalloc1(n, &df->g);CHKERRQ(ierr);
313   ierr = PetscMalloc1(n, &df->y);CHKERRQ(ierr);
314   ierr = PetscMalloc1(n, &df->tempv);CHKERRQ(ierr);
315   ierr = PetscMalloc1(n, &df->d);CHKERRQ(ierr);
316   ierr = PetscMalloc1(n, &df->Qd);CHKERRQ(ierr);
317   ierr = PetscMalloc1(n, &df->t);CHKERRQ(ierr);
318   ierr = PetscMalloc1(n, &df->xplus);CHKERRQ(ierr);
319   ierr = PetscMalloc1(n, &df->tplus);CHKERRQ(ierr);
320   ierr = PetscMalloc1(n, &df->sk);CHKERRQ(ierr);
321   ierr = PetscMalloc1(n, &df->yk);CHKERRQ(ierr);
322 
323   ierr = PetscMalloc1(n, &df->ipt);CHKERRQ(ierr);
324   ierr = PetscMalloc1(n, &df->ipt2);CHKERRQ(ierr);
325   ierr = PetscMalloc1(n, &df->uv);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 
329 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
330 {
331   PetscErrorCode ierr;
332   PetscReal      *tmp, **tmp_Q;
333   PetscInt       i, n, old_n;
334 
335   PetscFunctionBegin;
336   df->dim = dim;
337   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
338 
339   old_n = df->cur_num_cp;
340   df->cur_num_cp += INCRE_DIM;
341   n = df->cur_num_cp;
342 
343   /* memory space required by dai-fletcher */
344   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
345   ierr = PetscArraycpy(tmp, df->f, old_n);CHKERRQ(ierr);
346   ierr = PetscFree(df->f);CHKERRQ(ierr);
347   df->f = tmp;
348 
349   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
350   ierr = PetscArraycpy(tmp, df->a, old_n);CHKERRQ(ierr);
351   ierr = PetscFree(df->a);CHKERRQ(ierr);
352   df->a = tmp;
353 
354   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
355   ierr = PetscArraycpy(tmp, df->l, old_n);CHKERRQ(ierr);
356   ierr = PetscFree(df->l);CHKERRQ(ierr);
357   df->l = tmp;
358 
359   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
360   ierr = PetscArraycpy(tmp, df->u, old_n);CHKERRQ(ierr);
361   ierr = PetscFree(df->u);CHKERRQ(ierr);
362   df->u = tmp;
363 
364   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
365   ierr = PetscArraycpy(tmp, df->x, old_n);CHKERRQ(ierr);
366   ierr = PetscFree(df->x);CHKERRQ(ierr);
367   df->x = tmp;
368 
369   ierr = PetscMalloc1(n, &tmp_Q);CHKERRQ(ierr);
370   for (i = 0; i < n; i ++) {
371     ierr = PetscMalloc1(n, &tmp_Q[i]);CHKERRQ(ierr);
372     if (i < old_n) {
373       ierr = PetscArraycpy(tmp_Q[i], df->Q[i], old_n);CHKERRQ(ierr);
374       ierr = PetscFree(df->Q[i]);CHKERRQ(ierr);
375     }
376   }
377 
378   ierr = PetscFree(df->Q);CHKERRQ(ierr);
379   df->Q = tmp_Q;
380 
381   ierr = PetscFree(df->g);CHKERRQ(ierr);
382   ierr = PetscMalloc1(n, &df->g);CHKERRQ(ierr);
383 
384   ierr = PetscFree(df->y);CHKERRQ(ierr);
385   ierr = PetscMalloc1(n, &df->y);CHKERRQ(ierr);
386 
387   ierr = PetscFree(df->tempv);CHKERRQ(ierr);
388   ierr = PetscMalloc1(n, &df->tempv);CHKERRQ(ierr);
389 
390   ierr = PetscFree(df->d);CHKERRQ(ierr);
391   ierr = PetscMalloc1(n, &df->d);CHKERRQ(ierr);
392 
393   ierr = PetscFree(df->Qd);CHKERRQ(ierr);
394   ierr = PetscMalloc1(n, &df->Qd);CHKERRQ(ierr);
395 
396   ierr = PetscFree(df->t);CHKERRQ(ierr);
397   ierr = PetscMalloc1(n, &df->t);CHKERRQ(ierr);
398 
399   ierr = PetscFree(df->xplus);CHKERRQ(ierr);
400   ierr = PetscMalloc1(n, &df->xplus);CHKERRQ(ierr);
401 
402   ierr = PetscFree(df->tplus);CHKERRQ(ierr);
403   ierr = PetscMalloc1(n, &df->tplus);CHKERRQ(ierr);
404 
405   ierr = PetscFree(df->sk);CHKERRQ(ierr);
406   ierr = PetscMalloc1(n, &df->sk);CHKERRQ(ierr);
407 
408   ierr = PetscFree(df->yk);CHKERRQ(ierr);
409   ierr = PetscMalloc1(n, &df->yk);CHKERRQ(ierr);
410 
411   ierr = PetscFree(df->ipt);CHKERRQ(ierr);
412   ierr = PetscMalloc1(n, &df->ipt);CHKERRQ(ierr);
413 
414   ierr = PetscFree(df->ipt2);CHKERRQ(ierr);
415   ierr = PetscMalloc1(n, &df->ipt2);CHKERRQ(ierr);
416 
417   ierr = PetscFree(df->uv);CHKERRQ(ierr);
418   ierr = PetscMalloc1(n, &df->uv);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 PetscErrorCode destroy_df_solver(TAO_DF *df)
423 {
424   PetscErrorCode ierr;
425   PetscInt       i;
426 
427   PetscFunctionBegin;
428   ierr = PetscFree(df->f);CHKERRQ(ierr);
429   ierr = PetscFree(df->a);CHKERRQ(ierr);
430   ierr = PetscFree(df->l);CHKERRQ(ierr);
431   ierr = PetscFree(df->u);CHKERRQ(ierr);
432   ierr = PetscFree(df->x);CHKERRQ(ierr);
433 
434   for (i = 0; i < df->cur_num_cp; i ++) {
435     ierr = PetscFree(df->Q[i]);CHKERRQ(ierr);
436   }
437   ierr = PetscFree(df->Q);CHKERRQ(ierr);
438   ierr = PetscFree(df->ipt);CHKERRQ(ierr);
439   ierr = PetscFree(df->ipt2);CHKERRQ(ierr);
440   ierr = PetscFree(df->uv);CHKERRQ(ierr);
441   ierr = PetscFree(df->g);CHKERRQ(ierr);
442   ierr = PetscFree(df->y);CHKERRQ(ierr);
443   ierr = PetscFree(df->tempv);CHKERRQ(ierr);
444   ierr = PetscFree(df->d);CHKERRQ(ierr);
445   ierr = PetscFree(df->Qd);CHKERRQ(ierr);
446   ierr = PetscFree(df->t);CHKERRQ(ierr);
447   ierr = PetscFree(df->xplus);CHKERRQ(ierr);
448   ierr = PetscFree(df->tplus);CHKERRQ(ierr);
449   ierr = PetscFree(df->sk);CHKERRQ(ierr);
450   ierr = PetscFree(df->yk);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
455 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
456 {
457   PetscReal r = 0.0;
458   PetscInt  i;
459 
460   for (i = 0; i < n; i++) {
461     x[i] = -c[i] + lambda*a[i];
462     if (x[i] > u[i])     x[i] = u[i];
463     else if (x[i] < l[i]) x[i] = l[i];
464     r += a[i]*x[i];
465   }
466   return r - b;
467 }
468 
469 /** Modified Dai-Fletcher QP projector solves the problem:
470  *
471  *      minimise  0.5*x'*x - c'*x
472  *      subj to   a'*x = b
473  *                l \leq x \leq u
474  *
475  *  \param c The point to be projected onto feasible set
476  */
477 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
478 {
479   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
480   PetscReal      r, rl, ru, s;
481   PetscInt       innerIter;
482   PetscBool      nonNegativeSlack = PETSC_FALSE;
483   PetscErrorCode ierr;
484 
485   *lam_ext = 0;
486   lambda  = 0;
487   dlambda = 0.5;
488   innerIter = 1;
489 
490   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
491    *
492    *  Optimality conditions for \phi:
493    *
494    *  1. lambda   <= 0
495    *  2. r        <= 0
496    *  3. r*lambda == 0
497    */
498 
499   /* Bracketing Phase */
500   r = phi(x, n, lambda, a, b, c, l, u);
501 
502   if (nonNegativeSlack) {
503     /* inequality constraint, i.e., with \xi >= 0 constraint */
504     if (r < TOL_R) return 0;
505   } else  {
506     /* equality constraint ,i.e., without \xi >= 0 constraint */
507     if (PetscAbsReal(r) < TOL_R) return 0;
508   }
509 
510   if (r < 0.0) {
511     lambdal = lambda;
512     rl      = r;
513     lambda  = lambda + dlambda;
514     r       = phi(x, n, lambda, a, b, c, l, u);
515     while (r < 0.0 && dlambda < BMRM_INFTY)  {
516       lambdal = lambda;
517       s       = rl/r - 1.0;
518       if (s < 0.1) s = 0.1;
519       dlambda = dlambda + dlambda/s;
520       lambda  = lambda + dlambda;
521       rl      = r;
522       r       = phi(x, n, lambda, a, b, c, l, u);
523     }
524     lambdau = lambda;
525     ru      = r;
526   } else {
527     lambdau = lambda;
528     ru      = r;
529     lambda  = lambda - dlambda;
530     r       = phi(x, n, lambda, a, b, c, l, u);
531     while (r > 0.0 && dlambda > -BMRM_INFTY) {
532       lambdau = lambda;
533       s       = ru/r - 1.0;
534       if (s < 0.1) s = 0.1;
535       dlambda = dlambda + dlambda/s;
536       lambda  = lambda - dlambda;
537       ru      = r;
538       r       = phi(x, n, lambda, a, b, c, l, u);
539     }
540     lambdal = lambda;
541     rl      = r;
542   }
543 
544   if (PetscAbsReal(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
545 
546   if (ru == 0) {
547     return innerIter;
548   }
549 
550   /* Secant Phase */
551   s       = 1.0 - rl/ru;
552   dlambda = dlambda/s;
553   lambda  = lambdau - dlambda;
554   r       = phi(x, n, lambda, a, b, c, l, u);
555 
556   while (PetscAbsReal(r) > TOL_R
557          && dlambda > TOL_LAM * (1.0 + PetscAbsReal(lambda))
558          && innerIter < df->maxProjIter) {
559     innerIter++;
560     if (r > 0.0) {
561       if (s <= 2.0) {
562         lambdau = lambda;
563         ru      = r;
564         s       = 1.0 - rl/ru;
565         dlambda = (lambdau - lambdal) / s;
566         lambda  = lambdau - dlambda;
567       } else {
568         s          = ru/r-1.0;
569         if (s < 0.1) s = 0.1;
570         dlambda    = (lambdau - lambda) / s;
571         lambda_new = 0.75*lambdal + 0.25*lambda;
572         if (lambda_new < (lambda - dlambda))
573           lambda_new = lambda - dlambda;
574         lambdau    = lambda;
575         ru         = r;
576         lambda     = lambda_new;
577         s          = (lambdau - lambdal) / (lambdau - lambda);
578       }
579     } else {
580       if (s >= 2.0) {
581         lambdal = lambda;
582         rl      = r;
583         s       = 1.0 - rl/ru;
584         dlambda = (lambdau - lambdal) / s;
585         lambda  = lambdau - dlambda;
586       } else {
587         s          = rl/r - 1.0;
588         if (s < 0.1) s = 0.1;
589         dlambda    = (lambda-lambdal) / s;
590         lambda_new = 0.75*lambdau + 0.25*lambda;
591         if (lambda_new > (lambda + dlambda))
592           lambda_new = lambda + dlambda;
593         lambdal    = lambda;
594         rl         = r;
595         lambda     = lambda_new;
596         s          = (lambdau - lambdal) / (lambdau-lambda);
597       }
598     }
599     r = phi(x, n, lambda, a, b, c, l, u);
600   }
601 
602   *lam_ext = lambda;
603   if (innerIter >= df->maxProjIter) {
604     ierr = PetscInfo(NULL,"WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
605   }
606   return innerIter;
607 }
608 
609 PetscErrorCode solve(TAO_DF *df)
610 {
611   PetscErrorCode ierr;
612   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0;
613   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
614   PetscReal      DELTAsv, ProdDELTAsv;
615   PetscReal      c, *tempQ;
616   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
617   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
618   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
619   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
620   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
621 
622   /* variables for the adaptive nonmonotone linesearch */
623   PetscInt    L, llast;
624   PetscReal   fr, fbest, fv, fc, fv0;
625 
626   c = BMRM_INFTY;
627 
628   DELTAsv = EPS_SV;
629   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
630   else  ProdDELTAsv = EPS_SV;
631 
632   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
633 
634   lam_ext = 0.0;
635 
636   /* Project the initial solution */
637   project(dim, a, b, tempv, l, u, x, &lam_ext, df);
638 
639   /* Compute gradient
640      g = Q*x + f; */
641 
642   it = 0;
643   for (i = 0; i < dim; i++) {
644     if (PetscAbsReal(x[i]) > ProdDELTAsv) ipt[it++] = i;
645   }
646 
647   ierr = PetscArrayzero(t, dim);CHKERRQ(ierr);
648   for (i = 0; i < it; i++) {
649     tempQ = Q[ipt[i]];
650     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
651   }
652   for (i = 0; i < dim; i++) {
653     g[i] = t[i] + f[i];
654   }
655 
656   /* y = -(x_{k} - g_{k}) */
657   for (i = 0; i < dim; i++) {
658     y[i] = g[i] - x[i];
659   }
660 
661   /* Project x_{k} - g_{k} */
662   project(dim, a, b, y, l, u, tempv, &lam_ext, df);
663 
664   /* y = P(x_{k} - g_{k}) - x_{k} */
665   max = ALPHA_MIN;
666   for (i = 0; i < dim; i++) {
667     y[i] = tempv[i] - x[i];
668     if (PetscAbsReal(y[i]) > max) max = PetscAbsReal(y[i]);
669   }
670 
671   if (max < tol*1e-3) {
672     return 0;
673   }
674 
675   alpha = 1.0 / max;
676 
677   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
678   fv0   = 0.0;
679   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
680 
681   /* adaptive nonmonotone linesearch */
682   L     = 2;
683   fr    = ALPHA_MAX;
684   fbest = fv0;
685   fc    = fv0;
686   llast = 0;
687   akold = bkold = 0.0;
688 
689   /*     Iterator begins     */
690   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
691 
692     /* tempv = -(x_{k} - alpha*g_{k}) */
693     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
694 
695     /* Project x_{k} - alpha*g_{k} */
696     project(dim, a, b, tempv, l, u, y, &lam_ext, df);
697 
698     /* gd = \inner{d_{k}}{g_{k}}
699         d = P(x_{k} - alpha*g_{k}) - x_{k}
700     */
701     gd = 0.0;
702     for (i = 0; i < dim; i++) {
703       d[i] = y[i] - x[i];
704       gd  += d[i] * g[i];
705     }
706 
707     /* Gradient computation  */
708 
709     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
710 
711     it = it2 = 0;
712     for (i = 0; i < dim; i++) {
713       if (PetscAbsReal(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
714     }
715     for (i = 0; i < dim; i++) {
716       if (PetscAbsReal(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
717     }
718 
719     ierr = PetscArrayzero(Qd, dim);CHKERRQ(ierr);
720     /* compute Qd = Q*d */
721     if (it < it2) {
722       for (i = 0; i < it; i++) {
723         tempQ = Q[ipt[i]];
724         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
725       }
726     } else { /* compute Qd = Q*y-t */
727       for (i = 0; i < it2; i++) {
728         tempQ = Q[ipt2[i]];
729         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
730       }
731       for (j = 0; j < dim; j++) Qd[j] -= t[j];
732     }
733 
734     /* ak = inner{d_{k}}{d_{k}} */
735     ak = 0.0;
736     for (i = 0; i < dim; i++) ak += d[i] * d[i];
737 
738     bk = 0.0;
739     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
740 
741     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
742     else lamnew = 1.0;
743 
744     /* fv is computing f(x_{k} + d_{k}) */
745     fv = 0.0;
746     for (i = 0; i < dim; i++) {
747       xplus[i] = x[i] + d[i];
748       tplus[i] = t[i] + Qd[i];
749       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
750     }
751 
752     /* fr is fref */
753     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)) {
754       lscount++;
755       fv = 0.0;
756       for (i = 0; i < dim; i++) {
757         xplus[i] = x[i] + lamnew*d[i];
758         tplus[i] = t[i] + lamnew*Qd[i];
759         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
760       }
761     }
762 
763     for (i = 0; i < dim; i++) {
764       sk[i] = xplus[i] - x[i];
765       yk[i] = tplus[i] - t[i];
766       x[i]  = xplus[i];
767       t[i]  = tplus[i];
768       g[i]  = t[i] + f[i];
769     }
770 
771     /* update the line search control parameters */
772     if (fv < fbest) {
773       fbest = fv;
774       fc    = fv;
775       llast = 0;
776     } else {
777       fc = (fc > fv ? fc : fv);
778       llast++;
779       if (llast == L) {
780         fr    = fc;
781         fc    = fv;
782         llast = 0;
783       }
784     }
785 
786     ak = bk = 0.0;
787     for (i = 0; i < dim; i++) {
788       ak += sk[i] * sk[i];
789       bk += sk[i] * yk[i];
790     }
791 
792     if (bk <= EPS*ak) alpha = ALPHA_MAX;
793     else {
794       if (bkold < EPS*akold) alpha = ak/bk;
795       else alpha = (akold+ak)/(bkold+bk);
796 
797       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
798       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
799     }
800 
801     akold = ak;
802     bkold = bk;
803 
804     /* stopping criterion based on KKT conditions */
805     /* at optimal, gradient of lagrangian w.r.t. x is zero */
806 
807     bk = 0.0;
808     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
809 
810     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)) {
811       it     = 0;
812       luv    = 0;
813       kktlam = 0.0;
814       for (i = 0; i < dim; i++) {
815         /* x[i] is active hence lagrange multipliers for box constraints
816                 are zero. The lagrange multiplier for ineq. const. is then
817                 defined as below
818         */
819         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)) {
820           ipt[it++] = i;
821           kktlam    = kktlam - a[i]*g[i];
822         } else  uv[luv++] = i;
823       }
824 
825       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
826       else {
827         kktlam = kktlam/it;
828         info   = 1;
829         for (i = 0; i < it; i++) {
830           if (PetscAbsReal(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
831             info = 0;
832             break;
833           }
834         }
835         if (info == 1)  {
836           for (i = 0; i < luv; i++)  {
837             if (x[uv[i]] <= DELTAsv) {
838               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
839                      not be zero. So, the gradient without beta is > 0
840               */
841               if (g[uv[i]] + kktlam*a[uv[i]] < -tol) {
842                 info = 0;
843                 break;
844               }
845             } else {
846               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
847                      not be zero. So, the gradient without eta is < 0
848               */
849               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
850                 info = 0;
851                 break;
852               }
853             }
854           }
855         }
856 
857         if (info == 1) return 0;
858       }
859     }
860   }
861   return 0;
862 }
863