xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision 4f02bc6a7df4e4b03c783003a8e0899ee35fcb05)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "make_grad_node"
22 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = PetscNew(p);CHKERRQ(ierr);
28   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
29   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
30   (*p)->next = NULL;
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "destroy_grad_list"
36 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
37 {
38   PetscErrorCode ierr;
39   Vec_Chain      *p = head->next, *q;
40 
41   PetscFunctionBegin;
42   while(p) {
43     q = p->next;
44     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
45     ierr = PetscFree(p);CHKERRQ(ierr);
46     p = q;
47   }
48   head->next = NULL;
49   PetscFunctionReturn(0);
50 }
51 
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "TaoSolve_BMRM"
55 static PetscErrorCode TaoSolve_BMRM(Tao tao)
56 {
57   PetscErrorCode     ierr;
58   TaoConvergedReason reason;
59   TAO_DF             df;
60   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
61 
62   /* Values and pointers to parts of the optimization problem */
63   PetscReal          f = 0.0;
64   Vec                W = tao->solution;
65   Vec                G = tao->gradient;
66   PetscReal          lambda;
67   PetscReal          bt;
68   Vec_Chain          grad_list, *tail_glist, *pgrad;
69   PetscInt           i;
70   PetscMPIInt        rank;
71 
72   /* Used in converged criteria check */
73   PetscReal          reg;
74   PetscReal          jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
75   PetscReal          innerSolverTol;
76   MPI_Comm           comm;
77 
78   PetscFunctionBegin;
79   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
80   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
81   lambda = bmrm->lambda;
82 
83   /* Check Stopping Condition */
84   tao->step = 1.0;
85   max_jtwt = -BMRM_INFTY;
86   min_jw = BMRM_INFTY;
87   innerSolverTol = 1.0;
88   epsilon = 0.0;
89 
90   if (!rank) {
91     ierr = init_df_solver(&df);CHKERRQ(ierr);
92     grad_list.next = NULL;
93     tail_glist = &grad_list;
94   }
95 
96   df.tol = 1e-6;
97   reason = TAO_CONTINUE_ITERATING;
98 
99   /*-----------------Algorithm Begins------------------------*/
100   /* make the scatter */
101   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
102   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
103   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
104 
105   /* NOTE: In application pass the sub-gradient of Remp(W) */
106   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
107   ierr = TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
108   while (reason == TAO_CONTINUE_ITERATING) {
109     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
110     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
111     bt = f - bt;
112 
113     /* First gather the gradient to the master node */
114     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
115     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
116 
117     /* Bring up the inner solver */
118     if (!rank) {
119       ierr = ensure_df_space(tao->niter+1, &df); CHKERRQ(ierr);
120       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
121       tail_glist->next = pgrad;
122       tail_glist = pgrad;
123 
124       df.a[tao->niter] = 1.0;
125       df.f[tao->niter] = -bt;
126       df.u[tao->niter] = 1.0;
127       df.l[tao->niter] = 0.0;
128 
129       /* set up the Q */
130       pgrad = grad_list.next;
131       for (i=0; i<=tao->niter; i++) {
132         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
133         df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
134         pgrad = pgrad->next;
135       }
136 
137       if (tao->niter > 0) {
138         df.x[tao->niter] = 0.0;
139         ierr = solve(&df); CHKERRQ(ierr);
140       } else
141         df.x[0] = 1.0;
142 
143       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
144       jtwt = 0.0;
145       ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr);
146       pgrad = grad_list.next;
147       for (i=0; i<=tao->niter; i++) {
148         jtwt -= df.x[i] * df.f[i];
149         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr);
150         pgrad = pgrad->next;
151       }
152 
153       ierr = VecNorm(bmrm->local_w, NORM_2, &reg); CHKERRQ(ierr);
154       reg = 0.5*lambda*reg*reg;
155       jtwt -= reg;
156     } /* end if rank == 0 */
157 
158     /* scatter the new W to all nodes */
159     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
160     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
161 
162     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
163 
164     ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
165     ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
166 
167     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
168     if (jw < min_jw) min_jw = jw;
169     if (jtwt > max_jtwt) max_jtwt = jtwt;
170 
171     pre_epsilon = epsilon;
172     epsilon = min_jw - jtwt;
173 
174     if (!rank) {
175       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
176       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
177 
178       /* if the annealing doesn't work well, lower the inner solver tolerance */
179       if(pre_epsilon < epsilon) innerSolverTol *= 0.2;
180 
181       df.tol = innerSolverTol*0.5;
182     }
183 
184     tao->niter++;
185     ierr = TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
186   }
187 
188   /* free all the memory */
189   if (!rank) {
190     ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
191     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
192   }
193 
194   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
195   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
196   PetscFunctionReturn(0);
197 }
198 
199 
200 /* ---------------------------------------------------------- */
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "TaoSetup_BMRM"
204 static PetscErrorCode TaoSetup_BMRM(Tao tao)
205 {
206 
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   /* Allocate some arrays */
211   if (!tao->gradient) {
212     ierr = VecDuplicate(tao->solution, &tao->gradient);   CHKERRQ(ierr);
213   }
214   PetscFunctionReturn(0);
215 }
216 
217 /*------------------------------------------------------------*/
218 #undef __FUNCT__
219 #define __FUNCT__ "TaoDestroy_BMRM"
220 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
221 {
222   PetscErrorCode ierr;
223 
224   PetscFunctionBegin;
225   ierr = PetscFree(tao->data);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 #undef __FUNCT__
230 #define __FUNCT__ "TaoSetFromOptions_BMRM"
231 static PetscErrorCode TaoSetFromOptions_BMRM(PetscOptionItems *PetscOptionsObject,Tao tao)
232 {
233   PetscErrorCode ierr;
234   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
235 
236   PetscFunctionBegin;
237   ierr = PetscOptionsHead(PetscOptionsObject,"BMRM for regularized risk minimization");CHKERRQ(ierr);
238   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL); CHKERRQ(ierr);
239   ierr = PetscOptionsTail();CHKERRQ(ierr);
240   PetscFunctionReturn(0);
241 }
242 
243 /*------------------------------------------------------------*/
244 #undef __FUNCT__
245 #define __FUNCT__ "TaoView_BMRM"
246 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
247 {
248   PetscBool      isascii;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
253   if (isascii) {
254     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
255     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 /*------------------------------------------------------------*/
261 /*MC
262   TAOBMRM - bundle method for regularized risk minimization
263 
264   Options Database Keys:
265 . - tao_bmrm_lambda - regulariser weight
266 
267   Level: beginner
268 M*/
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "TaoCreate_BMRM"
272 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
273 {
274   TAO_BMRM       *bmrm;
275   PetscErrorCode ierr;
276 
277   PetscFunctionBegin;
278   tao->ops->setup = TaoSetup_BMRM;
279   tao->ops->solve = TaoSolve_BMRM;
280   tao->ops->view  = TaoView_BMRM;
281   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
282   tao->ops->destroy = TaoDestroy_BMRM;
283 
284   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
285   bmrm->lambda = 1.0;
286   tao->data = (void*)bmrm;
287 
288   /* Override default settings (unless already changed) */
289   if (!tao->max_it_changed) tao->max_it = 2000;
290   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
291   if (!tao->gatol_changed) tao->gatol = 1.0e-12;
292   if (!tao->grtol_changed) tao->grtol = 1.0e-12;
293 
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "init_df_solver"
299 PetscErrorCode init_df_solver(TAO_DF *df)
300 {
301   PetscInt       i, n = INCRE_DIM;
302   PetscErrorCode ierr;
303 
304   PetscFunctionBegin;
305   /* default values */
306   df->maxProjIter = 200;
307   df->maxPGMIter = 300000;
308   df->b = 1.0;
309 
310   /* memory space required by Dai-Fletcher */
311   df->cur_num_cp = n;
312   ierr = PetscMalloc1(n, &df->f); CHKERRQ(ierr);
313   ierr = PetscMalloc1(n, &df->a); CHKERRQ(ierr);
314   ierr = PetscMalloc1(n, &df->l); CHKERRQ(ierr);
315   ierr = PetscMalloc1(n, &df->u); CHKERRQ(ierr);
316   ierr = PetscMalloc1(n, &df->x); CHKERRQ(ierr);
317   ierr = PetscMalloc1(n, &df->Q); CHKERRQ(ierr);
318 
319   for (i = 0; i < n; i ++) {
320     ierr = PetscMalloc1(n, &df->Q[i]); CHKERRQ(ierr);
321   }
322 
323   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
324   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
325   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
326   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
327   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
328   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
329   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
330   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
331   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
332   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
333 
334   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
335   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
336   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 
340 #undef __FUNCT__
341 #define __FUNCT__ "ensure_df_space"
342 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
343 {
344   PetscErrorCode ierr;
345   PetscReal      *tmp, **tmp_Q;
346   PetscInt       i, n, old_n;
347 
348   PetscFunctionBegin;
349   df->dim = dim;
350   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
351 
352   old_n = df->cur_num_cp;
353   df->cur_num_cp += INCRE_DIM;
354   n = df->cur_num_cp;
355 
356   /* memory space required by dai-fletcher */
357   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
358   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
359   ierr = PetscFree(df->f); CHKERRQ(ierr);
360   df->f = tmp;
361 
362   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
363   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
364   ierr = PetscFree(df->a); CHKERRQ(ierr);
365   df->a = tmp;
366 
367   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
368   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
369   ierr = PetscFree(df->l); CHKERRQ(ierr);
370   df->l = tmp;
371 
372   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
373   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
374   ierr = PetscFree(df->u); CHKERRQ(ierr);
375   df->u = tmp;
376 
377   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
378   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
379   ierr = PetscFree(df->x); CHKERRQ(ierr);
380   df->x = tmp;
381 
382   ierr = PetscMalloc1(n, &tmp_Q); CHKERRQ(ierr);
383   for (i = 0; i < n; i ++) {
384     ierr = PetscMalloc1(n, &tmp_Q[i]); CHKERRQ(ierr);
385     if (i < old_n) {
386       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr);
387       ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
388     }
389   }
390 
391   ierr = PetscFree(df->Q); CHKERRQ(ierr);
392   df->Q = tmp_Q;
393 
394   ierr = PetscFree(df->g); CHKERRQ(ierr);
395   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
396 
397   ierr = PetscFree(df->y); CHKERRQ(ierr);
398   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
399 
400   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
401   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
402 
403   ierr = PetscFree(df->d); CHKERRQ(ierr);
404   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
405 
406   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
407   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
408 
409   ierr = PetscFree(df->t); CHKERRQ(ierr);
410   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
411 
412   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
413   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
414 
415   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
416   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
417 
418   ierr = PetscFree(df->sk); CHKERRQ(ierr);
419   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
420 
421   ierr = PetscFree(df->yk); CHKERRQ(ierr);
422   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
423 
424   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
425   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
426 
427   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
428   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
429 
430   ierr = PetscFree(df->uv); CHKERRQ(ierr);
431   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434 
435 #undef __FUNCT__
436 #define __FUNCT__ "destroy_df_solver"
437 PetscErrorCode destroy_df_solver(TAO_DF *df)
438 {
439   PetscErrorCode ierr;
440   PetscInt       i;
441 
442   PetscFunctionBegin;
443   ierr = PetscFree(df->f); CHKERRQ(ierr);
444   ierr = PetscFree(df->a); CHKERRQ(ierr);
445   ierr = PetscFree(df->l); CHKERRQ(ierr);
446   ierr = PetscFree(df->u); CHKERRQ(ierr);
447   ierr = PetscFree(df->x); CHKERRQ(ierr);
448 
449   for (i = 0; i < df->cur_num_cp; i ++) {
450     ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
451   }
452   ierr = PetscFree(df->Q); CHKERRQ(ierr);
453   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
454   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
455   ierr = PetscFree(df->uv); CHKERRQ(ierr);
456   ierr = PetscFree(df->g); CHKERRQ(ierr);
457   ierr = PetscFree(df->y); CHKERRQ(ierr);
458   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
459   ierr = PetscFree(df->d); CHKERRQ(ierr);
460   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
461   ierr = PetscFree(df->t); CHKERRQ(ierr);
462   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
463   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
464   ierr = PetscFree(df->sk); CHKERRQ(ierr);
465   ierr = PetscFree(df->yk); CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
470 #undef __FUNCT__
471 #define __FUNCT__ "phi"
472 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
473 {
474   PetscReal r = 0.0;
475   PetscInt  i;
476 
477   for (i = 0; i < n; i++){
478     x[i] = -c[i] + lambda*a[i];
479     if (x[i] > u[i])     x[i] = u[i];
480     else if(x[i] < l[i]) x[i] = l[i];
481     r += a[i]*x[i];
482   }
483   return r - b;
484 }
485 
486 /** Modified Dai-Fletcher QP projector solves the problem:
487  *
488  *      minimise  0.5*x'*x - c'*x
489  *      subj to   a'*x = b
490  *                l \leq x \leq u
491  *
492  *  \param c The point to be projected onto feasible set
493  */
494 #undef __FUNCT__
495 #define __FUNCT__ "project"
496 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
497 {
498   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
499   PetscReal      r, rl, ru, s;
500   PetscInt       innerIter;
501   PetscBool      nonNegativeSlack = PETSC_FALSE;
502   PetscErrorCode ierr;
503 
504   *lam_ext = 0;
505   lambda  = 0;
506   dlambda = 0.5;
507   innerIter = 1;
508 
509   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
510    *
511    *  Optimality conditions for \phi:
512    *
513    *  1. lambda   <= 0
514    *  2. r        <= 0
515    *  3. r*lambda == 0
516    */
517 
518   /* Bracketing Phase */
519   r = phi(x, n, lambda, a, b, c, l, u);
520 
521   if(nonNegativeSlack) {
522     /* inequality constraint, i.e., with \xi >= 0 constraint */
523     if (r < TOL_R) return 0;
524   } else  {
525     /* equality constraint ,i.e., without \xi >= 0 constraint */
526     if (fabs(r) < TOL_R) return 0;
527   }
528 
529   if (r < 0.0){
530     lambdal = lambda;
531     rl      = r;
532     lambda  = lambda + dlambda;
533     r       = phi(x, n, lambda, a, b, c, l, u);
534     while (r < 0.0 && dlambda < BMRM_INFTY)  {
535       lambdal = lambda;
536       s       = rl/r - 1.0;
537       if (s < 0.1) s = 0.1;
538       dlambda = dlambda + dlambda/s;
539       lambda  = lambda + dlambda;
540       rl      = r;
541       r       = phi(x, n, lambda, a, b, c, l, u);
542     }
543     lambdau = lambda;
544     ru      = r;
545   } else {
546     lambdau = lambda;
547     ru      = r;
548     lambda  = lambda - dlambda;
549     r       = phi(x, n, lambda, a, b, c, l, u);
550     while (r > 0.0 && dlambda > -BMRM_INFTY) {
551       lambdau = lambda;
552       s       = ru/r - 1.0;
553       if (s < 0.1) s = 0.1;
554       dlambda = dlambda + dlambda/s;
555       lambda  = lambda - dlambda;
556       ru      = r;
557       r       = phi(x, n, lambda, a, b, c, l, u);
558     }
559     lambdal = lambda;
560     rl      = r;
561   }
562 
563   if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
564 
565   if(ru == 0){
566     lambda = lambdau;
567     r = phi(x, n, lambda, a, b, c, l, u);
568     return innerIter;
569   }
570 
571   /* Secant Phase */
572   s       = 1.0 - rl/ru;
573   dlambda = dlambda/s;
574   lambda  = lambdau - dlambda;
575   r       = phi(x, n, lambda, a, b, c, l, u);
576 
577   while (fabs(r) > TOL_R
578          && dlambda > TOL_LAM * (1.0 + fabs(lambda))
579          && innerIter < df->maxProjIter){
580     innerIter++;
581     if (r > 0.0){
582       if (s <= 2.0){
583         lambdau = lambda;
584         ru      = r;
585         s       = 1.0 - rl/ru;
586         dlambda = (lambdau - lambdal) / s;
587         lambda  = lambdau - dlambda;
588       } else {
589         s          = ru/r-1.0;
590         if (s < 0.1) s = 0.1;
591         dlambda    = (lambdau - lambda) / s;
592         lambda_new = 0.75*lambdal + 0.25*lambda;
593         if (lambda_new < (lambda - dlambda))
594           lambda_new = lambda - dlambda;
595         lambdau    = lambda;
596         ru         = r;
597         lambda     = lambda_new;
598         s          = (lambdau - lambdal) / (lambdau - lambda);
599       }
600     } else {
601       if (s >= 2.0){
602         lambdal = lambda;
603         rl      = r;
604         s       = 1.0 - rl/ru;
605         dlambda = (lambdau - lambdal) / s;
606         lambda  = lambdau - dlambda;
607       } else {
608         s          = rl/r - 1.0;
609         if (s < 0.1) s = 0.1;
610         dlambda    = (lambda-lambdal) / s;
611         lambda_new = 0.75*lambdau + 0.25*lambda;
612         if (lambda_new > (lambda + dlambda))
613           lambda_new = lambda + dlambda;
614         lambdal    = lambda;
615         rl         = r;
616         lambda     = lambda_new;
617         s          = (lambdau - lambdal) / (lambdau-lambda);
618       }
619     }
620     r = phi(x, n, lambda, a, b, c, l, u);
621   }
622 
623   *lam_ext = lambda;
624   if(innerIter >= df->maxProjIter) {
625     ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
626   }
627   return innerIter;
628 }
629 
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "solve"
633 PetscErrorCode solve(TAO_DF *df)
634 {
635   PetscErrorCode ierr;
636   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
637   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
638   PetscReal      DELTAsv, ProdDELTAsv;
639   PetscReal      c, *tempQ;
640   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
641   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
642   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
643   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
644   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
645 
646   /*** variables for the adaptive nonmonotone linesearch ***/
647   PetscInt    L, llast;
648   PetscReal   fr, fbest, fv, fc, fv0;
649 
650   c = BMRM_INFTY;
651 
652   DELTAsv = EPS_SV;
653   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
654   else  ProdDELTAsv = EPS_SV;
655 
656   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
657 
658   lam_ext = 0.0;
659 
660   /* Project the initial solution */
661   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
662 
663   /* Compute gradient
664      g = Q*x + f; */
665 
666   it = 0;
667   for (i = 0; i < dim; i++) {
668     if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;
669   }
670 
671   ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr);
672   for (i = 0; i < it; i++){
673     tempQ = Q[ipt[i]];
674     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
675   }
676   for (i = 0; i < dim; i++){
677     g[i] = t[i] + f[i];
678   }
679 
680 
681   /* y = -(x_{k} - g_{k}) */
682   for (i = 0; i < dim; i++){
683     y[i] = g[i] - x[i];
684   }
685 
686   /* Project x_{k} - g_{k} */
687   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
688 
689   /* y = P(x_{k} - g_{k}) - x_{k} */
690   max = ALPHA_MIN;
691   for (i = 0; i < dim; i++){
692     y[i] = tempv[i] - x[i];
693     if (fabs(y[i]) > max) max = fabs(y[i]);
694   }
695 
696   if (max < tol*1e-3){
697     lscount = 0;
698     innerIter    = 0;
699     return 0;
700   }
701 
702   alpha = 1.0 / max;
703 
704   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
705   fv0   = 0.0;
706   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
707 
708   /*** adaptive nonmonotone linesearch ***/
709   L     = 2;
710   fr    = ALPHA_MAX;
711   fbest = fv0;
712   fc    = fv0;
713   llast = 0;
714   akold = bkold = 0.0;
715 
716   /***      Iterator begins     ***/
717   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
718 
719     /* tempv = -(x_{k} - alpha*g_{k}) */
720     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
721 
722     /* Project x_{k} - alpha*g_{k} */
723     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
724 
725 
726     /* gd = \inner{d_{k}}{g_{k}}
727         d = P(x_{k} - alpha*g_{k}) - x_{k}
728     */
729     gd = 0.0;
730     for (i = 0; i < dim; i++){
731       d[i] = y[i] - x[i];
732       gd  += d[i] * g[i];
733     }
734 
735     /* Gradient computation  */
736 
737     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
738 
739     it = it2 = 0;
740     for (i = 0; i < dim; i++){
741       if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
742     }
743     for (i = 0; i < dim; i++) {
744       if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
745     }
746 
747     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr);
748     /* compute Qd = Q*d */
749     if (it < it2){
750       for (i = 0; i < it; i++){
751         tempQ = Q[ipt[i]];
752         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
753       }
754     } else { /* compute Qd = Q*y-t */
755       for (i = 0; i < it2; i++){
756         tempQ = Q[ipt2[i]];
757         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
758       }
759       for (j = 0; j < dim; j++) Qd[j] -= t[j];
760     }
761 
762     /* ak = inner{d_{k}}{d_{k}} */
763     ak = 0.0;
764     for (i = 0; i < dim; i++) ak += d[i] * d[i];
765 
766     bk = 0.0;
767     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
768 
769     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
770     else lamnew = 1.0;
771 
772     /* fv is computing f(x_{k} + d_{k}) */
773     fv = 0.0;
774     for (i = 0; i < dim; i++){
775       xplus[i] = x[i] + d[i];
776       tplus[i] = t[i] + Qd[i];
777       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
778     }
779 
780     /* fr is fref */
781     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
782       lscount++;
783       fv = 0.0;
784       for (i = 0; i < dim; i++){
785         xplus[i] = x[i] + lamnew*d[i];
786         tplus[i] = t[i] + lamnew*Qd[i];
787         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
788       }
789     }
790 
791     for (i = 0; i < dim; i++){
792       sk[i] = xplus[i] - x[i];
793       yk[i] = tplus[i] - t[i];
794       x[i]  = xplus[i];
795       t[i]  = tplus[i];
796       g[i]  = t[i] + f[i];
797     }
798 
799     /* update the line search control parameters */
800     if (fv < fbest){
801       fbest = fv;
802       fc    = fv;
803       llast = 0;
804     } else {
805       fc = (fc > fv ? fc : fv);
806       llast++;
807       if (llast == L){
808         fr    = fc;
809         fc    = fv;
810         llast = 0;
811       }
812     }
813 
814     ak = bk = 0.0;
815     for (i = 0; i < dim; i++){
816       ak += sk[i] * sk[i];
817       bk += sk[i] * yk[i];
818     }
819 
820     if (bk <= EPS*ak) alpha = ALPHA_MAX;
821     else {
822       if (bkold < EPS*akold) alpha = ak/bk;
823       else alpha = (akold+ak)/(bkold+bk);
824 
825       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
826       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
827     }
828 
829     akold = ak;
830     bkold = bk;
831 
832     /*** stopping criterion based on KKT conditions ***/
833     /* at optimal, gradient of lagrangian w.r.t. x is zero */
834 
835     bk = 0.0;
836     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
837 
838     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
839       it     = 0;
840       luv    = 0;
841       kktlam = 0.0;
842       for (i = 0; i < dim; i++){
843         /* x[i] is active hence lagrange multipliers for box constraints
844                 are zero. The lagrange multiplier for ineq. const. is then
845                 defined as below
846         */
847         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
848           ipt[it++] = i;
849           kktlam    = kktlam - a[i]*g[i];
850         } else  uv[luv++] = i;
851       }
852 
853       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
854       else {
855         kktlam = kktlam/it;
856         info   = 1;
857         for (i = 0; i < it; i++) {
858           if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
859             info = 0;
860             break;
861           }
862         }
863         if (info == 1)  {
864           for (i = 0; i < luv; i++)  {
865             if (x[uv[i]] <= DELTAsv){
866               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
867                      not be zero. So, the gradient without beta is > 0
868               */
869               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
870                 info = 0;
871                 break;
872               }
873             } else {
874               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
875                      not be zero. So, the gradient without eta is < 0
876               */
877               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
878                 info = 0;
879                 break;
880               }
881             }
882           }
883         }
884 
885         if (info == 1) return 0;
886       }
887     }
888   }
889   return 0;
890 }
891 
892 
893