xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision a7fac7c2b47dbcd8ece0cc2dfdfe0e63be1bb7b5)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "make_grad_node"
22 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = PetscNew(p);CHKERRQ(ierr);
28   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
29   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
30   (*p)->next = NULL;
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "destroy_grad_list"
36 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
37 {
38   PetscErrorCode ierr;
39   Vec_Chain      *p = head->next, *q;
40 
41   PetscFunctionBegin;
42   while(p) {
43     q = p->next;
44     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
45     ierr = PetscFree(p);CHKERRQ(ierr);
46     p = q;
47   }
48   head->next = NULL;
49   PetscFunctionReturn(0);
50 }
51 
52 
#undef __FUNCT__
#define __FUNCT__ "TaoSolve_BMRM"
/*
   Bundle method for regularized risk minimization.

   Minimizes J(w) = lambda*0.5*w'w + Remp(w), where the application callback
   returns Remp(w) and a subgradient of Remp(w).  Each iteration adds one
   cutting plane (subgradient + offset) to the bundle and solves the dual QP
   on rank 0 with the Dai-Fletcher solver (solve() below).  Convergence is
   declared when the gap epsilon = min_t J(w_t) - max_t Jt*(alpha_t) is small
   enough for TaoMonitor to stop iterating.
*/
static PetscErrorCode TaoSolve_BMRM(Tao tao)
{
  PetscErrorCode     ierr;
  TaoConvergedReason reason;
  TAO_DF             df;
  TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;

  /* Values and pointers to parts of the optimization problem */
  PetscReal          f = 0.0;
  Vec                W = tao->solution;
  Vec                G = tao->gradient;
  PetscReal          lambda;
  PetscReal          bt;
  Vec_Chain          grad_list, *tail_glist, *pgrad;  /* bundle of past subgradients (rank 0 only) */
  PetscInt           i;
  PetscMPIInt        rank;

  /* Used in converged criteria check */
  PetscReal          reg;
  PetscReal          jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
  PetscReal          innerSolverTol;
  MPI_Comm           comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  lambda = bmrm->lambda;

  /* Check Stopping Condition */
  tao->step = 1.0;
  max_jtwt = -BMRM_INFTY;
  min_jw = BMRM_INFTY;
  innerSolverTol = 1.0;   /* annealed toward epsilon as iterations progress */
  epsilon = 0.0;

  /* rank 0 owns the bundle and the inner QP solver state */
  if (!rank) {
    ierr = init_df_solver(&df);CHKERRQ(ierr);
    grad_list.next = NULL;
    tail_glist = &grad_list;
  }

  df.tol = 1e-6;
  reason = TAO_CONTINUE_ITERATING;

  /*-----------------Algorithm Begins------------------------*/
  /* make the scatter: gathers the distributed W/G onto rank 0 */
  ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);

  /* NOTE: In application pass the sub-gradient of Remp(W) */
  ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
  ierr = TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
  while (reason == TAO_CONTINUE_ITERATING) {
    /* compute bt = Remp(Wt-1) - <Wt-1, At>  (offset of the new cutting plane) */
    ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
    bt = f - bt;

    /* First gather the gradient to the master node */
    ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);

    /* Bring up the inner solver */
    if (!rank) {
      /* append the new subgradient to the bundle */
      ierr = ensure_df_space(tao->niter+1, &df); CHKERRQ(ierr);
      ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
      tail_glist->next = pgrad;
      tail_glist = pgrad;

      df.a[tao->niter] = 1.0;     /* simplex constraint coefficient */
      df.f[tao->niter] = -bt;     /* dual linear term */
      df.u[tao->niter] = 1.0;     /* box bounds 0 <= alpha_i <= 1 */
      df.l[tao->niter] = 0.0;

      /* set up the Q: Q[i][j] = <A_i, A_j>/lambda, extend the new row/column */
      pgrad = grad_list.next;
      for (i=0; i<=tao->niter; i++) {
        ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
        df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
        pgrad = pgrad->next;
      }

      if (tao->niter > 0) {
        df.x[tao->niter] = 0.0;   /* warm start: new coordinate enters at zero */
        ierr = solve(&df); CHKERRQ(ierr);
      } else
        df.x[0] = 1.0;            /* single cutting plane: alpha = 1 trivially */

      /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
      jtwt = 0.0;
      /* reconstruct w_t = -(1/lambda) * sum_i alpha_i A_i into local_w */
      ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr);
      pgrad = grad_list.next;
      for (i=0; i<=tao->niter; i++) {
        jtwt -= df.x[i] * df.f[i];
        ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr);
        pgrad = pgrad->next;
      }

      ierr = VecNorm(bmrm->local_w, NORM_2, &reg); CHKERRQ(ierr);
      reg = 0.5*lambda*reg*reg;   /* regularizer value at the new iterate */
      jtwt -= reg;
    } /* end if rank == 0 */

    /* scatter the new W to all nodes */
    ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

    ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);

    /* jtwt and reg are only valid on rank 0 until broadcast here */
    ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
    ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);

    jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
    if (jw < min_jw) min_jw = jw;
    if (jtwt > max_jtwt) max_jtwt = jtwt;

    pre_epsilon = epsilon;
    epsilon = min_jw - jtwt;    /* duality-gap style convergence measure */

    if (!rank) {
      /* anneal the inner solver tolerance toward the current gap, floored at 1e-7 */
      if (innerSolverTol > epsilon) innerSolverTol = epsilon;
      else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;

      /* if the annealing doesn't work well, lower the inner solver tolerance */
      if(pre_epsilon < epsilon) innerSolverTol *= 0.2;

      df.tol = innerSolverTol*0.5;
    }

    tao->niter++;
    ierr = TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
  }

  /* free all the memory */
  if (!rank) {
    ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
    ierr = destroy_df_solver(&df);CHKERRQ(ierr);
  }

  ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
198 
199 
200 /* ---------------------------------------------------------- */
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "TaoSetup_BMRM"
204 static PetscErrorCode TaoSetup_BMRM(Tao tao)
205 {
206 
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   /* Allocate some arrays */
211   if (!tao->gradient) {
212     ierr = VecDuplicate(tao->solution, &tao->gradient);   CHKERRQ(ierr);
213   }
214   PetscFunctionReturn(0);
215 }
216 
217 /*------------------------------------------------------------*/
218 #undef __FUNCT__
219 #define __FUNCT__ "TaoDestroy_BMRM"
220 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
221 {
222   PetscErrorCode ierr;
223 
224   PetscFunctionBegin;
225   ierr = PetscFree(tao->data);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 #undef __FUNCT__
230 #define __FUNCT__ "TaoSetFromOptions_BMRM"
231 static PetscErrorCode TaoSetFromOptions_BMRM(PetscOptions *PetscOptionsObject,Tao tao)
232 {
233   PetscErrorCode ierr;
234   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
235 
236   PetscFunctionBegin;
237   ierr = PetscOptionsHead(PetscOptionsObject,"BMRM for regularized risk minimization");CHKERRQ(ierr);
238   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL); CHKERRQ(ierr);
239   ierr = PetscOptionsTail();CHKERRQ(ierr);
240   PetscFunctionReturn(0);
241 }
242 
243 /*------------------------------------------------------------*/
244 #undef __FUNCT__
245 #define __FUNCT__ "TaoView_BMRM"
246 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
247 {
248   PetscBool      isascii;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
253   if (isascii) {
254     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
255     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 /*------------------------------------------------------------*/
261 /*MC
262   TAOBMRM - bundle method for regularized risk minimization
263 
264   Options Database Keys:
265 . - tao_bmrm_lambda - regulariser weight
266 
267   Level: beginner
268 M*/
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "TaoCreate_BMRM"
272 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
273 {
274   TAO_BMRM       *bmrm;
275   PetscErrorCode ierr;
276 
277   PetscFunctionBegin;
278   tao->ops->setup = TaoSetup_BMRM;
279   tao->ops->solve = TaoSolve_BMRM;
280   tao->ops->view  = TaoView_BMRM;
281   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
282   tao->ops->destroy = TaoDestroy_BMRM;
283 
284   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
285   bmrm->lambda = 1.0;
286   tao->data = (void*)bmrm;
287 
288   /* Override default settings (unless already changed) */
289   if (!tao->max_it_changed) tao->max_it = 2000;
290   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
291   if (!tao->fatol_changed) tao->fatol = 1.0e-12;
292   if (!tao->frtol_changed) tao->frtol = 1.0e-12;
293   if (!tao->gatol_changed) tao->gatol = 1.0e-12;
294   if (!tao->grtol_changed) tao->grtol = 1.0e-12;
295 
296   PetscFunctionReturn(0);
297 }
298 
299 #undef __FUNCT__
300 #define __FUNCT__ "init_df_solver"
301 PetscErrorCode init_df_solver(TAO_DF *df)
302 {
303   PetscInt       i, n = INCRE_DIM;
304   PetscErrorCode ierr;
305 
306   PetscFunctionBegin;
307   /* default values */
308   df->maxProjIter = 200;
309   df->maxPGMIter = 300000;
310   df->b = 1.0;
311 
312   /* memory space required by Dai-Fletcher */
313   df->cur_num_cp = n;
314   ierr = PetscMalloc1(n, &df->f); CHKERRQ(ierr);
315   ierr = PetscMalloc1(n, &df->a); CHKERRQ(ierr);
316   ierr = PetscMalloc1(n, &df->l); CHKERRQ(ierr);
317   ierr = PetscMalloc1(n, &df->u); CHKERRQ(ierr);
318   ierr = PetscMalloc1(n, &df->x); CHKERRQ(ierr);
319   ierr = PetscMalloc1(n, &df->Q); CHKERRQ(ierr);
320 
321   for (i = 0; i < n; i ++) {
322     ierr = PetscMalloc1(n, &df->Q[i]); CHKERRQ(ierr);
323   }
324 
325   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
326   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
327   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
328   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
329   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
330   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
331   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
332   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
333   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
334   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
335 
336   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
337   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
338   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
339   PetscFunctionReturn(0);
340 }
341 
342 #undef __FUNCT__
343 #define __FUNCT__ "ensure_df_space"
344 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
345 {
346   PetscErrorCode ierr;
347   PetscReal      *tmp, **tmp_Q;
348   PetscInt       i, n, old_n;
349 
350   PetscFunctionBegin;
351   df->dim = dim;
352   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
353 
354   old_n = df->cur_num_cp;
355   df->cur_num_cp += INCRE_DIM;
356   n = df->cur_num_cp;
357 
358   /* memory space required by dai-fletcher */
359   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
360   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
361   ierr = PetscFree(df->f); CHKERRQ(ierr);
362   df->f = tmp;
363 
364   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
365   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
366   ierr = PetscFree(df->a); CHKERRQ(ierr);
367   df->a = tmp;
368 
369   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
370   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
371   ierr = PetscFree(df->l); CHKERRQ(ierr);
372   df->l = tmp;
373 
374   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
375   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
376   ierr = PetscFree(df->u); CHKERRQ(ierr);
377   df->u = tmp;
378 
379   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
380   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
381   ierr = PetscFree(df->x); CHKERRQ(ierr);
382   df->x = tmp;
383 
384   ierr = PetscMalloc1(n, &tmp_Q); CHKERRQ(ierr);
385   for (i = 0; i < n; i ++) {
386     ierr = PetscMalloc1(n, &tmp_Q[i]); CHKERRQ(ierr);
387     if (i < old_n) {
388       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr);
389       ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
390     }
391   }
392 
393   ierr = PetscFree(df->Q); CHKERRQ(ierr);
394   df->Q = tmp_Q;
395 
396   ierr = PetscFree(df->g); CHKERRQ(ierr);
397   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
398 
399   ierr = PetscFree(df->y); CHKERRQ(ierr);
400   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
401 
402   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
403   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
404 
405   ierr = PetscFree(df->d); CHKERRQ(ierr);
406   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
407 
408   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
409   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
410 
411   ierr = PetscFree(df->t); CHKERRQ(ierr);
412   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
413 
414   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
415   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
416 
417   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
418   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
419 
420   ierr = PetscFree(df->sk); CHKERRQ(ierr);
421   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
422 
423   ierr = PetscFree(df->yk); CHKERRQ(ierr);
424   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
425 
426   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
427   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
428 
429   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
430   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
431 
432   ierr = PetscFree(df->uv); CHKERRQ(ierr);
433   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
434   PetscFunctionReturn(0);
435 }
436 
437 #undef __FUNCT__
438 #define __FUNCT__ "destroy_df_solver"
439 PetscErrorCode destroy_df_solver(TAO_DF *df)
440 {
441   PetscErrorCode ierr;
442   PetscInt       i;
443 
444   PetscFunctionBegin;
445   ierr = PetscFree(df->f); CHKERRQ(ierr);
446   ierr = PetscFree(df->a); CHKERRQ(ierr);
447   ierr = PetscFree(df->l); CHKERRQ(ierr);
448   ierr = PetscFree(df->u); CHKERRQ(ierr);
449   ierr = PetscFree(df->x); CHKERRQ(ierr);
450 
451   for (i = 0; i < df->cur_num_cp; i ++) {
452     ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
453   }
454   ierr = PetscFree(df->Q); CHKERRQ(ierr);
455   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
456   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
457   ierr = PetscFree(df->uv); CHKERRQ(ierr);
458   ierr = PetscFree(df->g); CHKERRQ(ierr);
459   ierr = PetscFree(df->y); CHKERRQ(ierr);
460   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
461   ierr = PetscFree(df->d); CHKERRQ(ierr);
462   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
463   ierr = PetscFree(df->t); CHKERRQ(ierr);
464   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
465   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
466   ierr = PetscFree(df->sk); CHKERRQ(ierr);
467   ierr = PetscFree(df->yk); CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
472 #undef __FUNCT__
473 #define __FUNCT__ "phi"
474 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
475 {
476   PetscReal r = 0.0;
477   PetscInt  i;
478 
479   for (i = 0; i < n; i++){
480     x[i] = -c[i] + lambda*a[i];
481     if (x[i] > u[i])     x[i] = u[i];
482     else if(x[i] < l[i]) x[i] = l[i];
483     r += a[i]*x[i];
484   }
485   return r - b;
486 }
487 
488 /** Modified Dai-Fletcher QP projector solves the problem:
489  *
490  *      minimise  0.5*x'*x - c'*x
491  *      subj to   a'*x = b
492  *                l \leq x \leq u
493  *
494  *  \param c The point to be projected onto feasible set
495  */
#undef __FUNCT__
#define __FUNCT__ "project"
/*
   Dai-Fletcher projection: finds the multiplier lambda of the single linear
   constraint a'*x = b by root-finding on r(lambda) = phi(...), first
   bracketing the root and then refining with a safeguarded secant method.
   Returns the number of secant iterations taken; the projected point is
   left in x and the final multiplier in *lam_ext.
*/
PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
{
  PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
  PetscReal      r, rl, ru, s;
  PetscInt       innerIter;
  PetscBool      nonNegativeSlack = PETSC_FALSE;  /* hard-wired: equality-constraint mode */
  PetscErrorCode ierr;

  *lam_ext = 0;
  lambda  = 0;
  dlambda = 0.5;
  innerIter = 1;

  /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
   *
   *  Optimality conditions for \phi:
   *
   *  1. lambda   <= 0
   *  2. r        <= 0
   *  3. r*lambda == 0
   */

  /* Bracketing Phase */
  r = phi(x, n, lambda, a, b, c, l, u);

  if(nonNegativeSlack) {
    /* inequality constraint, i.e., with \xi >= 0 constraint */
    if (r < TOL_R) return 0;
  } else  {
    /* equality constraint ,i.e., without \xi >= 0 constraint */
    if (fabs(r) < TOL_R) return 0;
  }

  if (r < 0.0){
    /* residual negative at lambda=0: march lambda upward until r >= 0 */
    lambdal = lambda;
    rl      = r;
    lambda  = lambda + dlambda;
    r       = phi(x, n, lambda, a, b, c, l, u);
    while (r < 0.0 && dlambda < BMRM_INFTY)  {
      lambdal = lambda;
      s       = rl/r - 1.0;
      if (s < 0.1) s = 0.1;          /* safeguard the secant-style step growth */
      dlambda = dlambda + dlambda/s;
      lambda  = lambda + dlambda;
      rl      = r;
      r       = phi(x, n, lambda, a, b, c, l, u);
    }
    lambdau = lambda;
    ru      = r;
  } else {
    /* residual positive: march lambda downward until r <= 0 */
    lambdau = lambda;
    ru      = r;
    lambda  = lambda - dlambda;
    r       = phi(x, n, lambda, a, b, c, l, u);
    while (r > 0.0 && dlambda > -BMRM_INFTY) {
      lambdau = lambda;
      s       = ru/r - 1.0;
      if (s < 0.1) s = 0.1;
      dlambda = dlambda + dlambda/s;
      lambda  = lambda - dlambda;
      ru      = r;
      r       = phi(x, n, lambda, a, b, c, l, u);
    }
    lambdal = lambda;
    rl      = r;
  }

  /* a bracket was never found within finite step sizes: the QP is infeasible */
  if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");

  if(ru == 0){
    /* upper bracket endpoint is already the exact root */
    lambda = lambdau;
    r = phi(x, n, lambda, a, b, c, l, u);
    return innerIter;
  }

  /* Secant Phase */
  s       = 1.0 - rl/ru;
  dlambda = dlambda/s;
  lambda  = lambdau - dlambda;
  r       = phi(x, n, lambda, a, b, c, l, u);

  while (fabs(r) > TOL_R
         && dlambda > TOL_LAM * (1.0 + fabs(lambda))
         && innerIter < df->maxProjIter){
    innerIter++;
    if (r > 0.0){
      /* root lies in [lambdal, lambda]: shrink from above */
      if (s <= 2.0){
        lambdau = lambda;
        ru      = r;
        s       = 1.0 - rl/ru;
        dlambda = (lambdau - lambdal) / s;
        lambda  = lambdau - dlambda;
      } else {
        /* slow progress: take a safeguarded step biased toward lambdal */
        s          = ru/r-1.0;
        if (s < 0.1) s = 0.1;
        dlambda    = (lambdau - lambda) / s;
        lambda_new = 0.75*lambdal + 0.25*lambda;
        if (lambda_new < (lambda - dlambda))
          lambda_new = lambda - dlambda;
        lambdau    = lambda;
        ru         = r;
        lambda     = lambda_new;
        s          = (lambdau - lambdal) / (lambdau - lambda);
      }
    } else {
      /* root lies in [lambda, lambdau]: shrink from below (mirror case) */
      if (s >= 2.0){
        lambdal = lambda;
        rl      = r;
        s       = 1.0 - rl/ru;
        dlambda = (lambdau - lambdal) / s;
        lambda  = lambdau - dlambda;
      } else {
        s          = rl/r - 1.0;
        if (s < 0.1) s = 0.1;
        dlambda    = (lambda-lambdal) / s;
        lambda_new = 0.75*lambdau + 0.25*lambda;
        if (lambda_new > (lambda + dlambda))
          lambda_new = lambda + dlambda;
        lambdal    = lambda;
        rl         = r;
        lambda     = lambda_new;
        s          = (lambdau - lambdal) / (lambdau-lambda);
      }
    }
    r = phi(x, n, lambda, a, b, c, l, u);
  }

  *lam_ext = lambda;
  if(innerIter >= df->maxProjIter) {
    ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
  }
  return innerIter;
}
631 
#undef __FUNCT__
#define __FUNCT__ "solve"
/*
   Dai-Fletcher projected gradient method (PGM) for the bundle dual QP

       minimize  0.5*x'*Q*x + f'*x
       subj to   a'*x = b,  l <= x <= u

   over the dim active cutting planes stored in df.  Uses an adaptive
   nonmonotone line search and a Barzilai-Borwein style steplength alpha.
   The solution is left in df->x; the running product t = Q*x is maintained
   incrementally so the gradient g = Q*x + f is cheap to update.
*/
PetscErrorCode solve(TAO_DF *df)
{
  PetscErrorCode ierr;
  PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
  PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
  PetscReal      DELTAsv, ProdDELTAsv;
  PetscReal      c, *tempQ;
  PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
  PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
  PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
  PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
  PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;

  /*** variables for the adaptive nonmonotone linesearch ***/
  PetscInt    L, llast;
  PetscReal   fr, fbest, fv, fc, fv0;

  c = BMRM_INFTY;   /* effective upper cap used in the KKT active-set test below */

  /* sparsity threshold: skip near-zero entries when forming Q products,
     but only when the tolerance/dimension make the approximation safe */
  DELTAsv = EPS_SV;
  if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
  else  ProdDELTAsv = EPS_SV;

  for (i = 0; i < dim; i++)  tempv[i] = -x[i];

  lam_ext = 0.0;

  /* Project the initial solution */
  projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);

  /* Compute gradient
     g = Q*x + f; */

  it = 0;
  for (i = 0; i < dim; i++) {
    if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;   /* indices of non-negligible x entries */
  }

  /* t = Q*x, accumulated column-by-column over the non-negligible entries */
  ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr);
  for (i = 0; i < it; i++){
    tempQ = Q[ipt[i]];
    for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
  }
  for (i = 0; i < dim; i++){
    g[i] = t[i] + f[i];
  }


  /* y = -(x_{k} - g_{k}) */
  for (i = 0; i < dim; i++){
    y[i] = g[i] - x[i];
  }

  /* Project x_{k} - g_{k} */
  projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);

  /* y = P(x_{k} - g_{k}) - x_{k} */
  max = ALPHA_MIN;
  for (i = 0; i < dim; i++){
    y[i] = tempv[i] - x[i];
    if (fabs(y[i]) > max) max = fabs(y[i]);
  }

  /* projected-gradient step is already tiny: x is (near-)optimal */
  if (max < tol*1e-3){
    lscount = 0;
    innerIter    = 0;
    return 0;
  }

  alpha = 1.0 / max;   /* initial steplength */

  /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
  fv0   = 0.0;
  for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);

  /*** adaptive nonmonotone linesearch ***/
  L     = 2;        /* window length before the reference value fr is refreshed */
  fr    = ALPHA_MAX;
  fbest = fv0;
  fc    = fv0;
  llast = 0;
  akold = bkold = 0.0;

  /***      Iterator begins     ***/
  for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {

    /* tempv = -(x_{k} - alpha*g_{k}) */
    for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];

    /* Project x_{k} - alpha*g_{k} */
    projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);


    /* gd = \inner{d_{k}}{g_{k}}
        d = P(x_{k} - alpha*g_{k}) - x_{k}
    */
    gd = 0.0;
    for (i = 0; i < dim; i++){
      d[i] = y[i] - x[i];
      gd  += d[i] * g[i];
    }

    /* Gradient computation  */

    /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */

    it = it2 = 0;
    for (i = 0; i < dim; i++){
      if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
    }
    for (i = 0; i < dim; i++) {
      if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
    }

    ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr);
    /* compute Qd = Q*d */
    if (it < it2){
      for (i = 0; i < it; i++){
        tempQ = Q[ipt[i]];
        for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
      }
    } else { /* compute Qd = Q*y-t */
      for (i = 0; i < it2; i++){
        tempQ = Q[ipt2[i]];
        for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
      }
      for (j = 0; j < dim; j++) Qd[j] -= t[j];
    }

    /* ak = inner{d_{k}}{d_{k}} */
    ak = 0.0;
    for (i = 0; i < dim; i++) ak += d[i] * d[i];

    bk = 0.0;
    for (i = 0; i < dim; i++) bk += d[i]*Qd[i];

    /* exact minimizer along d when curvature is positive and d is a descent dir */
    if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
    else lamnew = 1.0;

    /* fv is computing f(x_{k} + d_{k}) */
    fv = 0.0;
    for (i = 0; i < dim; i++){
      xplus[i] = x[i] + d[i];
      tplus[i] = t[i] + Qd[i];
      fv      += xplus[i] * (0.5*tplus[i] + f[i]);
    }

    /* fr is fref */
    /* full step not acceptable vs. the nonmonotone reference: back off to lamnew */
    if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
      lscount++;
      fv = 0.0;
      for (i = 0; i < dim; i++){
        xplus[i] = x[i] + lamnew*d[i];
        tplus[i] = t[i] + lamnew*Qd[i];
        fv      += xplus[i] * (0.5*tplus[i] + f[i]);
      }
    }

    /* accept the step: update x, t = Q*x, g, and the BB pair (sk, yk) */
    for (i = 0; i < dim; i++){
      sk[i] = xplus[i] - x[i];
      yk[i] = tplus[i] - t[i];
      x[i]  = xplus[i];
      t[i]  = tplus[i];
      g[i]  = t[i] + f[i];
    }

    /* update the line search control parameters */
    if (fv < fbest){
      fbest = fv;
      fc    = fv;
      llast = 0;
    } else {
      fc = (fc > fv ? fc : fv);
      llast++;
      if (llast == L){
        fr    = fc;
        fc    = fv;
        llast = 0;
      }
    }

    /* Barzilai-Borwein steplength from the most recent (and previous) curvature pair */
    ak = bk = 0.0;
    for (i = 0; i < dim; i++){
      ak += sk[i] * sk[i];
      bk += sk[i] * yk[i];
    }

    if (bk <= EPS*ak) alpha = ALPHA_MAX;
    else {
      if (bkold < EPS*akold) alpha = ak/bk;
      else alpha = (akold+ak)/(bkold+bk);

      if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
      else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
    }

    akold = ak;
    bkold = bk;

    /*** stopping criterion based on KKT conditions ***/
    /* at optimal, gradient of lagrangian w.r.t. x is zero */

    bk = 0.0;
    for (i = 0; i < dim; i++) bk +=  x[i] * x[i];

    /* only run the (relatively expensive) KKT check once the step is small */
    if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
      it     = 0;
      luv    = 0;
      kktlam = 0.0;
      for (i = 0; i < dim; i++){
        /* x[i] is active hence lagrange multipliers for box constraints
                are zero. The lagrange multiplier for ineq. const. is then
                defined as below
        */
        if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
          ipt[it++] = i;
          kktlam    = kktlam - a[i]*g[i];
        } else  uv[luv++] = i;   /* index is at (or near) a bound */
      }

      if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
      else {
        /* kktlam: averaged multiplier estimate over the free variables */
        kktlam = kktlam/it;
        info   = 1;
        /* free variables: stationarity |a_i*g_i + kktlam| must be within tol */
        for (i = 0; i < it; i++) {
          if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
            info = 0;
            break;
          }
        }
        if (info == 1)  {
          for (i = 0; i < luv; i++)  {
            if (x[uv[i]] <= DELTAsv){
              /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
                     not be zero. So, the gradient without beta is > 0
              */
              if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
                info = 0;
                break;
              }
            } else {
              /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
                     not be zero. So, the gradient without eta is < 0
              */
              if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
                info = 0;
                break;
              }
            }
          }
        }

        if (info == 1) return 0;   /* all KKT conditions satisfied */
      }
    }
  }
  /* NOTE(review): falls through after maxPGMIter iterations without signaling
     non-convergence to the caller — returns success either way */
  return 0;
}
893 
894 
895