xref: /petsc/src/tao/constrained/impls/ipm/ipm.c (revision e4cb33bb7dbdbae9285fba102465ca0f1dcb3977) !
1 #include <petsctaolinesearch.h>
2 #include <../src/tao/constrained/impls/ipm/ipm.h> /*I "ipm.h" I*/
3 /*
4 #define DEBUG_IPM
5 #define DEBUG_K
6 #define DEBUG_SCATTER
7 #define DEBUG_KKT
8 */
9 /*
10    x,d in R^n
11    f in R
12    nb = mi + nlb+nub
13    s in R^nb is slack vector CI(x) / x-XL / -x+XU
14    bin in R^mi (tao->constraints_inequality)
15    beq in R^me (tao->constraints_equality)
16    lamdai in R^nb (ipmP->lamdai)
17    lamdae in R^me (ipmP->lamdae)
18    Jeq in R^(me x n) (tao->jacobian_equality)
19    Jin in R^(mi x n) (tao->jacobian_inequality)
20    Ai in  R^(nb x n) (ipmP->Ai)
21    H in R^(n x n) (tao->hessian)
22    min f=(1/2)*x'*H*x + d'*x
23    s.t.  CE(x) == 0
24          CI(x) >= 0
25          x >= tao->XL
26          -x >= -tao->XU
27 */
28 
29 static PetscErrorCode IPMComputeKKT(Tao tao);
30 static PetscErrorCode IPMPushInitialPoint(Tao tao);
31 static PetscErrorCode IPMEvaluate(Tao tao);
32 static PetscErrorCode IPMUpdateK(Tao tao);
33 static PetscErrorCode IPMUpdateAi(Tao tao);
34 static PetscErrorCode IPMGatherRHS(Tao tao,Vec,Vec,Vec,Vec,Vec);
35 static PetscErrorCode IPMScatterStep(Tao tao,Vec,Vec,Vec,Vec,Vec);
36 static PetscErrorCode IPMInitializeBounds(Tao tao);
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "TaoSolve_IPM"
40 static PetscErrorCode TaoSolve_IPM(Tao tao)
41 {
42   PetscErrorCode     ierr;
43   TAO_IPM            *ipmP = (TAO_IPM*)tao->data;
44   TaoConvergedReason reason = TAO_CONTINUE_ITERATING;
45   PetscInt           iter = 0,its,i;
46   PetscScalar        stepsize=1.0;
47   PetscScalar        step_s,step_l,alpha,tau,sigma,phi_target;
48 
49   PetscFunctionBegin;
50   /* Push initial point away from bounds */
51 #if defined(DEBUG_IPM)
52   ierr = VecNorm(tao->solution,NORM_2,&tau);CHKERRQ(ierr);
53   ierr = VecView(tao->solution,0);CHKERRQ(ierr);
54 #endif
55   ierr = IPMInitializeBounds(tao);CHKERRQ(ierr);
56   ierr = IPMPushInitialPoint(tao);CHKERRQ(ierr);
57 #if defined(DEBUG_IPM)
58   ierr = VecNorm(tao->solution,NORM_2,&tau);CHKERRQ(ierr);
59   ierr = VecView(tao->solution,0);CHKERRQ(ierr);
60   /*  PetscPrintf(PETSC_COMM_WORLD,"||x0|| = %g\n",(double)tau); */
61 #endif
62   ierr = VecCopy(tao->solution,ipmP->rhs_x);CHKERRQ(ierr);
63   ierr = IPMEvaluate(tao);CHKERRQ(ierr);
64   ierr = IPMComputeKKT(tao);CHKERRQ(ierr);
65   ierr = TaoMonitor(tao,iter++,ipmP->kkt_f,ipmP->phi,0.0,1.0,&reason);
66 
67   while (reason == TAO_CONTINUE_ITERATING) {
68     ierr = IPMUpdateK(tao);CHKERRQ(ierr);
69     /*
70        rhs.x    = -rd
71        rhs.lame = -rpe
72        rhs.lami = -rpi
73        rhs.com  = -com
74     */
75 
76     ierr = VecCopy(ipmP->rd,ipmP->rhs_x);CHKERRQ(ierr);
77     if (ipmP->me > 0) {
78       ierr = VecCopy(ipmP->rpe,ipmP->rhs_lamdae);CHKERRQ(ierr);
79     }
80     if (ipmP->nb > 0) {
81       ierr = VecCopy(ipmP->rpi,ipmP->rhs_lamdai);CHKERRQ(ierr);
82       ierr = VecCopy(ipmP->complementarity,ipmP->rhs_s);CHKERRQ(ierr);
83     }
84     ierr = IPMGatherRHS(tao,ipmP->bigrhs,ipmP->rhs_x,ipmP->rhs_lamdae,ipmP->rhs_lamdai,ipmP->rhs_s);CHKERRQ(ierr);
85     ierr = VecScale(ipmP->bigrhs,-1.0);CHKERRQ(ierr);
86 
87     /* solve K * step = rhs */
88     ierr = KSPSetOperators(tao->ksp,ipmP->K,ipmP->K,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
89     ierr = KSPSolve(tao->ksp,ipmP->bigrhs,ipmP->bigstep);CHKERRQ(ierr);
90 
91     ierr = IPMScatterStep(tao,ipmP->bigstep,tao->stepdirection,ipmP->ds,ipmP->dlamdae,ipmP->dlamdai);CHKERRQ(ierr);
92     ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr);
93     tao->ksp_its += its;
94 #if defined DEBUG_KKT
95     PetscPrintf(PETSC_COMM_WORLD,"first solve.\n");
96     PetscPrintf(PETSC_COMM_WORLD,"rhs_lamdai\n");
97     /* VecView(ipmP->rhs_lamdai,0);
98     ierr = VecView(ipmP->bigrhs,0);
99     ierr = VecView(ipmP->bigstep,0); */
100     PetscScalar norm1,norm2;
101     ierr = VecNorm(ipmP->bigrhs,NORM_2,&norm1);
102     ierr = VecNorm(ipmP->bigstep,NORM_2,&norm2);
103     PetscPrintf(PETSC_COMM_WORLD,"||rhs|| = %g\t ||step|| = %g\n",(double)norm1,(double)norm2);
104 #endif
105      /* Find distance along step direction to closest bound */
106     if (ipmP->nb > 0) {
107       ierr = VecStepBoundInfo(ipmP->s,ipmP->ds,ipmP->Zero_nb,ipmP->Inf_nb,&step_s,NULL,NULL);CHKERRQ(ierr);
108       ierr = VecStepBoundInfo(ipmP->lamdai,ipmP->dlamdai,ipmP->Zero_nb,ipmP->Inf_nb,&step_l,NULL,NULL);CHKERRQ(ierr);
109       alpha = PetscMin(step_s,step_l);
110       alpha = PetscMin(alpha,1.0);
111       ipmP->alpha1 = alpha;
112     } else {
113       ipmP->alpha1 = alpha = 1.0;
114     }
115 
116     /* x_aff = x + alpha*d */
117     ierr = VecCopy(tao->solution,ipmP->save_x);CHKERRQ(ierr);
118     if (ipmP->me > 0) {
119       ierr = VecCopy(ipmP->lamdae,ipmP->save_lamdae);CHKERRQ(ierr);
120     }
121     if (ipmP->nb > 0) {
122       ierr = VecCopy(ipmP->lamdai,ipmP->save_lamdai);CHKERRQ(ierr);
123       ierr = VecCopy(ipmP->s,ipmP->save_s);CHKERRQ(ierr);
124     }
125 
126     ierr = VecAXPY(tao->solution,alpha,tao->stepdirection);CHKERRQ(ierr);
127     if (ipmP->me > 0) {
128       ierr = VecAXPY(ipmP->lamdae,alpha,ipmP->dlamdae);CHKERRQ(ierr);
129     }
130     if (ipmP->nb > 0) {
131       ierr = VecAXPY(ipmP->lamdai,alpha,ipmP->dlamdai);CHKERRQ(ierr);
132       ierr = VecAXPY(ipmP->s,alpha,ipmP->ds);CHKERRQ(ierr);
133     }
134 
135     /* Recompute kkt to find centering parameter sigma = (new_mu/old_mu)^3 */
136     if (ipmP->mu == 0.0) {
137       sigma = 0.0;
138     } else {
139       sigma = 1.0/ipmP->mu;
140     }
141     ierr = IPMComputeKKT(tao);CHKERRQ(ierr);
142     sigma *= ipmP->mu;
143     sigma*=sigma*sigma;
144 
145     /* revert kkt info */
146     ierr = VecCopy(ipmP->save_x,tao->solution);CHKERRQ(ierr);
147     if (ipmP->me > 0) {
148       ierr = VecCopy(ipmP->save_lamdae,ipmP->lamdae);CHKERRQ(ierr);
149     }
150     if (ipmP->nb > 0) {
151       ierr = VecCopy(ipmP->save_lamdai,ipmP->lamdai);CHKERRQ(ierr);
152       ierr = VecCopy(ipmP->save_s,ipmP->s);CHKERRQ(ierr);
153     }
154     ierr = IPMComputeKKT(tao);CHKERRQ(ierr);
155 
156     /* update rhs with new complementarity vector */
157     if (ipmP->nb > 0) {
158       ierr = VecCopy(ipmP->complementarity,ipmP->rhs_s);CHKERRQ(ierr);
159       ierr = VecScale(ipmP->rhs_s,-1.0);CHKERRQ(ierr);
160       ierr = VecShift(ipmP->rhs_s,sigma*ipmP->mu);CHKERRQ(ierr);
161     }
162     ierr = IPMGatherRHS(tao,ipmP->bigrhs,NULL,NULL,NULL,ipmP->rhs_s);CHKERRQ(ierr);
163 
164     /* solve K * step = rhs */
165     ierr = KSPSetOperators(tao->ksp,ipmP->K,ipmP->K,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
166     ierr = KSPSolve(tao->ksp,ipmP->bigrhs,ipmP->bigstep);CHKERRQ(ierr);
167 
168     ierr = IPMScatterStep(tao,ipmP->bigstep,tao->stepdirection,ipmP->ds,ipmP->dlamdae,ipmP->dlamdai);CHKERRQ(ierr);
169     ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr);
170 #if defined DEBUG_KKT2
171     PetscPrintf(PETSC_COMM_WORLD,"rhs_lamdai\n");
172     VecView(ipmP->rhs_lamdai,0);
173     ierr = VecView(ipmP->bigrhs,0);
174     ierr = VecView(ipmP->bigstep,0);
175 #endif
176     tao->ksp_its += its;
177 
178     if (ipmP->nb > 0) {
179       /* Get max step size and apply frac-to-boundary */
180       tau = PetscMax(ipmP->taumin,1.0-ipmP->mu);
181       tau = PetscMin(tau,1.0);
182       if (tau != 1.0) {
183         ierr = VecScale(ipmP->s,tau);CHKERRQ(ierr);
184         ierr = VecScale(ipmP->lamdai,tau);CHKERRQ(ierr);
185       }
186       ierr = VecStepBoundInfo(ipmP->s,ipmP->ds,ipmP->Zero_nb,ipmP->Inf_nb,&step_s,NULL,NULL);CHKERRQ(ierr);
187       ierr = VecStepBoundInfo(ipmP->lamdai,ipmP->dlamdai,ipmP->Zero_nb,ipmP->Inf_nb,&step_l,NULL,NULL);CHKERRQ(ierr);
188       if (tau != 1.0) {
189         ierr = VecCopy(ipmP->save_s,ipmP->s);CHKERRQ(ierr);
190         ierr = VecCopy(ipmP->save_lamdai,ipmP->lamdai);CHKERRQ(ierr);
191       }
192       alpha = PetscMin(step_s,step_l);
193       alpha = PetscMin(alpha,1.0);
194     } else {
195       alpha = 1.0;
196     }
197     ipmP->alpha2 = alpha;
198     /* TODO make phi_target meaningful */
199     phi_target = ipmP->dec * ipmP->phi;
200     for (i=0; i<11;i++) {
201 #if defined DEBUG_KKT2
202     PetscPrintf(PETSC_COMM_WORLD,"alpha2=%g\n",(double)alpha);
203       PetscPrintf(PETSC_COMM_WORLD,"old point:\n");
204       VecView(tao->solution,0);
205       VecView(ipmP->lamdae,0);
206       VecView(ipmP->s,0);
207       VecView(ipmP->lamdai,0);
208 #endif
209       ierr = VecAXPY(tao->solution,alpha,tao->stepdirection);CHKERRQ(ierr);
210       if (ipmP->nb > 0) {
211         ierr = VecAXPY(ipmP->s,alpha,ipmP->ds);CHKERRQ(ierr);
212         ierr = VecAXPY(ipmP->lamdai,alpha,ipmP->dlamdai);CHKERRQ(ierr);
213       }
214       if (ipmP->me > 0) {
215         ierr = VecAXPY(ipmP->lamdae,alpha,ipmP->dlamdae);CHKERRQ(ierr);
216       }
217 #if defined DEBUG_KKT
218       PetscPrintf(PETSC_COMM_WORLD,"step direction:\n");
219       VecView(tao->stepdirection,0);
220       /* VecView(ipmP->dlamdae,0);
221       VecView(ipmP->ds,0);
222       VecView(ipmP->dlamdai,0);
223 
224       PetscPrintf(PETSC_COMM_WORLD,"New iterate:\n");
225       VecView(tao->solution,0);
226       VecView(ipmP->lamdae,0);
227       VecView(ipmP->s,0);
228       VecView(ipmP->lamdai,0); */
229 #endif
230       /* update dual variables */
231       if (ipmP->me > 0) {
232         ierr = VecCopy(ipmP->lamdae,tao->DE);CHKERRQ(ierr);
233       }
234       /* TODO: fix
235       if (ipmP->nb > 0) {
236         ierr = VecScatterBegin
237         PetscInt lstart,lend;
238 
239         ierr = VecGetOwnershipRange(ipmP->lamdai,&lstart,&lend);
240         ierr = VecGetArray(ipmP->lamdai,&li);CHKERRQ(ierr);
241         ierr = VecGetArray(tao->DI,&di);CHKERRQ(ierr);
242         for (j=lstart;j<lend;j++) {
243           if (j < ipmP->nilb) {
244             di[j] = li[j];
245           }
246         }
247         ierr = VecRestoreArray(ipmP->lamdai,&li);CHKERRQ(ierr);
248         ierr = VecRestoreArray(tao->DI,&di);CHKERRQ(ierr);
249       }
250       */
251 
252       ierr = IPMEvaluate(tao);CHKERRQ(ierr);
253       ierr = IPMComputeKKT(tao);CHKERRQ(ierr);
254       if (ipmP->phi <= phi_target) break;
255       alpha /= 2.0;
256     }
257 
258     ierr = TaoMonitor(tao,iter,ipmP->kkt_f,ipmP->phi,0.0,stepsize,&reason);CHKERRQ(ierr);
259     iter++;
260   }
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "TaoSetup_IPM"
266 static PetscErrorCode TaoSetup_IPM(Tao tao)
267 {
268   TAO_IPM        *ipmP = (TAO_IPM*)tao->data;
269   PetscErrorCode ierr;
270 
271   PetscFunctionBegin;
272   ipmP->nb = ipmP->mi = ipmP->me = 0;
273   ipmP->K=0;
274   ierr = VecGetSize(tao->solution,&ipmP->n);CHKERRQ(ierr);
275   if (!tao->gradient) {
276     ierr = VecDuplicate(tao->solution, &tao->gradient);CHKERRQ(ierr);
277     ierr = VecDuplicate(tao->solution, &tao->stepdirection);CHKERRQ(ierr);
278     ierr = VecDuplicate(tao->solution, &ipmP->rd);CHKERRQ(ierr);
279     ierr = VecDuplicate(tao->solution, &ipmP->rhs_x);CHKERRQ(ierr);
280     ierr = VecDuplicate(tao->solution, &ipmP->work);CHKERRQ(ierr);
281     ierr = VecDuplicate(tao->solution, &ipmP->save_x);CHKERRQ(ierr);
282   }
283   if (tao->constraints_equality) {
284     ierr = VecGetSize(tao->constraints_equality,&ipmP->me);CHKERRQ(ierr);
285     ierr = VecDuplicate(tao->constraints_equality,&ipmP->lamdae);CHKERRQ(ierr);
286     ierr = VecDuplicate(tao->constraints_equality,&ipmP->dlamdae);CHKERRQ(ierr);
287     ierr = VecDuplicate(tao->constraints_equality,&ipmP->rhs_lamdae);CHKERRQ(ierr);
288     ierr = VecDuplicate(tao->constraints_equality,&ipmP->save_lamdae);CHKERRQ(ierr);
289     ierr = VecDuplicate(tao->constraints_equality,&ipmP->rpe);CHKERRQ(ierr);
290     ierr = VecDuplicate(tao->constraints_equality,&tao->DE);CHKERRQ(ierr);
291   }
292   if (tao->constraints_inequality) {
293     ierr = VecDuplicate(tao->constraints_inequality,&tao->DI);CHKERRQ(ierr);
294   }
295   PetscFunctionReturn(0);
296 }
297 
298 #undef __FUNCT__
299 #define __FUNCT__ "IPMInitializeBounds"
300 static PetscErrorCode IPMInitializeBounds(Tao tao)
301 {
302   TAO_IPM        *ipmP = (TAO_IPM*)tao->data;
303   Vec            xtmp;
304   PetscInt       xstart,xend;
305   PetscInt       ucstart,ucend; /* user ci */
306   PetscInt       ucestart,uceend; /* user ce */
307   PetscInt       sstart,send;
308   PetscInt       bigsize;
309   PetscInt       i,counter,nloc;
310   PetscInt       *cind,*xind,*ucind,*uceind,*stepind;
311   VecType        vtype;
312   const PetscInt *xli,*xui;
313   PetscInt       xl_offset,xu_offset;
314   IS             bigxl,bigxu,isuc,isc,isx,sis,is1;
315   PetscErrorCode ierr;
316   MPI_Comm       comm;
317 
318   PetscFunctionBegin;
319   cind=xind=ucind=uceind=stepind=0;
320   ipmP->mi=0;
321   ipmP->nxlb=0;
322   ipmP->nxub=0;
323   ipmP->nb=0;
324   ipmP->nslack=0;
325 
326   ierr = VecDuplicate(tao->solution,&xtmp);CHKERRQ(ierr);
327   if (!tao->XL && !tao->XU && tao->ops->computebounds) {
328     ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
329   }
330   if (tao->XL) {
331     ierr = VecSet(xtmp,PETSC_NINFINITY);CHKERRQ(ierr);
332     ierr = VecWhichGreaterThan(tao->XL,xtmp,&ipmP->isxl);CHKERRQ(ierr);
333     ierr = ISGetSize(ipmP->isxl,&ipmP->nxlb);CHKERRQ(ierr);
334   } else {
335     ipmP->nxlb=0;
336   }
337   if (tao->XU) {
338     ierr = VecSet(xtmp,PETSC_INFINITY);CHKERRQ(ierr);
339     ierr = VecWhichLessThan(tao->XU,xtmp,&ipmP->isxu);CHKERRQ(ierr);
340     ierr = ISGetSize(ipmP->isxu,&ipmP->nxub);CHKERRQ(ierr);
341   } else {
342     ipmP->nxub=0;
343   }
344   ierr = VecDestroy(&xtmp);CHKERRQ(ierr);
345   if (tao->constraints_inequality) {
346     ierr = VecGetSize(tao->constraints_inequality,&ipmP->mi);CHKERRQ(ierr);
347   } else {
348     ipmP->mi = 0;
349   }
350 #if defined DEBUG_K
351   PetscPrintf(PETSC_COMM_WORLD,"isxl:\n");
352   if (ipmP->nxlb) {
353     ISView(ipmP->isxl,0);
354   }
355   PetscPrintf(PETSC_COMM_WORLD,"isxu:\n");
356   if (ipmP->nxub) {
357     ISView(ipmP->isxu,0);
358   }
359 
360 #endif
361   ipmP->nb = ipmP->nxlb + ipmP->nxub + ipmP->mi;
362 
363   comm = ((PetscObject)(tao->solution))->comm;
364 
365   bigsize = ipmP->n+2*ipmP->nb+ipmP->me;
366   ierr = PetscMalloc1(bigsize,&stepind);CHKERRQ(ierr);
367   ierr = PetscMalloc1(ipmP->n,&xind);CHKERRQ(ierr);
368   ierr = PetscMalloc1(ipmP->me,&uceind);CHKERRQ(ierr);
369   ierr = VecGetOwnershipRange(tao->solution,&xstart,&xend);CHKERRQ(ierr);
370 
371   if (ipmP->nb > 0) {
372     ierr = VecCreate(comm,&ipmP->s);CHKERRQ(ierr);
373     ierr = VecSetSizes(ipmP->s,PETSC_DECIDE,ipmP->nb);CHKERRQ(ierr);
374     ierr = VecSetFromOptions(ipmP->s);CHKERRQ(ierr);
375     ierr = VecDuplicate(ipmP->s,&ipmP->ds);CHKERRQ(ierr);
376     ierr = VecDuplicate(ipmP->s,&ipmP->rhs_s);CHKERRQ(ierr);
377     ierr = VecDuplicate(ipmP->s,&ipmP->complementarity);CHKERRQ(ierr);
378     ierr = VecDuplicate(ipmP->s,&ipmP->ci);CHKERRQ(ierr);
379 
380     ierr = VecDuplicate(ipmP->s,&ipmP->lamdai);CHKERRQ(ierr);
381     ierr = VecDuplicate(ipmP->s,&ipmP->dlamdai);CHKERRQ(ierr);
382     ierr = VecDuplicate(ipmP->s,&ipmP->rhs_lamdai);CHKERRQ(ierr);
383     ierr = VecDuplicate(ipmP->s,&ipmP->save_lamdai);CHKERRQ(ierr);
384 
385     ierr = VecDuplicate(ipmP->s,&ipmP->save_s);CHKERRQ(ierr);
386     ierr = VecDuplicate(ipmP->s,&ipmP->rpi);CHKERRQ(ierr);
387     ierr = VecDuplicate(ipmP->s,&ipmP->Zero_nb);CHKERRQ(ierr);
388     ierr = VecSet(ipmP->Zero_nb,0.0);CHKERRQ(ierr);
389     ierr = VecDuplicate(ipmP->s,&ipmP->One_nb);CHKERRQ(ierr);
390     ierr = VecSet(ipmP->One_nb,1.0);CHKERRQ(ierr);
391     ierr = VecDuplicate(ipmP->s,&ipmP->Inf_nb);CHKERRQ(ierr);
392     ierr = VecSet(ipmP->Inf_nb,PETSC_INFINITY);CHKERRQ(ierr);
393 
394     ierr = PetscMalloc1(ipmP->nb,&cind);CHKERRQ(ierr);
395     ierr = PetscMalloc1(ipmP->mi,&ucind);CHKERRQ(ierr);
396     ierr = VecGetOwnershipRange(ipmP->s,&sstart,&send);CHKERRQ(ierr);
397 
398     if (ipmP->mi > 0) {
399       ierr = VecGetOwnershipRange(tao->constraints_inequality,&ucstart,&ucend);CHKERRQ(ierr);
400       counter=0;
401       for (i=ucstart;i<ucend;i++) {
402         cind[counter++] = i;
403       }
404       ierr = ISCreateGeneral(comm,counter,cind,PETSC_COPY_VALUES,&isuc);CHKERRQ(ierr);
405       ierr = ISCreateGeneral(comm,counter,cind,PETSC_COPY_VALUES,&isc);CHKERRQ(ierr);
406       ierr = VecScatterCreate(tao->constraints_inequality,isuc,ipmP->ci,isc,&ipmP->ci_scat);CHKERRQ(ierr);
407 
408       ierr = ISDestroy(&isuc);
409       ierr = ISDestroy(&isc);
410     }
411     /* need to know how may xbound indices are on each process */
412     /* TODO better way */
413     if (ipmP->nxlb) {
414       ierr = ISAllGather(ipmP->isxl,&bigxl);CHKERRQ(ierr);
415       ierr = ISGetIndices(bigxl,&xli);CHKERRQ(ierr);
416       /* find offsets for this processor */
417       xl_offset = ipmP->mi;
418       for (i=0;i<ipmP->nxlb;i++) {
419         if (xli[i] < xstart) {
420           xl_offset++;
421         } else break;
422       }
423       ierr = ISRestoreIndices(bigxl,&xli);CHKERRQ(ierr);
424 
425       ierr = ISGetIndices(ipmP->isxl,&xli);CHKERRQ(ierr);
426       ierr = ISGetLocalSize(ipmP->isxl,&nloc);CHKERRQ(ierr);
427       for (i=0;i<nloc;i++) {
428         xind[i] = xli[i];
429         cind[i] = xl_offset+i;
430       }
431 
432       ierr = ISCreateGeneral(comm,nloc,xind,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr);
433       ierr = ISCreateGeneral(comm,nloc,cind,PETSC_COPY_VALUES,&isc);CHKERRQ(ierr);
434       ierr = VecScatterCreate(tao->XL,isx,ipmP->ci,isc,&ipmP->xl_scat);CHKERRQ(ierr);
435       ierr = ISDestroy(&isx);CHKERRQ(ierr);
436       ierr = ISDestroy(&isc);CHKERRQ(ierr);
437       ierr = ISDestroy(&bigxl);CHKERRQ(ierr);
438     }
439 
440     if (ipmP->nxub) {
441       ierr = ISAllGather(ipmP->isxu,&bigxu);CHKERRQ(ierr);
442       ierr = ISGetIndices(bigxu,&xui);CHKERRQ(ierr);
443       /* find offsets for this processor */
444       xu_offset = ipmP->mi + ipmP->nxlb;
445       for (i=0;i<ipmP->nxub;i++) {
446         if (xui[i] < xstart) {
447           xu_offset++;
448         } else break;
449       }
450       ierr = ISRestoreIndices(bigxu,&xui);CHKERRQ(ierr);
451 
452       ierr = ISGetIndices(ipmP->isxu,&xui);CHKERRQ(ierr);
453       ierr = ISGetLocalSize(ipmP->isxu,&nloc);CHKERRQ(ierr);
454       for (i=0;i<nloc;i++) {
455         xind[i] = xui[i];
456         cind[i] = xu_offset+i;
457       }
458 
459       ierr = ISCreateGeneral(comm,nloc,xind,PETSC_COPY_VALUES,&isx);CHKERRQ(ierr);
460       ierr = ISCreateGeneral(comm,nloc,cind,PETSC_COPY_VALUES,&isc);CHKERRQ(ierr);
461       ierr = VecScatterCreate(tao->XU,isx,ipmP->ci,isc,&ipmP->xu_scat);CHKERRQ(ierr);
462       ierr = ISDestroy(&isx);CHKERRQ(ierr);
463       ierr = ISDestroy(&isc);CHKERRQ(ierr);
464       ierr = ISDestroy(&bigxu);CHKERRQ(ierr);
465     }
466   }
467   ierr = VecCreate(comm,&ipmP->bigrhs);CHKERRQ(ierr);
468   ierr = VecGetType(tao->solution,&vtype);CHKERRQ(ierr);
469   ierr = VecSetType(ipmP->bigrhs,vtype);CHKERRQ(ierr);
470   ierr = VecSetSizes(ipmP->bigrhs,PETSC_DECIDE,bigsize);CHKERRQ(ierr);
471   ierr = VecSetFromOptions(ipmP->bigrhs);CHKERRQ(ierr);
472   ierr = VecDuplicate(ipmP->bigrhs,&ipmP->bigstep);CHKERRQ(ierr);
473 
474   /* create scatters for step->x and x->rhs */
475   for (i=xstart;i<xend;i++) {
476     stepind[i-xstart] = i;
477     xind[i-xstart] = i;
478   }
479   ierr = ISCreateGeneral(comm,xend-xstart,stepind,PETSC_COPY_VALUES,&sis);CHKERRQ(ierr);
480   ierr = ISCreateGeneral(comm,xend-xstart,xind,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr);
481   ierr = VecScatterCreate(ipmP->bigstep,sis,tao->solution,is1,&ipmP->step1);CHKERRQ(ierr);
482   ierr = VecScatterCreate(tao->solution,is1,ipmP->bigrhs,sis,&ipmP->rhs1);CHKERRQ(ierr);
483   ierr = ISDestroy(&sis);CHKERRQ(ierr);
484   ierr = ISDestroy(&is1);CHKERRQ(ierr);
485 
486   if (ipmP->nb > 0) {
487     for (i=sstart;i<send;i++) {
488       stepind[i-sstart] = i+ipmP->n;
489       cind[i-sstart] = i;
490     }
491     ierr = ISCreateGeneral(comm,send-sstart,stepind,PETSC_COPY_VALUES,&sis);CHKERRQ(ierr);
492     ierr = ISCreateGeneral(comm,send-sstart,cind,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr);
493     ierr = VecScatterCreate(ipmP->bigstep,sis,ipmP->s,is1,&ipmP->step2);CHKERRQ(ierr);
494     ierr = ISDestroy(&sis);CHKERRQ(ierr);
495 
496     for (i=sstart;i<send;i++) {
497       stepind[i-sstart] = i+ipmP->n+ipmP->me;
498       cind[i-sstart] = i;
499     }
500     ierr = ISCreateGeneral(comm,send-sstart,stepind,PETSC_COPY_VALUES,&sis);CHKERRQ(ierr);
501     ierr = VecScatterCreate(ipmP->s,is1,ipmP->bigrhs,sis,&ipmP->rhs3);CHKERRQ(ierr);
502     ierr = ISDestroy(&sis);CHKERRQ(ierr);
503     ierr = ISDestroy(&is1);CHKERRQ(ierr);
504   }
505 
506   if (ipmP->me > 0) {
507     ierr = VecGetOwnershipRange(tao->constraints_equality,&ucestart,&uceend);CHKERRQ(ierr);
508     for (i=ucestart;i<uceend;i++) {
509       stepind[i-ucestart] = i + ipmP->n+ipmP->nb;
510       uceind[i-ucestart] = i;
511     }
512 
513     ierr = ISCreateGeneral(comm,uceend-ucestart,stepind,PETSC_COPY_VALUES,&sis);CHKERRQ(ierr);
514     ierr = ISCreateGeneral(comm,uceend-ucestart,uceind,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr);
515     ierr = VecScatterCreate(ipmP->bigstep,sis,tao->constraints_equality,is1,&ipmP->step3);CHKERRQ(ierr);
516     ierr = ISDestroy(&sis);CHKERRQ(ierr);
517 
518     for (i=ucestart;i<uceend;i++) {
519       stepind[i-ucestart] = i + ipmP->n;
520     }
521 
522     ierr = ISCreateGeneral(comm,uceend-ucestart,stepind,PETSC_COPY_VALUES,&sis);CHKERRQ(ierr);
523     ierr = VecScatterCreate(tao->constraints_equality,is1,ipmP->bigrhs,sis,&ipmP->rhs2);CHKERRQ(ierr);
524     ierr = ISDestroy(&sis);CHKERRQ(ierr);
525     ierr = ISDestroy(&is1);CHKERRQ(ierr);
526   }
527 
528   if (ipmP->nb > 0) {
529     for (i=sstart;i<send;i++) {
530       stepind[i-sstart] = i + ipmP->n + ipmP->nb + ipmP->me;
531       cind[i-sstart] = i;
532     }
533     ierr = ISCreateGeneral(comm,send-sstart,cind,PETSC_COPY_VALUES,&is1);
534     ierr = ISCreateGeneral(comm,send-sstart,stepind,PETSC_COPY_VALUES,&sis);CHKERRQ(ierr);
535     ierr = VecScatterCreate(ipmP->bigstep,sis,ipmP->s,is1,&ipmP->step4);CHKERRQ(ierr);
536     ierr = VecScatterCreate(ipmP->s,is1,ipmP->bigrhs,sis,&ipmP->rhs4);CHKERRQ(ierr);
537     ierr = ISDestroy(&sis);CHKERRQ(ierr);
538     ierr = ISDestroy(&is1);CHKERRQ(ierr);
539   }
540 
541   ierr = PetscFree(stepind);CHKERRQ(ierr);
542   ierr = PetscFree(cind);CHKERRQ(ierr);
543   ierr = PetscFree(ucind);CHKERRQ(ierr);
544   ierr = PetscFree(uceind);CHKERRQ(ierr);
545   ierr = PetscFree(xind);CHKERRQ(ierr);
546   PetscFunctionReturn(0);
547 }
548 
549 #undef __FUNCT__
550 #define __FUNCT__ "TaoDestroy_IPM"
551 static PetscErrorCode TaoDestroy_IPM(Tao tao)
552 {
553   TAO_IPM        *ipmP = (TAO_IPM*)tao->data;
554   PetscErrorCode ierr;
555 
556   PetscFunctionBegin;
557   ierr = VecDestroy(&ipmP->rd);CHKERRQ(ierr);
558   ierr = VecDestroy(&ipmP->rpe);CHKERRQ(ierr);
559   ierr = VecDestroy(&ipmP->rpi);CHKERRQ(ierr);
560   ierr = VecDestroy(&ipmP->work);CHKERRQ(ierr);
561   ierr = VecDestroy(&ipmP->lamdae);CHKERRQ(ierr);
562   ierr = VecDestroy(&ipmP->lamdai);CHKERRQ(ierr);
563   ierr = VecDestroy(&ipmP->s);CHKERRQ(ierr);
564   ierr = VecDestroy(&ipmP->ds);CHKERRQ(ierr);
565   ierr = VecDestroy(&ipmP->ci);CHKERRQ(ierr);
566 
567   ierr = VecDestroy(&ipmP->rhs_x);CHKERRQ(ierr);
568   ierr = VecDestroy(&ipmP->rhs_lamdae);CHKERRQ(ierr);
569   ierr = VecDestroy(&ipmP->rhs_lamdai);CHKERRQ(ierr);
570   ierr = VecDestroy(&ipmP->rhs_s);CHKERRQ(ierr);
571 
572   ierr = VecDestroy(&ipmP->save_x);CHKERRQ(ierr);
573   ierr = VecDestroy(&ipmP->save_lamdae);CHKERRQ(ierr);
574   ierr = VecDestroy(&ipmP->save_lamdai);CHKERRQ(ierr);
575   ierr = VecDestroy(&ipmP->save_s);CHKERRQ(ierr);
576 
577   ierr = VecScatterDestroy(&ipmP->step1);CHKERRQ(ierr);
578   ierr = VecScatterDestroy(&ipmP->step2);CHKERRQ(ierr);
579   ierr = VecScatterDestroy(&ipmP->step3);CHKERRQ(ierr);
580   ierr = VecScatterDestroy(&ipmP->step4);CHKERRQ(ierr);
581 
582   ierr = VecScatterDestroy(&ipmP->rhs1);CHKERRQ(ierr);
583   ierr = VecScatterDestroy(&ipmP->rhs2);CHKERRQ(ierr);
584   ierr = VecScatterDestroy(&ipmP->rhs3);CHKERRQ(ierr);
585   ierr = VecScatterDestroy(&ipmP->rhs4);CHKERRQ(ierr);
586 
587   ierr = VecScatterDestroy(&ipmP->ci_scat);CHKERRQ(ierr);
588   ierr = VecScatterDestroy(&ipmP->xl_scat);CHKERRQ(ierr);
589   ierr = VecScatterDestroy(&ipmP->xu_scat);CHKERRQ(ierr);
590 
591   ierr = VecDestroy(&ipmP->dlamdai);CHKERRQ(ierr);
592   ierr = VecDestroy(&ipmP->dlamdae);CHKERRQ(ierr);
593   ierr = VecDestroy(&ipmP->Zero_nb);CHKERRQ(ierr);
594   ierr = VecDestroy(&ipmP->One_nb);CHKERRQ(ierr);
595   ierr = VecDestroy(&ipmP->Inf_nb);CHKERRQ(ierr);
596   ierr = VecDestroy(&ipmP->complementarity);CHKERRQ(ierr);
597 
598   ierr = VecDestroy(&ipmP->bigrhs);CHKERRQ(ierr);
599   ierr = VecDestroy(&ipmP->bigstep);CHKERRQ(ierr);
600   ierr = MatDestroy(&ipmP->Ai);CHKERRQ(ierr);
601   ierr = MatDestroy(&ipmP->K);CHKERRQ(ierr);
602   ierr = ISDestroy(&ipmP->isxu);CHKERRQ(ierr);
603   ierr = ISDestroy(&ipmP->isxl);CHKERRQ(ierr);
604   ierr = PetscFree(tao->data);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "TaoSetFromOptions_IPM"
610 static PetscErrorCode TaoSetFromOptions_IPM(Tao tao)
611 {
612   TAO_IPM        *ipmP = (TAO_IPM*)tao->data;
613   PetscErrorCode ierr;
614   PetscBool      flg;
615 
616   PetscFunctionBegin;
617   ierr = PetscOptionsHead("IPM method for constrained optimization");CHKERRQ(ierr);
618   ierr = PetscOptionsBool("-ipm_monitorkkt","monitor kkt status",NULL,ipmP->monitorkkt,&ipmP->monitorkkt,&flg);CHKERRQ(ierr);
619   ierr = PetscOptionsReal("-ipm_pushs","parameter to push initial slack variables away from bounds",NULL,ipmP->pushs,&ipmP->pushs,&flg);
620   ierr = PetscOptionsReal("-ipm_pushnu","parameter to push initial (inequality) dual variables away from bounds",NULL,ipmP->pushnu,&ipmP->pushnu,&flg);
621   ierr = PetscOptionsTail();CHKERRQ(ierr);
622   ierr =KSPSetFromOptions(tao->ksp);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "TaoView_IPM"
628 static PetscErrorCode TaoView_IPM(Tao tao, PetscViewer viewer)
629 {
630   return 0;
631 }
632 
633 /* IPMObjectiveAndGradient()
634    f = d'x + 0.5 * x' * H * x
635    rd = H*x + d + Ae'*lame - Ai'*lami
636    rpe = Ae*x - be
637    rpi = Ai*x - yi - bi
638    mu = yi' * lami/mi;
639    com = yi.*lami
640 
641    phi = ||rd|| + ||rpe|| + ||rpi|| + ||com||
642 */
643 /*
644 #undef __FUNCT__
645 #define __FUNCT__ "IPMObjective"
646 static PetscErrorCode IPMObjective(TaoLineSearch ls, Vec X, PetscReal *f, void *tptr)
647 {
648   Tao tao = (Tao)tptr;
649   TAO_IPM *ipmP = (TAO_IPM*)tao->data;
650   PetscErrorCode ierr;
651   PetscFunctionBegin;
652   ierr = IPMComputeKKT(tao);CHKERRQ(ierr);
653   *f = ipmP->phi;
654   PetscFunctionReturn(0);
655 }
656 */
657 
658 /*
659    f = d'x + 0.5 * x' * H * x
660    rd = H*x + d + Ae'*lame - Ai'*lami
661        Ai =   jac_ineq
662                I (w/lb)
663               -I (w/ub)
664 
665    rpe = ce
666    rpi = ci - s;
667    com = s.*lami
668    mu = yi' * lami/mi;
669 
670    phi = ||rd|| + ||rpe|| + ||rpi|| + ||com||
671 */
672 #undef __FUNCT__
673 #define __FUNCT__ "IPMComputeKKT"
674 static PetscErrorCode IPMComputeKKT(Tao tao)
675 {
676   TAO_IPM        *ipmP = (TAO_IPM *)tao->data;
677   PetscScalar    norm;
678   PetscErrorCode ierr;
679 
680   PetscFunctionBegin;
681   ierr = VecCopy(tao->gradient,ipmP->rd);CHKERRQ(ierr);
682 
683   if (ipmP->me > 0) {
684     /* rd = gradient + Ae'*lamdae */
685     ierr = MatMultTranspose(tao->jacobian_equality,ipmP->lamdae,ipmP->work);CHKERRQ(ierr);
686     ierr = VecAXPY(ipmP->rd, 1.0, ipmP->work);CHKERRQ(ierr);
687 
688 #if defined DEBUG_KKT
689     PetscPrintf(PETSC_COMM_WORLD,"\nAe.lamdae\n");
690     ierr = VecView(ipmP->work,0);
691 #endif
692     /* rpe = ce(x) */
693     ierr = VecCopy(tao->constraints_equality,ipmP->rpe);CHKERRQ(ierr);
694   }
695   if (ipmP->nb > 0) {
696     /* rd = rd - Ai'*lamdai */
697     ierr = MatMultTranspose(ipmP->Ai,ipmP->lamdai,ipmP->work);CHKERRQ(ierr);
698     ierr = VecAXPY(ipmP->rd, -1.0, ipmP->work);CHKERRQ(ierr);
699 #if defined DEBUG_KKT
700     PetscPrintf(PETSC_COMM_WORLD,"\nAi\n");
701     ierr = MatView(ipmP->Ai,0);
702     PetscPrintf(PETSC_COMM_WORLD,"\nAi.lamdai\n");
703     ierr = VecView(ipmP->work,0);
704 #endif
705     /* rpi = cin - s */
706     ierr = VecCopy(ipmP->ci,ipmP->rpi);CHKERRQ(ierr);
707     ierr = VecAXPY(ipmP->rpi, -1.0, ipmP->s);CHKERRQ(ierr);
708 
709     /* com = s .* lami */
710     ierr = VecPointwiseMult(ipmP->complementarity, ipmP->s,ipmP->lamdai);CHKERRQ(ierr);
711   }
712   /* phi = ||rd; rpe; rpi; com|| */
713   ierr = VecDot(ipmP->rd,ipmP->rd,&norm);CHKERRQ(ierr);
714   ipmP->phi = norm;
715   if (ipmP->me > 0 ) {
716     ierr = VecDot(ipmP->rpe,ipmP->rpe,&norm);CHKERRQ(ierr);
717     ipmP->phi += norm;
718   }
719   if (ipmP->nb > 0) {
720     ierr = VecDot(ipmP->rpi,ipmP->rpi,&norm);CHKERRQ(ierr);
721     ipmP->phi += norm;
722     ierr = VecDot(ipmP->complementarity,ipmP->complementarity,&norm);CHKERRQ(ierr);
723     ipmP->phi += norm;
724     /* mu = s'*lami/nb */
725     ierr = VecDot(ipmP->s,ipmP->lamdai,&ipmP->mu);CHKERRQ(ierr);
726     ipmP->mu /= ipmP->nb;
727   } else {
728     ipmP->mu = 1.0;
729   }
730 
731   ipmP->phi = PetscSqrtScalar(ipmP->phi);
732 #if defined DEBUG_KKT
733   if (ipmP->monitorkkt) {
734     ierr = PetscPrintf(PETSC_COMM_WORLD,"obj=%g,\tphi = %g,\tmu=%g\talpha1=%g\talpha2=%g\n",(double)ipmP->kkt_f,(double)ipmP->phi,(double)ipmP->mu,(double)ipmP->alpha1,(double)ipmP->alpha2);
735   }
736   PetscPrintf(PETSC_COMM_WORLD,"\ngradient\n");
737   ierr = VecView(tao->gradient,0);
738   PetscPrintf(PETSC_COMM_WORLD,"\nrd\n");
739   ierr = VecView(ipmP->rd,0);
740   PetscPrintf(PETSC_COMM_WORLD,"\nrpe\n");
741   ierr = VecView(ipmP->rpe,0);
742   PetscPrintf(PETSC_COMM_WORLD,"\nrpi\n");
743   ierr = VecView(ipmP->rpi,0);
744   PetscPrintf(PETSC_COMM_WORLD,"\ncomplementarity\n");
745   ierr = VecView(ipmP->complementarity,0);
746 #endif
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "IPMEvaluate"
752 /* evaluate user info at current point */
753 PetscErrorCode IPMEvaluate(Tao tao)
754 {
755   TAO_IPM        *ipmP = (TAO_IPM *)tao->data;
756   PetscErrorCode ierr;
757 
758   PetscFunctionBegin;
759   ierr = TaoComputeObjectiveAndGradient(tao,tao->solution,&ipmP->kkt_f,tao->gradient);CHKERRQ(ierr);
760   ierr = TaoComputeHessian(tao,tao->solution,&tao->hessian,&tao->hessian_pre,&ipmP->Hflag);CHKERRQ(ierr);
761   if (ipmP->me > 0) {
762     ierr = TaoComputeEqualityConstraints(tao,tao->solution,tao->constraints_equality);
763     ierr = TaoComputeJacobianEquality(tao,tao->solution,&tao->jacobian_equality,&tao->jacobian_equality_pre,&ipmP->Aiflag);CHKERRQ(ierr);
764   }
765   if (ipmP->mi > 0) {
766     ierr = TaoComputeInequalityConstraints(tao,tao->solution,tao->constraints_inequality);
767     ierr = TaoComputeJacobianInequality(tao,tao->solution,&tao->jacobian_inequality,&tao->jacobian_inequality_pre,&ipmP->Aeflag);CHKERRQ(ierr);
768   }
769   if (ipmP->nb > 0) {
770     /* Ai' =   jac_ineq | I (w/lb) | -I (w/ub)  */
771     ierr = IPMUpdateAi(tao);CHKERRQ(ierr);
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 #undef __FUNCT__
777 #define __FUNCT__ "IPMPushInitialPoint"
778 /* Push initial point away from bounds */
779 PetscErrorCode IPMPushInitialPoint(Tao tao)
780 {
781   TAO_IPM        *ipmP = (TAO_IPM *)tao->data;
782   PetscErrorCode ierr;
783 
784   PetscFunctionBegin;
785   ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
786   if (tao->XL && tao->XU) {
787     ierr = VecMedian(tao->XL, tao->solution, tao->XU, tao->solution);CHKERRQ(ierr);
788   }
789   if (ipmP->nb > 0) {
790     ierr = VecSet(ipmP->s,ipmP->pushs);CHKERRQ(ierr);
791     ierr = VecSet(ipmP->lamdai,ipmP->pushnu);CHKERRQ(ierr);
792     if (ipmP->mi > 0) {
793       ierr = VecSet(tao->DI,ipmP->pushnu);CHKERRQ(ierr);
794     }
795   }
796   if (ipmP->me > 0) {
797     ierr = VecSet(tao->DE,1.0);CHKERRQ(ierr);
798     ierr = VecSet(ipmP->lamdae,1.0);CHKERRQ(ierr);
799   }
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "IPMUpdateAi"
805 PetscErrorCode IPMUpdateAi(Tao tao)
806 {
807   /* Ai =     Ji
808               I (w/lb)
809              -I (w/ub) */
810 
811   /* Ci =    user->ci
812              Xi - lb (w/lb)
813              -Xi + ub (w/ub)  */
814 
815   TAO_IPM           *ipmP = (TAO_IPM *)tao->data;
816   MPI_Comm          comm;
817   PetscInt          i;
818   PetscScalar       newval;
819   PetscInt          newrow,newcol,ncols;
820   const PetscScalar *vals;
821   const PetscInt    *cols;
822   PetscInt          astart,aend,jstart,jend;
823   PetscInt          *nonzeros;
824   PetscInt          r2,r3,r4;
825   PetscMPIInt       size;
826   PetscErrorCode    ierr;
827 
828   PetscFunctionBegin;
829   r2 = ipmP->mi;
830   r3 = r2 + ipmP->nxlb;
831   r4 = r3 + ipmP->nxub;
832 
833   if (!ipmP->nb) PetscFunctionReturn(0);
834 
835   /* Create Ai matrix if it doesn't exist yet */
836   if (!ipmP->Ai) {
837     comm = ((PetscObject)(tao->solution))->comm;
838     ierr = PetscMalloc1(ipmP->nb,&nonzeros);CHKERRQ(ierr);
839     ierr = MPI_Comm_size(comm,&size);
840     if (size == 1) {
841       for (i=0;i<ipmP->mi;i++) {
842         ierr = MatGetRow(tao->jacobian_inequality,i,&ncols,NULL,NULL);CHKERRQ(ierr);
843         nonzeros[i] = ncols;
844         ierr = MatRestoreRow(tao->jacobian_inequality,i,&ncols,NULL,NULL);CHKERRQ(ierr);
845       }
846       for (i=r2;i<r4;i++) {
847         nonzeros[i] = 1;
848       }
849     }
850     ierr = MatCreate(comm,&ipmP->Ai);CHKERRQ(ierr);
851     ierr = MatSetType(ipmP->Ai,MATAIJ);CHKERRQ(ierr);
852     ierr = MatSetSizes(ipmP->Ai,PETSC_DECIDE,PETSC_DECIDE,ipmP->nb,ipmP->n);CHKERRQ(ierr);
853     ierr = MatSetFromOptions(ipmP->Ai);CHKERRQ(ierr);
854     ierr = MatMPIAIJSetPreallocation(ipmP->Ai,ipmP->nb,NULL,ipmP->nb,NULL);
855     ierr = MatSeqAIJSetPreallocation(ipmP->Ai,PETSC_DEFAULT,nonzeros);CHKERRQ(ierr);
856     if (size ==1) {
857       ierr = PetscFree(nonzeros);CHKERRQ(ierr);
858     }
859   }
860 
861   /* Copy values from user jacobian to Ai */
862   ierr = MatGetOwnershipRange(ipmP->Ai,&astart,&aend);CHKERRQ(ierr);
863 
864   /* Ai w/lb */
865   if (ipmP->mi) {
866     ierr = MatZeroEntries(ipmP->Ai);CHKERRQ(ierr);
867     ierr = MatGetOwnershipRange(tao->jacobian_inequality,&jstart,&jend);CHKERRQ(ierr);
868     for (i=jstart;i<jend;i++) {
869       ierr = MatGetRow(tao->jacobian_inequality,i,&ncols,&cols,&vals);CHKERRQ(ierr);
870       newrow = i;
871       ierr = MatSetValues(ipmP->Ai,1,&newrow,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
872       ierr = MatRestoreRow(tao->jacobian_inequality,i,&ncols,&cols,&vals);CHKERRQ(ierr);
873     }
874   }
875 
876   /* I w/ xlb */
877   if (ipmP->nxlb) {
878     for (i=0;i<ipmP->nxlb;i++) {
879       if (i>=astart && i<aend) {
880         newrow = i+r2;
881         newcol = i;
882         newval = 1.0;
883         ierr = MatSetValues(ipmP->Ai,1,&newrow,1,&newcol,&newval,INSERT_VALUES);CHKERRQ(ierr);
884       }
885     }
886   }
887   if (ipmP->nxub) {
888     /* I w/ xub */
889     for (i=0;i<ipmP->nxub;i++) {
890       if (i>=astart && i<aend) {
891       newrow = i+r3;
892       newcol = i;
893       newval = -1.0;
894       ierr = MatSetValues(ipmP->Ai,1,&newrow,1,&newcol,&newval,INSERT_VALUES);CHKERRQ(ierr);
895       }
896     }
897   }
898 
899   ierr = MatAssemblyBegin(ipmP->Ai,MAT_FINAL_ASSEMBLY);
900   ierr = MatAssemblyEnd(ipmP->Ai,MAT_FINAL_ASSEMBLY);
901   CHKMEMQ;
902 
903   ierr = VecSet(ipmP->ci,0.0);CHKERRQ(ierr);
904 
905   /* user ci */
906   if (ipmP->mi > 0) {
907     ierr = VecScatterBegin(ipmP->ci_scat,tao->constraints_inequality,ipmP->ci,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908     ierr = VecScatterEnd(ipmP->ci_scat,tao->constraints_inequality,ipmP->ci,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909   }
910   if (!ipmP->work){
911     VecDuplicate(tao->solution,&ipmP->work);
912   }
913   ierr = VecCopy(tao->solution,ipmP->work);CHKERRQ(ierr);
914   if (tao->XL) {
915     ierr = VecAXPY(ipmP->work,-1.0,tao->XL);CHKERRQ(ierr);
916 
917     /* lower bounds on variables */
918     if (ipmP->nxlb > 0) {
919       ierr = VecScatterBegin(ipmP->xl_scat,ipmP->work,ipmP->ci,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920       ierr = VecScatterEnd(ipmP->xl_scat,ipmP->work,ipmP->ci,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921     }
922   }
923   if (tao->XU) {
924     /* upper bounds on variables */
925     ierr = VecCopy(tao->solution,ipmP->work);CHKERRQ(ierr);
926     ierr = VecScale(ipmP->work,-1.0);CHKERRQ(ierr);
927     ierr = VecAXPY(ipmP->work,1.0,tao->XU);CHKERRQ(ierr);
928     if (ipmP->nxub > 0) {
929       ierr = VecScatterBegin(ipmP->xu_scat,ipmP->work,ipmP->ci,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
930       ierr = VecScatterEnd(ipmP->xu_scat,ipmP->work,ipmP->ci,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931     }
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 #undef __FUNCT__
937 #define __FUNCT__ "IPMUpdateK"
938 /* create K = [ Hlag , 0 , Ae', -Ai'];
939               [Ae , 0,   0  , 0];
940               [Ai ,-I,   0 ,  0];
941               [ 0 , S ,  0,   Y ];  */
942 PetscErrorCode IPMUpdateK(Tao tao)
943 {
944   TAO_IPM         *ipmP = (TAO_IPM *)tao->data;
945   MPI_Comm        comm;
946   PetscMPIInt     size;
947   PetscErrorCode  ierr;
948   PetscInt        i,j,row;
949   PetscInt        ncols,newcol,newcols[2],newrow;
950   const PetscInt  *cols;
951   const PetscReal *vals;
952   PetscReal       *l,*y;
953   PetscReal       *newvals;
954   PetscReal       newval;
955   PetscInt        subsize;
956   const PetscInt  *indices;
957   PetscInt        *nonzeros,*d_nonzeros,*o_nonzeros;
958   PetscInt        bigsize;
959   PetscInt        r1,r2,r3;
960   PetscInt        c1,c2,c3;
961   PetscInt        klocalsize;
962   PetscInt        hstart,hend,kstart,kend;
963   PetscInt        aistart,aiend,aestart,aeend;
964   PetscInt        sstart,send;
965 
966   PetscFunctionBegin;
967   comm = ((PetscObject)(tao->solution))->comm;
968   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
969   ierr = IPMUpdateAi(tao);CHKERRQ(ierr);
970 #if defined DEBUG_K
971   PetscPrintf(PETSC_COMM_WORLD,"H\n");  MatView(tao->hessian,0);
972   if (ipmP->nb) {
973     PetscPrintf(PETSC_COMM_WORLD,"Ai\n"); MatView(ipmP->Ai,0);
974   }
975   if (ipmP->me) {
976     PetscPrintf(PETSC_COMM_WORLD,"Ae\n"); MatView(tao->jacobian_equality,0);
977   }
978 #endif
979   /* allocate workspace */
980   subsize = PetscMax(ipmP->n,ipmP->nb);
981   subsize = PetscMax(ipmP->me,subsize);
982   subsize = PetscMax(2,subsize);
983   ierr = PetscMalloc1(subsize,&indices);CHKERRQ(ierr);
984   ierr = PetscMalloc1(subsize,&newvals);CHKERRQ(ierr);
985 
986   r1 = c1 = ipmP->n;
987   r2 = r1 + ipmP->me;  c2 = c1 + ipmP->nb;
988   r3 = c3 = r2 + ipmP->nb;
989 
990   bigsize = ipmP->n+2*ipmP->nb+ipmP->me;
991   ierr = VecGetOwnershipRange(ipmP->bigrhs,&kstart,&kend);CHKERRQ(ierr);
992   ierr = MatGetOwnershipRange(tao->hessian,&hstart,&hend);CHKERRQ(ierr);
993   klocalsize = kend-kstart;
994   if (!ipmP->K) {
995     if (size == 1) {
996       ierr = PetscMalloc1((kend-kstart),&nonzeros);CHKERRQ(ierr);
997       for (i=0;i<bigsize;i++) {
998         if (i<r1) {
999           ierr = MatGetRow(tao->hessian,i,&ncols,NULL,NULL);CHKERRQ(ierr);
1000           nonzeros[i] = ncols;
1001           ierr = MatRestoreRow(tao->hessian,i,&ncols,NULL,NULL);CHKERRQ(ierr);
1002           nonzeros[i] += ipmP->me+ipmP->nb;
1003         } else if (i<r2) {
1004           nonzeros[i-kstart] = ipmP->n;
1005         } else if (i<r3) {
1006           nonzeros[i-kstart] = ipmP->n+1;
1007         } else if (i<bigsize) {
1008           nonzeros[i-kstart] = 2;
1009         }
1010       }
1011       ierr = MatCreate(comm,&ipmP->K);CHKERRQ(ierr);
1012       ierr = MatSetType(ipmP->K,MATSEQAIJ);CHKERRQ(ierr);
1013       ierr = MatSetSizes(ipmP->K,klocalsize,klocalsize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1014       ierr = MatSeqAIJSetPreallocation(ipmP->K,0,nonzeros);CHKERRQ(ierr);
1015       ierr = MatSetFromOptions(ipmP->K);CHKERRQ(ierr);
1016       ierr = PetscFree(nonzeros);CHKERRQ(ierr);
1017     } else {
1018       ierr = PetscMalloc1((kend-kstart),&d_nonzeros);CHKERRQ(ierr);
1019       ierr = PetscMalloc1((kend-kstart),&o_nonzeros);CHKERRQ(ierr);
1020       for (i=kstart;i<kend;i++) {
1021         if (i<r1) {
1022           /* TODO fix preallocation for mpi mats */
1023           d_nonzeros[i-kstart] = PetscMin(ipmP->n+ipmP->me+ipmP->nb,kend-kstart);
1024           o_nonzeros[i-kstart] = PetscMin(ipmP->n+ipmP->me+ipmP->nb,bigsize-(kend-kstart));
1025         } else if (i<r2) {
1026           d_nonzeros[i-kstart] = PetscMin(ipmP->n,kend-kstart);
1027           o_nonzeros[i-kstart] = PetscMin(ipmP->n,bigsize-(kend-kstart));
1028         } else if (i<r3) {
1029           d_nonzeros[i-kstart] = PetscMin(ipmP->n+2,kend-kstart);
1030           o_nonzeros[i-kstart] = PetscMin(ipmP->n+2,bigsize-(kend-kstart));
1031         } else {
1032           d_nonzeros[i-kstart] = PetscMin(2,kend-kstart);
1033           o_nonzeros[i-kstart] = PetscMin(2,bigsize-(kend-kstart));
1034         }
1035       }
1036       ierr = MatCreate(comm,&ipmP->K);CHKERRQ(ierr);
1037       ierr = MatSetType(ipmP->K,MATMPIAIJ);CHKERRQ(ierr);
1038       ierr = MatSetSizes(ipmP->K,klocalsize,klocalsize,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1039       ierr = MatMPIAIJSetPreallocation(ipmP->K,0,d_nonzeros,0,o_nonzeros);CHKERRQ(ierr);
1040       ierr = PetscFree(d_nonzeros);CHKERRQ(ierr);
1041       ierr = PetscFree(o_nonzeros);CHKERRQ(ierr);
1042       ierr = MatSetFromOptions(ipmP->K);CHKERRQ(ierr);
1043     }
1044   }
1045 
1046   ierr = MatZeroEntries(ipmP->K);CHKERRQ(ierr);
1047   /* Copy H */
1048   for (i=hstart;i<hend;i++) {
1049     ierr = MatGetRow(tao->hessian,i,&ncols,&cols,&vals);CHKERRQ(ierr);
1050     if (ncols > 0) {
1051       ierr = MatSetValues(ipmP->K,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1052     }
1053     ierr = MatRestoreRow(tao->hessian,i,&ncols,&cols,&vals);CHKERRQ(ierr);
1054   }
1055 
1056   /* Copy Ae and Ae' */
1057   if (ipmP->me > 0) {
1058     ierr = MatGetOwnershipRange(tao->jacobian_equality,&aestart,&aeend);CHKERRQ(ierr);
1059     for (i=aestart;i<aeend;i++) {
1060       ierr = MatGetRow(tao->jacobian_equality,i,&ncols,&cols,&vals);CHKERRQ(ierr);
1061       if (ncols > 0) {
1062         /*Ae*/
1063         row = i+r1;
1064         ierr = MatSetValues(ipmP->K,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1065         /*Ae'*/
1066         for (j=0;j<ncols;j++) {
1067           newcol = i + c2;
1068           newrow = cols[j];
1069           newval = vals[j];
1070           ierr = MatSetValues(ipmP->K,1,&newrow,1,&newcol,&newval,INSERT_VALUES);CHKERRQ(ierr);
1071         }
1072       }
1073       ierr = MatRestoreRow(tao->jacobian_equality,i,&ncols,&cols,&vals);CHKERRQ(ierr);
1074     }
1075   }
1076 
1077   if (ipmP->nb > 0) {
1078     ierr = MatGetOwnershipRange(ipmP->Ai,&aistart,&aiend);CHKERRQ(ierr);
1079     /* Copy Ai,and Ai' */
1080     for (i=aistart;i<aiend;i++) {
1081       row = i+r2;
1082       ierr = MatGetRow(ipmP->Ai,i,&ncols,&cols,&vals);CHKERRQ(ierr);
1083       if (ncols > 0) {
1084         /*Ai*/
1085         ierr = MatSetValues(ipmP->K,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1086         /*-Ai'*/
1087         for (j=0;j<ncols;j++) {
1088           newcol = i + c3;
1089           newrow = cols[j];
1090           newval = -vals[j];
1091           ierr = MatSetValues(ipmP->K,1,&newrow,1,&newcol,&newval,INSERT_VALUES);CHKERRQ(ierr);
1092         }
1093       }
1094       ierr = MatRestoreRow(ipmP->Ai,i,&ncols,&cols,&vals);CHKERRQ(ierr);
1095     }
1096 
1097     /* -I */
1098     for (i=kstart;i<kend;i++) {
1099       if (i>=r2 && i<r3) {
1100         newrow = i;
1101         newcol = i-r2+c1;
1102         newval = -1.0;
1103         MatSetValues(ipmP->K,1,&newrow,1,&newcol,&newval,INSERT_VALUES);CHKERRQ(ierr);
1104       }
1105     }
1106 
1107     /* Copy L,Y */
1108     ierr = VecGetOwnershipRange(ipmP->s,&sstart,&send);CHKERRQ(ierr);
1109     ierr = VecGetArray(ipmP->lamdai,&l);CHKERRQ(ierr);
1110     ierr = VecGetArray(ipmP->s,&y);CHKERRQ(ierr);
1111 
1112     for (i=sstart;i<send;i++) {
1113       newcols[0] = c1+i;
1114       newcols[1] = c3+i;
1115       newvals[0] = l[i-sstart];
1116       newvals[1] = y[i-sstart];
1117       newrow = r3+i;
1118       ierr = MatSetValues(ipmP->K,1,&newrow,2,newcols,newvals,INSERT_VALUES);CHKERRQ(ierr);
1119     }
1120 
1121     ierr = VecRestoreArray(ipmP->lamdai,&l);CHKERRQ(ierr);
1122     ierr = VecRestoreArray(ipmP->s,&y);CHKERRQ(ierr);
1123   }
1124 
1125   ierr = PetscFree(indices);CHKERRQ(ierr);
1126   ierr = PetscFree(newvals);CHKERRQ(ierr);
1127   ierr = MatAssemblyBegin(ipmP->K,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1128   ierr = MatAssemblyEnd(ipmP->K,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1129 #if defined DEBUG_K
1130   PetscPrintf(PETSC_COMM_WORLD,"K\n");  MatView(ipmP->K,0);
1131 #endif
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "IPMGatherRHS"
1137 PetscErrorCode IPMGatherRHS(Tao tao,Vec RHS,Vec X1,Vec X2,Vec X3,Vec X4)
1138 {
1139   TAO_IPM        *ipmP = (TAO_IPM *)tao->data;
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   /* rhs = [x1      (n)
1144             x2     (me)
1145             x3     (nb)
1146             x4     (nb)] */
1147   if (X1) {
1148     ierr = VecScatterBegin(ipmP->rhs1,X1,RHS,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149     ierr = VecScatterEnd(ipmP->rhs1,X1,RHS,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1150   }
1151   if (ipmP->me > 0 && X2) {
1152     ierr = VecScatterBegin(ipmP->rhs2,X2,RHS,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1153     ierr = VecScatterEnd(ipmP->rhs2,X2,RHS,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1154   }
1155   if (ipmP->nb > 0) {
1156     if (X3) {
1157       ierr = VecScatterBegin(ipmP->rhs3,X3,RHS,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1158       ierr = VecScatterEnd(ipmP->rhs3,X3,RHS,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1159     }
1160     if (X4) {
1161       ierr = VecScatterBegin(ipmP->rhs4,X4,RHS,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1162       ierr = VecScatterEnd(ipmP->rhs4,X4,RHS,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1163     }
1164   }
1165 #if defined(DEBUG_SCATTER)
1166   PetscPrintf(PETSC_COMM_WORLD,"X1-X4\n");
1167   if (X1) {VecView(X1,0);}
1168   if (X2) {VecView(X2,0);}
1169   if (X3) {VecView(X3,0);}
1170   if (X4) {VecView(X4,0);}
1171   PetscPrintf(PETSC_COMM_WORLD,"RHS\n");
1172   VecView(RHS,0);
1173 #endif
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "IPMScatterStep"
1179 PetscErrorCode IPMScatterStep(Tao tao, Vec STEP, Vec X1, Vec X2, Vec X3, Vec X4)
1180 {
1181   TAO_IPM        *ipmP = (TAO_IPM *)tao->data;
1182   PetscErrorCode ierr;
1183 
1184   PetscFunctionBegin;
1185   CHKMEMQ;
1186   /*        [x1    (n)
1187              x2    (nb) may be 0
1188              x3    (me) may be 0
1189              x4    (nb) may be 0 */
1190   if (X1) {
1191     ierr = VecScatterBegin(ipmP->step1,STEP,X1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1192     ierr = VecScatterEnd(ipmP->step1,STEP,X1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1193   }
1194   if (X2 && ipmP->nb > 0) {
1195     ierr = VecScatterBegin(ipmP->step2,STEP,X2,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1196     ierr = VecScatterEnd(ipmP->step2,STEP,X2,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1197   }
1198   if (X3 && ipmP->me > 0) {
1199     ierr = VecScatterBegin(ipmP->step3,STEP,X3,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1200     ierr = VecScatterEnd(ipmP->step3,STEP,X3,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1201   }
1202   if (X4 && ipmP->nb > 0) {
1203     ierr = VecScatterBegin(ipmP->step4,STEP,X4,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1204     ierr = VecScatterEnd(ipmP->step4,STEP,X4,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1205   }
1206   CHKMEMQ;
1207 #if defined(DEBUG_SCATTER)
1208   PetscPrintf(PETSC_COMM_WORLD,"Step\n");
1209   VecView(STEP,0);
1210   PetscPrintf(PETSC_COMM_WORLD,"X1-X4\n");
1211   if (X1) {VecView(X1,0);}
1212   if (X2) {VecView(X2,0);}
1213   if (X3) {VecView(X3,0);}
1214   if (X4) {VecView(X4,0);}
1215 #endif
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 EXTERN_C_BEGIN
1220 #undef __FUNCT__
1221 #define __FUNCT__ "TaoCreate_IPM"
1222 PetscErrorCode TaoCreate_IPM(Tao tao)
1223 {
1224   TAO_IPM        *ipmP;
1225   PetscErrorCode ierr;
1226 
1227   PetscFunctionBegin;
1228   tao->ops->setup = TaoSetup_IPM;
1229   tao->ops->solve = TaoSolve_IPM;
1230   tao->ops->view = TaoView_IPM;
1231   tao->ops->setfromoptions = TaoSetFromOptions_IPM;
1232   tao->ops->destroy = TaoDestroy_IPM;
1233   /* tao->ops->computedual = TaoComputeDual_IPM; */
1234 
1235   ierr = PetscNewLog(tao,&ipmP);CHKERRQ(ierr);
1236   tao->data = (void*)ipmP;
1237   tao->max_it = 200;
1238   tao->max_funcs = 500;
1239   tao->fatol = 1e-4;
1240   tao->frtol = 1e-4;
1241   ipmP->dec = 10000; /* line search critera */
1242   ipmP->taumin = 0.995;
1243   ipmP->monitorkkt = PETSC_FALSE;
1244   ipmP->pushs = 100;
1245   ipmP->pushnu = 100;
1246   ierr = KSPCreate(((PetscObject)tao)->comm, &tao->ksp);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 EXTERN_C_END
1250 
1251