xref: /petsc/src/tao/constrained/impls/ipm/pdipm.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 #include <../src/tao/constrained/impls/ipm/pdipm.h>
2 
3 /*
4    TaoPDIPMEvaluateFunctionsAndJacobians - Evaluate the objective function f, gradient fx, constraints, and all the Jacobians at current vector
5 
6    Collective on tao
7 
8    Input Parameter:
9 +  tao - solver context
10 -  x - vector at which all objects to be evaluated
11 
12    Level: beginner
13 
14 .seealso: TaoPDIPMUpdateConstraints(), TaoPDIPMSetUpBounds()
15 */
16 static PetscErrorCode TaoPDIPMEvaluateFunctionsAndJacobians(Tao tao,Vec x)
17 {
18   PetscErrorCode ierr;
19   TAO_PDIPM      *pdipm=(TAO_PDIPM*)tao->data;
20 
21   PetscFunctionBegin;
22   /* Compute user objective function and gradient */
23   ierr = TaoComputeObjectiveAndGradient(tao,x,&pdipm->obj,tao->gradient);CHKERRQ(ierr);
24 
25   /* Equality constraints and Jacobian */
26   if (pdipm->Ng) {
27     ierr = TaoComputeEqualityConstraints(tao,x,tao->constraints_equality);CHKERRQ(ierr);
28     ierr = TaoComputeJacobianEquality(tao,x,tao->jacobian_equality,tao->jacobian_equality_pre);CHKERRQ(ierr);
29   }
30 
31   /* Inequality constraints and Jacobian */
32   if (pdipm->Nh) {
33     ierr = TaoComputeInequalityConstraints(tao,x,tao->constraints_inequality);CHKERRQ(ierr);
34     ierr = TaoComputeJacobianInequality(tao,x,tao->jacobian_inequality,tao->jacobian_inequality_pre);CHKERRQ(ierr);
35   }
36   PetscFunctionReturn(0);
37 }
38 
39 /*
40   TaoPDIPMUpdateConstraints - Update the vectors ce and ci at x
41 
42   Collective
43 
44   Input Parameter:
45 + tao - Tao context
46 - x - vector at which constraints to be evaluated
47 
48    Level: beginner
49 
50 .seealso: TaoPDIPMEvaluateFunctionsAndJacobians()
51 */
52 static PetscErrorCode TaoPDIPMUpdateConstraints(Tao tao,Vec x)
53 {
54   PetscErrorCode    ierr;
55   TAO_PDIPM         *pdipm=(TAO_PDIPM*)tao->data;
56   PetscInt          i,offset,offset1,k,xstart;
57   PetscScalar       *carr;
58   const PetscInt    *ubptr,*lbptr,*bxptr,*fxptr;
59   const PetscScalar *xarr,*xuarr,*xlarr,*garr,*harr;
60 
61   PetscFunctionBegin;
62   ierr = VecGetOwnershipRange(x,&xstart,NULL);CHKERRQ(ierr);
63 
64   ierr = VecGetArrayRead(x,&xarr);CHKERRQ(ierr);
65   ierr = VecGetArrayRead(tao->XU,&xuarr);CHKERRQ(ierr);
66   ierr = VecGetArrayRead(tao->XL,&xlarr);CHKERRQ(ierr);
67 
68   /* (1) Update ce vector */
69   ierr = VecGetArrayWrite(pdipm->ce,&carr);CHKERRQ(ierr);
70 
71   if (pdipm->Ng) {
72     /* (1.a) Inserting updated g(x) */
73     ierr = VecGetArrayRead(tao->constraints_equality,&garr);CHKERRQ(ierr);
74     ierr = PetscMemcpy(carr,garr,pdipm->ng*sizeof(PetscScalar));CHKERRQ(ierr);
75     ierr = VecRestoreArrayRead(tao->constraints_equality,&garr);CHKERRQ(ierr);
76   }
77 
78   /* (1.b) Update xfixed */
79   if (pdipm->Nxfixed) {
80     offset = pdipm->ng;
81     ierr = ISGetIndices(pdipm->isxfixed,&fxptr);CHKERRQ(ierr); /* global indices in x */
82     for (k=0;k < pdipm->nxfixed;k++) {
83       i = fxptr[k]-xstart;
84       carr[offset + k] = xarr[i] - xuarr[i];
85     }
86   }
87   ierr = VecRestoreArrayWrite(pdipm->ce,&carr);CHKERRQ(ierr);
88 
89   /* (2) Update ci vector */
90   ierr = VecGetArrayWrite(pdipm->ci,&carr);CHKERRQ(ierr);
91 
92   if (pdipm->Nh) {
93     /* (2.a) Inserting updated h(x) */
94     ierr = VecGetArrayRead(tao->constraints_inequality,&harr);CHKERRQ(ierr);
95     ierr = PetscMemcpy(carr,harr,pdipm->nh*sizeof(PetscScalar));CHKERRQ(ierr);
96     ierr = VecRestoreArrayRead(tao->constraints_inequality,&harr);CHKERRQ(ierr);
97   }
98 
99   /* (2.b) Update xub */
100   offset = pdipm->nh;
101   if (pdipm->Nxub) {
102     ierr = ISGetIndices(pdipm->isxub,&ubptr);CHKERRQ(ierr);
103     for (k=0; k<pdipm->nxub; k++) {
104       i = ubptr[k]-xstart;
105       carr[offset + k] = xuarr[i] - xarr[i];
106     }
107   }
108 
109   if (pdipm->Nxlb) {
110     /* (2.c) Update xlb */
111     offset += pdipm->nxub;
112     ierr = ISGetIndices(pdipm->isxlb,&lbptr);CHKERRQ(ierr); /* global indices in x */
113     for (k=0; k<pdipm->nxlb; k++) {
114       i = lbptr[k]-xstart;
115       carr[offset + k] = xarr[i] - xlarr[i];
116     }
117   }
118 
119   if (pdipm->Nxbox) {
120     /* (2.d) Update xbox */
121     offset += pdipm->nxlb;
122     offset1 = offset + pdipm->nxbox;
123     ierr = ISGetIndices(pdipm->isxbox,&bxptr);CHKERRQ(ierr); /* global indices in x */
124     for (k=0; k<pdipm->nxbox; k++) {
125       i = bxptr[k]-xstart; /* local indices in x */
126       carr[offset+k]  = xuarr[i] - xarr[i];
127       carr[offset1+k] = xarr[i]  - xlarr[i];
128     }
129   }
130   ierr = VecRestoreArrayWrite(pdipm->ci,&carr);CHKERRQ(ierr);
131 
132   /* Restoring Vectors */
133   ierr = VecRestoreArrayRead(x,&xarr);CHKERRQ(ierr);
134   ierr = VecRestoreArrayRead(tao->XU,&xuarr);CHKERRQ(ierr);
135   ierr = VecRestoreArrayRead(tao->XL,&xlarr);CHKERRQ(ierr);
136   PetscFunctionReturn(0);
137 }
138 
139 /*
140    TaoPDIPMSetUpBounds - Create upper and lower bound vectors of x
141 
142    Collective
143 
144    Input Parameter:
145 .  tao - holds pdipm and XL & XU
146 
147    Level: beginner
148 
149 .seealso: TaoPDIPMUpdateConstraints
150 */
151 static PetscErrorCode TaoPDIPMSetUpBounds(Tao tao)
152 {
153   PetscErrorCode    ierr;
154   TAO_PDIPM         *pdipm=(TAO_PDIPM*)tao->data;
155   const PetscScalar *xl,*xu;
156   PetscInt          n,*ixlb,*ixub,*ixfixed,*ixfree,*ixbox,i,low,high,idx;
157   MPI_Comm          comm;
158   PetscInt          sendbuf[5],recvbuf[5];
159 
160   PetscFunctionBegin;
161   /* Creates upper and lower bounds vectors on x, if not created already */
162   ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
163 
164   ierr = VecGetLocalSize(tao->XL,&n);CHKERRQ(ierr);
165   ierr = PetscMalloc5(n,&ixlb,n,&ixub,n,&ixfree,n,&ixfixed,n,&ixbox);CHKERRQ(ierr);
166 
167   ierr = VecGetOwnershipRange(tao->XL,&low,&high);CHKERRQ(ierr);
168   ierr = VecGetArrayRead(tao->XL,&xl);CHKERRQ(ierr);
169   ierr = VecGetArrayRead(tao->XU,&xu);CHKERRQ(ierr);
170   for (i=0; i<n; i++) {
171     idx = low + i;
172     if ((PetscRealPart(xl[i]) > PETSC_NINFINITY) && (PetscRealPart(xu[i]) < PETSC_INFINITY)) {
173       if (PetscRealPart(xl[i]) == PetscRealPart(xu[i])) {
174         ixfixed[pdipm->nxfixed++]  = idx;
175       } else ixbox[pdipm->nxbox++] = idx;
176     } else {
177       if ((PetscRealPart(xl[i]) > PETSC_NINFINITY) && (PetscRealPart(xu[i]) >= PETSC_INFINITY)) {
178         ixlb[pdipm->nxlb++] = idx;
179       } else if ((PetscRealPart(xl[i]) <= PETSC_NINFINITY) && (PetscRealPart(xu[i]) < PETSC_INFINITY)) {
180         ixub[pdipm->nxlb++] = idx;
181       } else  ixfree[pdipm->nxfree++] = idx;
182     }
183   }
184   ierr = VecRestoreArrayRead(tao->XL,&xl);CHKERRQ(ierr);
185   ierr = VecRestoreArrayRead(tao->XU,&xu);CHKERRQ(ierr);
186 
187   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
188   sendbuf[0] = pdipm->nxlb;
189   sendbuf[1] = pdipm->nxub;
190   sendbuf[2] = pdipm->nxfixed;
191   sendbuf[3] = pdipm->nxbox;
192   sendbuf[4] = pdipm->nxfree;
193 
194   ierr = MPI_Allreduce(sendbuf,recvbuf,5,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
195   pdipm->Nxlb    = recvbuf[0];
196   pdipm->Nxub    = recvbuf[1];
197   pdipm->Nxfixed = recvbuf[2];
198   pdipm->Nxbox   = recvbuf[3];
199   pdipm->Nxfree  = recvbuf[4];
200 
201   if (pdipm->Nxlb) {
202     ierr = ISCreateGeneral(comm,pdipm->nxlb,ixlb,PETSC_COPY_VALUES,&pdipm->isxlb);CHKERRQ(ierr);
203   }
204   if (pdipm->Nxub) {
205     ierr = ISCreateGeneral(comm,pdipm->nxub,ixub,PETSC_COPY_VALUES,&pdipm->isxub);CHKERRQ(ierr);
206   }
207   if (pdipm->Nxfixed) {
208     ierr = ISCreateGeneral(comm,pdipm->nxfixed,ixfixed,PETSC_COPY_VALUES,&pdipm->isxfixed);CHKERRQ(ierr);
209   }
210   if (pdipm->Nxbox) {
211     ierr = ISCreateGeneral(comm,pdipm->nxbox,ixbox,PETSC_COPY_VALUES,&pdipm->isxbox);CHKERRQ(ierr);
212   }
213   if (pdipm->Nxfree) {
214     ierr = ISCreateGeneral(comm,pdipm->nxfree,ixfree,PETSC_COPY_VALUES,&pdipm->isxfree);CHKERRQ(ierr);
215   }
216   ierr = PetscFree5(ixlb,ixub,ixfixed,ixbox,ixfree);CHKERRQ(ierr);
217   PetscFunctionReturn(0);
218 }
219 
220 /*
221    TaoPDIPMInitializeSolution - Initialize PDIPM solution X = [x; lambdae; lambdai; z].
222    X consists of four subvectors in the order [x; lambdae; lambdai; z]. These
223      four subvectors need to be initialized and its values copied over to X. Instead
224      of copying, we use VecPlace/ResetArray functions to share the memory locations for
225      X and the subvectors
226 
227    Collective
228 
229    Input Parameter:
230 .  tao - Tao context
231 
232    Level: beginner
233 */
234 static PetscErrorCode TaoPDIPMInitializeSolution(Tao tao)
235 {
236   PetscErrorCode    ierr;
237   TAO_PDIPM         *pdipm = (TAO_PDIPM*)tao->data;
238   PetscScalar       *Xarr,*z,*lambdai;
239   PetscInt          i;
240   const PetscScalar *xarr,*h;
241 
242   PetscFunctionBegin;
243   ierr = VecGetArrayWrite(pdipm->X,&Xarr);CHKERRQ(ierr);
244 
245   /* Set Initialize X.x = tao->solution */
246   ierr = VecGetArrayRead(tao->solution,&xarr);CHKERRQ(ierr);
247   ierr = PetscMemcpy(Xarr,xarr,pdipm->nx*sizeof(PetscScalar));CHKERRQ(ierr);
248   ierr = VecRestoreArrayRead(tao->solution,&xarr);CHKERRQ(ierr);
249 
250   /* Initialize X.lambdae = 0.0 */
251   if (pdipm->lambdae) {
252     ierr = VecSet(pdipm->lambdae,0.0);CHKERRQ(ierr);
253   }
254 
255   /* Initialize X.lambdai = push_init_lambdai, X.z = push_init_slack */
256   if (pdipm->Nci) {
257     ierr = VecSet(pdipm->lambdai,pdipm->push_init_lambdai);CHKERRQ(ierr);
258     ierr = VecSet(pdipm->z,pdipm->push_init_slack);CHKERRQ(ierr);
259 
260     /* Additional modification for X.lambdai and X.z */
261     ierr = VecGetArrayWrite(pdipm->lambdai,&lambdai);CHKERRQ(ierr);
262     ierr = VecGetArrayWrite(pdipm->z,&z);CHKERRQ(ierr);
263     if (pdipm->Nh) {
264       ierr = VecGetArrayRead(tao->constraints_inequality,&h);CHKERRQ(ierr);
265       for (i=0; i < pdipm->nh; i++) {
266         if (h[i] < -pdipm->push_init_slack) z[i] = -h[i];
267         if (pdipm->mu/z[i] > pdipm->push_init_lambdai) lambdai[i] = pdipm->mu/z[i];
268       }
269       ierr = VecRestoreArrayRead(tao->constraints_inequality,&h);CHKERRQ(ierr);
270     }
271     ierr = VecRestoreArrayWrite(pdipm->lambdai,&lambdai);CHKERRQ(ierr);
272     ierr = VecRestoreArrayWrite(pdipm->z,&z);CHKERRQ(ierr);
273   }
274 
275   ierr = VecRestoreArrayWrite(pdipm->X,&Xarr);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 /*
280    TaoSNESJacobian_PDIPM - Evaluate the Hessian matrix at X
281 
282    Input Parameter:
283    snes - SNES context
284    X - KKT Vector
285    *ctx - pdipm context
286 
287    Output Parameter:
288    J - Hessian matrix
289    Jpre - Preconditioner
290 */
291 static PetscErrorCode TaoSNESJacobian_PDIPM(SNES snes,Vec X, Mat J, Mat Jpre, void *ctx)
292 {
293   PetscErrorCode    ierr;
294   Tao               tao=(Tao)ctx;
295   TAO_PDIPM         *pdipm = (TAO_PDIPM*)tao->data;
296   PetscInt          i,row,cols[2],Jrstart,rjstart,nc,j;
297   const PetscInt    *aj,*ranges,*Jranges,*rranges,*cranges;
298   const PetscScalar *Xarr,*aa;
299   PetscScalar       vals[2];
300   PetscInt          proc,nx_all,*nce_all=pdipm->nce_all;
301   MPI_Comm          comm;
302   PetscMPIInt       rank,size;
303   Mat               jac_equality_trans=pdipm->jac_equality_trans,jac_inequality_trans=pdipm->jac_inequality_trans;
304 
305   PetscFunctionBegin;
306   ierr = PetscObjectGetComm((PetscObject)snes,&comm);CHKERRQ(ierr);
307   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
308   ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr);
309 
310   ierr = MatGetOwnershipRanges(Jpre,&Jranges);CHKERRQ(ierr);
311   ierr = MatGetOwnershipRange(Jpre,&Jrstart,NULL);CHKERRQ(ierr);
312   ierr = MatGetOwnershipRangesColumn(tao->hessian,&rranges);CHKERRQ(ierr);
313   ierr = MatGetOwnershipRangesColumn(tao->hessian,&cranges);CHKERRQ(ierr);
314 
315   ierr = VecGetArrayRead(X,&Xarr);CHKERRQ(ierr);
316 
317   /* (1) insert Z and Ci to the 4th block of Jpre -- overwrite existing values */
318   if (pdipm->solve_symmetric_kkt) { /* 1 for eq 17 revised pdipm doc 0 for eq 18 (symmetric KKT) */
319     vals[0] = 1.0;
320     for (i=0; i < pdipm->nci; i++) {
321         row     = Jrstart + pdipm->off_z + i;
322         cols[0] = Jrstart + pdipm->off_lambdai + i;
323         cols[1] = row;
324         vals[1] = Xarr[pdipm->off_lambdai + i]/Xarr[pdipm->off_z + i];
325         ierr = MatSetValues(Jpre,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
326     }
327   } else {
328     for (i=0; i < pdipm->nci; i++) {
329       row     = Jrstart + pdipm->off_z + i;
330       cols[0] = Jrstart + pdipm->off_lambdai + i;
331       cols[1] = row;
332       vals[0] = Xarr[pdipm->off_z + i];
333       vals[1] = Xarr[pdipm->off_lambdai + i];
334       ierr = MatSetValues(Jpre,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
335     }
336   }
337 
338   /* (2) insert 2nd row block of Jpre: [ grad g, 0, 0, 0] */
339   if (pdipm->Ng) {
340     ierr = MatGetOwnershipRange(tao->jacobian_equality,&rjstart,NULL);CHKERRQ(ierr);
341     for (i=0; i<pdipm->ng; i++) {
342       row = Jrstart + pdipm->off_lambdae + i;
343 
344       ierr = MatGetRow(tao->jacobian_equality,i+rjstart,&nc,&aj,&aa);CHKERRQ(ierr);
345       proc = 0;
346       for (j=0; j < nc; j++) {
347         while (aj[j] >= cranges[proc+1]) proc++;
348         cols[0] = aj[j] - cranges[proc] + Jranges[proc];
349         ierr = MatSetValue(Jpre,row,cols[0],aa[j],INSERT_VALUES);CHKERRQ(ierr);
350       }
351       ierr = MatRestoreRow(tao->jacobian_equality,i+rjstart,&nc,&aj,&aa);CHKERRQ(ierr);
352       if (pdipm->kkt_pd) {
353         /* add shift \delta_c */
354         ierr = MatSetValue(Jpre,row,row,-pdipm->deltac,INSERT_VALUES);CHKERRQ(ierr);
355       }
356     }
357   }
358 
359   /* (3) insert 3rd row block of Jpre: [ -grad h, 0, deltac, I] */
360   if (pdipm->Nh) {
361     ierr = MatGetOwnershipRange(tao->jacobian_inequality,&rjstart,NULL);CHKERRQ(ierr);
362     for (i=0; i < pdipm->nh; i++) {
363       row = Jrstart + pdipm->off_lambdai + i;
364       ierr = MatGetRow(tao->jacobian_inequality,i+rjstart,&nc,&aj,&aa);CHKERRQ(ierr);
365       proc = 0;
366       for (j=0; j < nc; j++) {
367         while (aj[j] >= cranges[proc+1]) proc++;
368         cols[0] = aj[j] - cranges[proc] + Jranges[proc];
369         ierr = MatSetValue(Jpre,row,cols[0],-aa[j],INSERT_VALUES);CHKERRQ(ierr);
370       }
371       ierr = MatRestoreRow(tao->jacobian_inequality,i+rjstart,&nc,&aj,&aa);CHKERRQ(ierr);
372       if (pdipm->kkt_pd) {
373         /* add shift \delta_c */
374         ierr = MatSetValue(Jpre,row,row,-pdipm->deltac,INSERT_VALUES);CHKERRQ(ierr);
375       }
376     }
377   }
378 
379   /* (4) insert 1st row block of Jpre: [Wxx, grad g', -grad h', 0] */
380   if (pdipm->Ng) { /* grad g' */
381     ierr = MatTranspose(tao->jacobian_equality,MAT_REUSE_MATRIX,&jac_equality_trans);CHKERRQ(ierr);
382   }
383   if (pdipm->Nh) { /* grad h' */
384     ierr = MatTranspose(tao->jacobian_inequality,MAT_REUSE_MATRIX,&jac_inequality_trans);CHKERRQ(ierr);
385   }
386 
387   ierr = VecPlaceArray(pdipm->x,Xarr);CHKERRQ(ierr);
388   ierr = TaoComputeHessian(tao,pdipm->x,tao->hessian,tao->hessian_pre);CHKERRQ(ierr);
389   ierr = VecResetArray(pdipm->x);CHKERRQ(ierr);
390 
391   ierr = MatGetOwnershipRange(tao->hessian,&rjstart,NULL);CHKERRQ(ierr);
392   for (i=0; i<pdipm->nx; i++) {
393     row = Jrstart + i;
394 
395     /* insert Wxx = fxx + ... -- provided by user */
396     ierr = MatGetRow(tao->hessian,i+rjstart,&nc,&aj,&aa);CHKERRQ(ierr);
397     proc = 0;
398     for (j=0; j < nc; j++) {
399       while (aj[j] >= cranges[proc+1]) proc++;
400       cols[0] = aj[j] - cranges[proc] + Jranges[proc];
401       if (row == cols[0] && pdipm->kkt_pd) {
402         /* add shift deltaw to Wxx component */
403         ierr = MatSetValue(Jpre,row,cols[0],aa[j]+pdipm->deltaw,INSERT_VALUES);CHKERRQ(ierr);
404       } else {
405         ierr = MatSetValue(Jpre,row,cols[0],aa[j],INSERT_VALUES);CHKERRQ(ierr);
406       }
407     }
408     ierr = MatRestoreRow(tao->hessian,i+rjstart,&nc,&aj,&aa);CHKERRQ(ierr);
409 
410     /* insert grad g' */
411     if (pdipm->ng) {
412       ierr = MatGetRow(jac_equality_trans,i+rjstart,&nc,&aj,&aa);CHKERRQ(ierr);
413       ierr = MatGetOwnershipRanges(tao->jacobian_equality,&ranges);CHKERRQ(ierr);
414       proc = 0;
415       for (j=0; j < nc; j++) {
416         /* find row ownership of */
417         while (aj[j] >= ranges[proc+1]) proc++;
418         nx_all = rranges[proc+1] - rranges[proc];
419         cols[0] = aj[j] - ranges[proc] + Jranges[proc] + nx_all;
420         ierr = MatSetValue(Jpre,row,cols[0],aa[j],INSERT_VALUES);CHKERRQ(ierr);
421       }
422       ierr = MatRestoreRow(jac_equality_trans,i+rjstart,&nc,&aj,&aa);CHKERRQ(ierr);
423     }
424 
425     /* insert -grad h' */
426     if (pdipm->nh) {
427       ierr = MatGetRow(jac_inequality_trans,i+rjstart,&nc,&aj,&aa);CHKERRQ(ierr);
428       ierr = MatGetOwnershipRanges(tao->jacobian_inequality,&ranges);CHKERRQ(ierr);
429       proc = 0;
430       for (j=0; j < nc; j++) {
431         /* find row ownership of */
432         while (aj[j] >= ranges[proc+1]) proc++;
433         nx_all = rranges[proc+1] - rranges[proc];
434         cols[0] = aj[j] - ranges[proc] + Jranges[proc] + nx_all + nce_all[proc];
435         ierr = MatSetValue(Jpre,row,cols[0],-aa[j],INSERT_VALUES);CHKERRQ(ierr);
436       }
437       ierr = MatRestoreRow(jac_inequality_trans,i+rjstart,&nc,&aj,&aa);CHKERRQ(ierr);
438     }
439   }
440   ierr = VecRestoreArrayRead(X,&Xarr);CHKERRQ(ierr);
441 
442   /* (6) assemble Jpre and J */
443   ierr = MatAssemblyBegin(Jpre,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
444   ierr = MatAssemblyEnd(Jpre,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
445 
446   if (J != Jpre) {
447     ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
448     ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
449   }
450   PetscFunctionReturn(0);
451 }
452 
453 /*
454    TaoSnesFunction_PDIPM - Evaluate KKT function at X
455 
456    Input Parameter:
457    snes - SNES context
458    X - KKT Vector
459    *ctx - pdipm
460 
461    Output Parameter:
462    F - Updated Lagrangian vector
463 */
464 static PetscErrorCode TaoSNESFunction_PDIPM(SNES snes,Vec X,Vec F,void *ctx)
465 {
466   PetscErrorCode    ierr;
467   Tao               tao=(Tao)ctx;
468   TAO_PDIPM         *pdipm = (TAO_PDIPM*)tao->data;
469   PetscScalar       *Farr;
470   Vec               x,L1;
471   PetscInt          i;
472   const PetscScalar *Xarr,*carr,*zarr,*larr;
473 
474   PetscFunctionBegin;
475   ierr = VecSet(F,0.0);CHKERRQ(ierr);
476 
477   ierr = VecGetArrayRead(X,&Xarr);CHKERRQ(ierr);
478   ierr = VecGetArrayWrite(F,&Farr);CHKERRQ(ierr);
479 
480   /* (0) Evaluate f, fx, gradG, gradH at X.x Note: pdipm->x is not changed below */
481   x = pdipm->x;
482   ierr = VecPlaceArray(x,Xarr);CHKERRQ(ierr);
483   ierr = TaoPDIPMEvaluateFunctionsAndJacobians(tao,x);CHKERRQ(ierr);
484 
485   /* Update ce, ci, and Jci at X.x */
486   ierr = TaoPDIPMUpdateConstraints(tao,x);CHKERRQ(ierr);
487   ierr = VecResetArray(x);CHKERRQ(ierr);
488 
489   /* (1) L1 = fx + (gradG'*DE + Jce_xfixed'*lambdae_xfixed) - (gradH'*DI + Jci_xb'*lambdai_xb) */
490   L1 = pdipm->x;
491   ierr = VecPlaceArray(L1,Farr);CHKERRQ(ierr); /* L1 = 0.0 */
492   if (pdipm->Nci) {
493     if (pdipm->Nh) {
494       /* L1 += gradH'*DI. Note: tao->DI is not changed below */
495       ierr = VecPlaceArray(tao->DI,Xarr+pdipm->off_lambdai);CHKERRQ(ierr);
496       ierr = MatMultTransposeAdd(tao->jacobian_inequality,tao->DI,L1,L1);CHKERRQ(ierr);
497       ierr = VecResetArray(tao->DI);CHKERRQ(ierr);
498     }
499 
500     /* L1 += Jci_xb'*lambdai_xb */
501     ierr = VecPlaceArray(pdipm->lambdai_xb,Xarr+pdipm->off_lambdai+pdipm->nh);CHKERRQ(ierr);
502     ierr = MatMultTransposeAdd(pdipm->Jci_xb,pdipm->lambdai_xb,L1,L1);CHKERRQ(ierr);
503     ierr = VecResetArray(pdipm->lambdai_xb);CHKERRQ(ierr);
504 
505     /* L1 = - (gradH'*DI + Jci_xb'*lambdai_xb) */
506     ierr = VecScale(L1,-1.0);CHKERRQ(ierr);
507   }
508 
509   /* L1 += fx */
510   ierr = VecAXPY(L1,1.0,tao->gradient);CHKERRQ(ierr);
511 
512   if (pdipm->Nce) {
513     if (pdipm->Ng) {
514       /* L1 += gradG'*DE. Note: tao->DE is not changed below */
515       ierr = VecPlaceArray(tao->DE,Xarr+pdipm->off_lambdae);CHKERRQ(ierr);
516       ierr = MatMultTransposeAdd(tao->jacobian_equality,tao->DE,L1,L1);CHKERRQ(ierr);
517       ierr = VecResetArray(tao->DE);CHKERRQ(ierr);
518     }
519     if (pdipm->Nxfixed) {
520       /* L1 += Jce_xfixed'*lambdae_xfixed */
521       ierr = VecPlaceArray(pdipm->lambdae_xfixed,Xarr+pdipm->off_lambdae+pdipm->ng);CHKERRQ(ierr);
522       ierr = MatMultTransposeAdd(pdipm->Jce_xfixed,pdipm->lambdae_xfixed,L1,L1);CHKERRQ(ierr);
523       ierr = VecResetArray(pdipm->lambdae_xfixed);CHKERRQ(ierr);
524     }
525   }
526   ierr = VecResetArray(L1);CHKERRQ(ierr);
527 
528   /* (2) L2 = ce(x) */
529   if (pdipm->Nce) {
530     ierr = VecGetArrayRead(pdipm->ce,&carr);CHKERRQ(ierr);
531     for (i=0; i<pdipm->nce; i++) Farr[pdipm->off_lambdae + i] = carr[i];
532     ierr = VecRestoreArrayRead(pdipm->ce,&carr);CHKERRQ(ierr);
533   }
534 
535   if (pdipm->Nci) {
536     if (pdipm->solve_symmetric_kkt) {
537       /* (3) L3 = z - ci(x);
538          (4) L4 = Lambdai * e - mu/z *e  */
539       ierr = VecGetArrayRead(pdipm->ci,&carr);CHKERRQ(ierr);
540       larr = Xarr+pdipm->off_lambdai;
541       zarr = Xarr+pdipm->off_z;
542       for (i=0; i<pdipm->nci; i++) {
543         Farr[pdipm->off_lambdai + i] = zarr[i] - carr[i];
544         Farr[pdipm->off_z       + i] = larr[i] - pdipm->mu/zarr[i];
545       }
546       ierr = VecRestoreArrayRead(pdipm->ci,&carr);CHKERRQ(ierr);
547     } else {
548       /* (3) L3 = z - ci(x);
549          (4) L4 = Z * Lambdai * e - mu * e  */
550       ierr = VecGetArrayRead(pdipm->ci,&carr);CHKERRQ(ierr);
551       larr = Xarr+pdipm->off_lambdai;
552       zarr = Xarr+pdipm->off_z;
553       for (i=0; i<pdipm->nci; i++) {
554         Farr[pdipm->off_lambdai + i] = zarr[i] - carr[i];
555         Farr[pdipm->off_z       + i] = zarr[i]*larr[i] - pdipm->mu;
556       }
557       ierr = VecRestoreArrayRead(pdipm->ci,&carr);CHKERRQ(ierr);
558     }
559   }
560 
561   ierr = VecRestoreArrayRead(X,&Xarr);CHKERRQ(ierr);
562   ierr = VecRestoreArrayWrite(F,&Farr);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567   Evaluate F(X); then update update tao->gnorm0, tao->step = mu,
568   tao->residual = norm2(F_x,F_z) and tao->cnorm = norm2(F_ce,F_ci).
569 */
570 static PetscErrorCode TaoSNESFunction_PDIPM_residual(SNES snes,Vec X,Vec F,void *ctx)
571 {
572   PetscErrorCode    ierr;
573   Tao               tao=(Tao)ctx;
574   TAO_PDIPM         *pdipm = (TAO_PDIPM*)tao->data;
575   PetscScalar       *Farr,*tmparr;
576   Vec               L1;
577   PetscInt          i;
578   PetscReal         res[2],cnorm[2];
579   const PetscScalar *Xarr=NULL;
580 
581   PetscFunctionBegin;
582   ierr = TaoSNESFunction_PDIPM(snes,X,F,(void*)tao);CHKERRQ(ierr);
583   ierr = VecGetArrayWrite(F,&Farr);CHKERRQ(ierr);
584   ierr = VecGetArrayRead(X,&Xarr);CHKERRQ(ierr);
585 
586   /* compute res[0] = norm2(F_x) */
587   L1 = pdipm->x;
588   ierr = VecPlaceArray(L1,Farr);CHKERRQ(ierr);
589   ierr = VecNorm(L1,NORM_2,&res[0]);CHKERRQ(ierr);
590   ierr = VecResetArray(L1);CHKERRQ(ierr);
591 
592   /* compute res[1] = norm2(F_z), cnorm[1] = norm2(F_ci) */
593   if (pdipm->z) {
594     if (pdipm->solve_symmetric_kkt) {
595       ierr = VecPlaceArray(pdipm->z,Farr+pdipm->off_z);CHKERRQ(ierr);
596       if (pdipm->Nci) {
597         ierr = VecGetArrayWrite(pdipm->z,&tmparr);CHKERRQ(ierr);
598         for (i=0; i<pdipm->nci; i++) tmparr[i] *= Xarr[pdipm->off_z + i];
599         ierr = VecRestoreArrayWrite(pdipm->z,&tmparr);CHKERRQ(ierr);
600       }
601 
602       ierr = VecNorm(pdipm->z,NORM_2,&res[1]);CHKERRQ(ierr);
603 
604       if (pdipm->Nci) {
605         ierr = VecGetArrayWrite(pdipm->z,&tmparr);CHKERRQ(ierr);
606         for (i=0; i<pdipm->nci; i++) {
607           tmparr[i] /= Xarr[pdipm->off_z + i];
608         }
609         ierr = VecRestoreArrayWrite(pdipm->z,&tmparr);CHKERRQ(ierr);
610       }
611       ierr = VecResetArray(pdipm->z);CHKERRQ(ierr);
612     } else { /* !solve_symmetric_kkt */
613       ierr = VecPlaceArray(pdipm->z,Farr+pdipm->off_z);CHKERRQ(ierr);
614       ierr = VecNorm(pdipm->z,NORM_2,&res[1]);CHKERRQ(ierr);
615       ierr = VecResetArray(pdipm->z);CHKERRQ(ierr);
616     }
617 
618     ierr = VecPlaceArray(pdipm->ci,Farr+pdipm->off_lambdai);CHKERRQ(ierr);
619     ierr = VecNorm(pdipm->ci,NORM_2,&cnorm[1]);CHKERRQ(ierr);
620     ierr = VecResetArray(pdipm->ci);CHKERRQ(ierr);
621   } else {
622     res[1] = 0.0; cnorm[1] = 0.0;
623   }
624 
625   /* compute cnorm[0] = norm2(F_ce) */
626   if (pdipm->Nce) {
627     ierr = VecPlaceArray(pdipm->ce,Farr+pdipm->off_lambdae);CHKERRQ(ierr);
628     ierr = VecNorm(pdipm->ce,NORM_2,&cnorm[0]);CHKERRQ(ierr);
629     ierr = VecResetArray(pdipm->ce);CHKERRQ(ierr);
630   } else cnorm[0] = 0.0;
631 
632   ierr = VecRestoreArrayWrite(F,&Farr);CHKERRQ(ierr);
633   ierr = VecRestoreArrayRead(X,&Xarr);CHKERRQ(ierr);
634 
635   tao->gnorm0   = tao->residual;
636   tao->residual = PetscSqrtReal(res[0]*res[0] + res[1]*res[1]);
637   tao->cnorm    = PetscSqrtReal(cnorm[0]*cnorm[0] + cnorm[1]*cnorm[1]);
638   tao->step     = pdipm->mu;
639   PetscFunctionReturn(0);
640 }
641 
642 /*
643   KKTAddShifts - Check the inertia of Cholesky factor of KKT matrix.
644   If it does not match the numbers of prime and dual variables, add shifts to the KKT matrix.
645 */
646 static PetscErrorCode KKTAddShifts(Tao tao,SNES snes,Vec X)
647 {
648   PetscErrorCode ierr;
649   TAO_PDIPM      *pdipm = (TAO_PDIPM*)tao->data;
650   KSP            ksp;
651   PC             pc;
652   Mat            Factor;
653   PetscBool      isCHOL;
654   PetscInt       nneg,nzero,npos;
655 
656   PetscFunctionBegin;
657   /* Get the inertia of Cholesky factor */
658   ierr = SNESGetKSP(snes,&ksp);CHKERRQ(ierr);
659   ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
660   ierr = PetscObjectTypeCompare((PetscObject)pc,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
661   if (!isCHOL) PetscFunctionReturn(0);
662 
663   ierr = PCFactorGetMatrix(pc,&Factor);CHKERRQ(ierr);
664   ierr = MatGetInertia(Factor,&nneg,&nzero,&npos);CHKERRQ(ierr);
665 
666   if (npos < pdipm->Nx+pdipm->Nci) {
667     pdipm->deltaw = PetscMax(pdipm->lastdeltaw/3, 1.e-4*PETSC_MACHINE_EPSILON);
668     ierr = PetscInfo5(tao,"Test reduced deltaw=%g; previous MatInertia: nneg %D, nzero %D, npos %D(<%D)\n",(double)pdipm->deltaw,nneg,nzero,npos,pdipm->Nx+pdipm->Nci);CHKERRQ(ierr);
669     ierr = TaoSNESJacobian_PDIPM(snes,X, pdipm->K, pdipm->K, tao);CHKERRQ(ierr);
670     ierr = PCSetUp(pc);CHKERRQ(ierr);
671     ierr = MatGetInertia(Factor,&nneg,&nzero,&npos);CHKERRQ(ierr);
672 
673     if (npos < pdipm->Nx+pdipm->Nci) {
674       pdipm->deltaw = pdipm->lastdeltaw; /* in case reduction update does not help, this prevents that step from impacting increasing update */
675       while (npos < pdipm->Nx+pdipm->Nci && pdipm->deltaw <= 1./PETSC_SMALL) { /* increase deltaw */
676         ierr = PetscInfo5(tao,"  deltaw=%g fails, MatInertia: nneg %D, nzero %D, npos %D(<%D)\n",(double)pdipm->deltaw,nneg,nzero,npos,pdipm->Nx+pdipm->Nci);CHKERRQ(ierr);
677         pdipm->deltaw = PetscMin(8*pdipm->deltaw,PetscPowReal(10,20));
678         ierr = TaoSNESJacobian_PDIPM(snes,X, pdipm->K, pdipm->K, tao);CHKERRQ(ierr);
679         ierr = PCSetUp(pc);CHKERRQ(ierr);
680         ierr = MatGetInertia(Factor,&nneg,&nzero,&npos);CHKERRQ(ierr);CHKERRQ(ierr);
681       }
682 
683       if (pdipm->deltaw >= 1./PETSC_SMALL) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_CONV_FAILED,"Reached maximum delta w will not converge, try different initial x0");
684 
685       ierr = PetscInfo1(tao,"Updated deltaw %g\n",(double)pdipm->deltaw);CHKERRQ(ierr);
686       pdipm->lastdeltaw = pdipm->deltaw;
687       pdipm->deltaw     = 0.0;
688     }
689   }
690 
691   if (nzero) { /* Jacobian is singular */
692     if (pdipm->deltac == 0.0) {
693       pdipm->deltac = PETSC_SQRT_MACHINE_EPSILON;
694     } else {
695       pdipm->deltac = pdipm->deltac*PetscPowReal(pdipm->mu,.25);
696     }
697     ierr = PetscInfo4(tao,"Updated deltac=%g, MatInertia: nneg %D, nzero %D(!=0), npos %D\n",(double)pdipm->deltac,nneg,nzero,npos);CHKERRQ(ierr);
698     ierr = TaoSNESJacobian_PDIPM(snes,X, pdipm->K, pdipm->K, tao);CHKERRQ(ierr);
699     ierr = PCSetUp(pc);CHKERRQ(ierr);
700     ierr = MatGetInertia(Factor,&nneg,&nzero,&npos);CHKERRQ(ierr);
701   }
702   PetscFunctionReturn(0);
703 }
704 
705 /*
706   PCPreSolve_PDIPM -- called betwee MatFactorNumeric() and MatSolve()
707 */
708 PetscErrorCode PCPreSolve_PDIPM(PC pc,KSP ksp)
709 {
710   PetscErrorCode ierr;
711   Tao            tao;
712   TAO_PDIPM      *pdipm;
713 
714   PetscFunctionBegin;
715   ierr = KSPGetApplicationContext(ksp,&tao);CHKERRQ(ierr);
716   pdipm = (TAO_PDIPM*)tao->data;
717   ierr = KKTAddShifts(tao,pdipm->snes,pdipm->X);CHKERRQ(ierr);
718   PetscFunctionReturn(0);
719 }
720 
721 /*
722    SNESLineSearch_PDIPM - Custom line search used with PDIPM.
723 
724    Collective on TAO
725 
726    Notes:
727    This routine employs a simple backtracking line-search to keep
728    the slack variables (z) and inequality constraints Lagrange multipliers
729    (lambdai) positive, i.e., z,lambdai >=0. It does this by calculating scalars
730    alpha_p and alpha_d to keep z,lambdai non-negative. The decision (x), and the
731    slack variables are updated as X = X - alpha_d*dx. The constraint multipliers
732    are updated as Lambdai = Lambdai + alpha_p*dLambdai. The barrier parameter mu
733    is also updated as mu = mu + z'lambdai/Nci
734 */
735 static PetscErrorCode SNESLineSearch_PDIPM(SNESLineSearch linesearch,void *ctx)
736 {
737   PetscErrorCode    ierr;
738   Tao               tao=(Tao)ctx;
739   TAO_PDIPM         *pdipm = (TAO_PDIPM*)tao->data;
740   SNES              snes;
741   Vec               X,F,Y;
742   PetscInt          i,iter;
743   PetscReal         alpha_p=1.0,alpha_d=1.0,alpha[4];
744   PetscScalar       *Xarr,*z,*lambdai,dot,*taosolarr;
745   const PetscScalar *dXarr,*dz,*dlambdai;
746 
747   PetscFunctionBegin;
748   ierr = SNESLineSearchGetSNES(linesearch,&snes);CHKERRQ(ierr);
749   ierr = SNESGetIterationNumber(snes,&iter);CHKERRQ(ierr);
750 
751   ierr = SNESLineSearchSetReason(linesearch,SNES_LINESEARCH_SUCCEEDED);CHKERRQ(ierr);
752   ierr = SNESLineSearchGetVecs(linesearch,&X,&F,&Y,NULL,NULL);CHKERRQ(ierr);
753 
754   ierr = VecGetArrayWrite(X,&Xarr);CHKERRQ(ierr);
755   ierr = VecGetArrayRead(Y,&dXarr);CHKERRQ(ierr);
756   z  = Xarr + pdipm->off_z;
757   dz = dXarr + pdipm->off_z;
758   for (i=0; i < pdipm->nci; i++) {
759     if (z[i] - dz[i] < 0.0) alpha_p = PetscMin(alpha_p, 0.9999*z[i]/dz[i]);
760   }
761 
762   lambdai  = Xarr + pdipm->off_lambdai;
763   dlambdai = dXarr + pdipm->off_lambdai;
764 
765   for (i=0; i<pdipm->nci; i++) {
766     if (lambdai[i] - dlambdai[i] < 0.0) alpha_d = PetscMin(0.9999*lambdai[i]/dlambdai[i], alpha_d);
767   }
768 
769   alpha[0] = alpha_p;
770   alpha[1] = alpha_d;
771   ierr = VecRestoreArrayRead(Y,&dXarr);CHKERRQ(ierr);
772   ierr = VecRestoreArrayWrite(X,&Xarr);CHKERRQ(ierr);
773 
774   /* alpha = min(alpha) over all processes */
775   ierr = MPI_Allreduce(alpha,alpha+2,2,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)tao));CHKERRMPI(ierr);
776 
777   alpha_p = alpha[2];
778   alpha_d = alpha[3];
779 
780   /* X = X - alpha * Y */
781   ierr = VecGetArrayWrite(X,&Xarr);CHKERRQ(ierr);
782   ierr = VecGetArrayRead(Y,&dXarr);CHKERRQ(ierr);
783   for (i=0; i<pdipm->nx; i++) Xarr[i] -= alpha_p * dXarr[i];
784   for (i=0; i<pdipm->nce; i++) Xarr[i+pdipm->off_lambdae] -= alpha_d * dXarr[i+pdipm->off_lambdae];
785 
786   for (i=0; i<pdipm->nci; i++) {
787     Xarr[i+pdipm->off_lambdai] -= alpha_d * dXarr[i+pdipm->off_lambdai];
788     Xarr[i+pdipm->off_z]       -= alpha_p * dXarr[i+pdipm->off_z];
789   }
790   ierr = VecGetArrayWrite(tao->solution,&taosolarr);CHKERRQ(ierr);
791   ierr = PetscMemcpy(taosolarr,Xarr,pdipm->nx*sizeof(PetscScalar));CHKERRQ(ierr);
792   ierr = VecRestoreArrayWrite(tao->solution,&taosolarr);CHKERRQ(ierr);
793 
794   ierr = VecRestoreArrayWrite(X,&Xarr);CHKERRQ(ierr);
795   ierr = VecRestoreArrayRead(Y,&dXarr);CHKERRQ(ierr);
796 
797   /* Update mu = mu_update_factor * dot(z,lambdai)/pdipm->nci at updated X */
798   if (pdipm->z) {
799     ierr = VecDot(pdipm->z,pdipm->lambdai,&dot);CHKERRQ(ierr);
800   } else dot = 0.0;
801 
802   /* if (PetscAbsReal(pdipm->gradL) < 0.9*pdipm->mu)  */
803   pdipm->mu = pdipm->mu_update_factor * dot/pdipm->Nci;
804 
805   /* Update F; get tao->residual and tao->cnorm */
806   ierr = TaoSNESFunction_PDIPM_residual(snes,X,F,(void*)tao);CHKERRQ(ierr);
807 
808   tao->niter++;
809   ierr = TaoLogConvergenceHistory(tao,pdipm->obj,tao->residual,tao->cnorm,tao->niter);CHKERRQ(ierr);
810   ierr = TaoMonitor(tao,tao->niter,pdipm->obj,tao->residual,tao->cnorm,pdipm->mu);CHKERRQ(ierr);
811 
812   ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
813   if (tao->reason) {
814     ierr = SNESSetConvergedReason(snes,SNES_CONVERGED_FNORM_ABS);CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 /*
820    TaoSolve_PDIPM
821 
822    Input Parameter:
823    tao - TAO context
824 
825    Output Parameter:
826    tao - TAO context
827 */
828 PetscErrorCode TaoSolve_PDIPM(Tao tao)
829 {
830   PetscErrorCode     ierr;
831   TAO_PDIPM          *pdipm = (TAO_PDIPM*)tao->data;
832   SNESLineSearch     linesearch; /* SNESLineSearch context */
833   Vec                dummy;
834 
835   PetscFunctionBegin;
836   if (!tao->constraints_equality && !tao->constraints_inequality) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_ARG_NULL,"Equality and inequality constraints are not set. Either set them or switch to a different algorithm");
837 
838   /* Initialize all variables */
839   ierr = TaoPDIPMInitializeSolution(tao);CHKERRQ(ierr);
840 
841   /* Set linesearch */
842   ierr = SNESGetLineSearch(pdipm->snes,&linesearch);CHKERRQ(ierr);
843   ierr = SNESLineSearchSetType(linesearch,SNESLINESEARCHSHELL);CHKERRQ(ierr);
844   ierr = SNESLineSearchShellSetUserFunc(linesearch,SNESLineSearch_PDIPM,tao);CHKERRQ(ierr);
845   ierr = SNESLineSearchSetFromOptions(linesearch);CHKERRQ(ierr);
846 
847   tao->reason = TAO_CONTINUE_ITERATING;
848 
849   /* -tao_monitor for iteration 0 and check convergence */
850   ierr = VecDuplicate(pdipm->X,&dummy);CHKERRQ(ierr);
851   ierr = TaoSNESFunction_PDIPM_residual(pdipm->snes,pdipm->X,dummy,(void*)tao);CHKERRQ(ierr);
852 
853   ierr = TaoLogConvergenceHistory(tao,pdipm->obj,tao->residual,tao->cnorm,tao->niter);CHKERRQ(ierr);
854   ierr = TaoMonitor(tao,tao->niter,pdipm->obj,tao->residual,tao->cnorm,pdipm->mu);CHKERRQ(ierr);
855   ierr = VecDestroy(&dummy);CHKERRQ(ierr);
856   ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
857   if (tao->reason) {
858     ierr = SNESSetConvergedReason(pdipm->snes,SNES_CONVERGED_FNORM_ABS);CHKERRQ(ierr);
859   }
860 
861   while (tao->reason == TAO_CONTINUE_ITERATING) {
862     SNESConvergedReason reason;
863     ierr = SNESSolve(pdipm->snes,NULL,pdipm->X);CHKERRQ(ierr);
864 
865     /* Check SNES convergence */
866     ierr = SNESGetConvergedReason(pdipm->snes,&reason);CHKERRQ(ierr);
867     if (reason < 0) {
868       ierr = PetscPrintf(PetscObjectComm((PetscObject)pdipm->snes),"SNES solve did not converged due to reason %D\n",reason);CHKERRQ(ierr);
869     }
870 
871     /* Check TAO convergence */
872     if (PetscIsInfOrNanReal(pdipm->obj)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"User-provided compute function generated Inf or NaN");
873   }
874   PetscFunctionReturn(0);
875 }
876 
877 /*
878   TaoView_PDIPM - View PDIPM
879 
880    Input Parameter:
881     tao - TAO object
882     viewer - PetscViewer
883 
884    Output:
885 */
886 PetscErrorCode TaoView_PDIPM(Tao tao,PetscViewer viewer)
887 {
888   TAO_PDIPM      *pdipm = (TAO_PDIPM *)tao->data;
889   PetscErrorCode ierr;
890 
891   PetscFunctionBegin;
892   tao->constrained = PETSC_TRUE;
893   ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
894   ierr = PetscViewerASCIIPrintf(viewer,"Number of prime=%D, Number of dual=%D\n",pdipm->Nx+pdipm->Nci,pdipm->Nce + pdipm->Nci);CHKERRQ(ierr);
895   if (pdipm->kkt_pd) {
896     ierr = PetscViewerASCIIPrintf(viewer,"KKT shifts deltaw=%g, deltac=%g\n",(double)pdipm->deltaw,(double)pdipm->deltac);CHKERRQ(ierr);
897   }
898   ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 /*
903    TaoSetup_PDIPM - Sets up tao and pdipm
904 
905    Input Parameter:
906    tao - TAO object
907 
908    Output:   pdipm - initialized object
909 */
910 PetscErrorCode TaoSetup_PDIPM(Tao tao)
911 {
912   TAO_PDIPM         *pdipm = (TAO_PDIPM*)tao->data;
913   PetscErrorCode    ierr;
914   MPI_Comm          comm;
915   PetscMPIInt       size;
916   PetscInt          row,col,Jcrstart,Jcrend,k,tmp,nc,proc,*nh_all,*ng_all;
917   PetscInt          offset,*xa,*xb,i,j,rstart,rend;
918   PetscScalar       one=1.0,neg_one=-1.0;
919   const PetscInt    *cols,*rranges,*cranges,*aj,*ranges;
920   const PetscScalar *aa,*Xarr;
921   Mat               J,jac_equality_trans,jac_inequality_trans;
922   Mat               Jce_xfixed_trans,Jci_xb_trans;
923   PetscInt          *dnz,*onz,rjstart,nx_all,*nce_all,*Jranges,cols1[2];
924 
925   PetscFunctionBegin;
926   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
928 
929   /* (1) Setup Bounds and create Tao vectors */
930   ierr = TaoPDIPMSetUpBounds(tao);CHKERRQ(ierr);
931 
932   if (!tao->gradient) {
933     ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr);
934     ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr);
935   }
936 
937   /* (2) Get sizes */
938   /* Size of vector x - This is set by TaoSetInitialVector */
939   ierr = VecGetSize(tao->solution,&pdipm->Nx);CHKERRQ(ierr);
940   ierr = VecGetLocalSize(tao->solution,&pdipm->nx);CHKERRQ(ierr);
941 
942   /* Size of equality constraints and vectors */
943   if (tao->constraints_equality) {
944     ierr = VecGetSize(tao->constraints_equality,&pdipm->Ng);CHKERRQ(ierr);
945     ierr = VecGetLocalSize(tao->constraints_equality,&pdipm->ng);CHKERRQ(ierr);
946   } else {
947     pdipm->ng = pdipm->Ng = 0;
948   }
949 
950   pdipm->nce = pdipm->ng + pdipm->nxfixed;
951   pdipm->Nce = pdipm->Ng + pdipm->Nxfixed;
952 
953   /* Size of inequality constraints and vectors */
954   if (tao->constraints_inequality) {
955     ierr = VecGetSize(tao->constraints_inequality,&pdipm->Nh);CHKERRQ(ierr);
956     ierr = VecGetLocalSize(tao->constraints_inequality,&pdipm->nh);CHKERRQ(ierr);
957   } else {
958     pdipm->nh = pdipm->Nh = 0;
959   }
960 
961   pdipm->nci = pdipm->nh + pdipm->nxlb + pdipm->nxub + 2*pdipm->nxbox;
962   pdipm->Nci = pdipm->Nh + pdipm->Nxlb + pdipm->Nxub + 2*pdipm->Nxbox;
963 
964   /* Full size of the KKT system to be solved */
965   pdipm->n = pdipm->nx + pdipm->nce + 2*pdipm->nci;
966   pdipm->N = pdipm->Nx + pdipm->Nce + 2*pdipm->Nci;
967 
968   /* (3) Offsets for subvectors */
969   pdipm->off_lambdae = pdipm->nx;
970   pdipm->off_lambdai = pdipm->off_lambdae + pdipm->nce;
971   pdipm->off_z       = pdipm->off_lambdai + pdipm->nci;
972 
973   /* (4) Create vectors and subvectors */
974   /* Ce and Ci vectors */
975   ierr = VecCreate(comm,&pdipm->ce);CHKERRQ(ierr);
976   ierr = VecSetSizes(pdipm->ce,pdipm->nce,pdipm->Nce);CHKERRQ(ierr);
977   ierr = VecSetFromOptions(pdipm->ce);CHKERRQ(ierr);
978 
979   ierr = VecCreate(comm,&pdipm->ci);CHKERRQ(ierr);
980   ierr = VecSetSizes(pdipm->ci,pdipm->nci,pdipm->Nci);CHKERRQ(ierr);
981   ierr = VecSetFromOptions(pdipm->ci);CHKERRQ(ierr);
982 
983   /* X=[x; lambdae; lambdai; z] for the big KKT system */
984   ierr = VecCreate(comm,&pdipm->X);CHKERRQ(ierr);
985   ierr = VecSetSizes(pdipm->X,pdipm->n,pdipm->N);CHKERRQ(ierr);
986   ierr = VecSetFromOptions(pdipm->X);CHKERRQ(ierr);
987 
988   /* Subvectors; they share local arrays with X */
989   ierr = VecGetArrayRead(pdipm->X,&Xarr);CHKERRQ(ierr);
990   /* x shares local array with X.x */
991   if (pdipm->Nx) {
992     ierr = VecCreateMPIWithArray(comm,1,pdipm->nx,pdipm->Nx,Xarr,&pdipm->x);CHKERRQ(ierr);
993   }
994 
995   /* lambdae shares local array with X.lambdae */
996   if (pdipm->Nce) {
997     ierr = VecCreateMPIWithArray(comm,1,pdipm->nce,pdipm->Nce,Xarr+pdipm->off_lambdae,&pdipm->lambdae);CHKERRQ(ierr);
998   }
999 
1000   /* tao->DE shares local array with X.lambdae_g */
1001   if (pdipm->Ng) {
1002     ierr = VecCreateMPIWithArray(comm,1,pdipm->ng,pdipm->Ng,Xarr+pdipm->off_lambdae,&tao->DE);CHKERRQ(ierr);
1003 
1004     ierr = VecCreate(comm,&pdipm->lambdae_xfixed);CHKERRQ(ierr);
1005     ierr = VecSetSizes(pdipm->lambdae_xfixed,pdipm->nxfixed,PETSC_DECIDE);CHKERRQ(ierr);
1006     ierr = VecSetFromOptions(pdipm->lambdae_xfixed);CHKERRQ(ierr);
1007   }
1008 
1009   if (pdipm->Nci) {
1010     /* lambdai shares local array with X.lambdai */
1011     ierr = VecCreateMPIWithArray(comm,1,pdipm->nci,pdipm->Nci,Xarr+pdipm->off_lambdai,&pdipm->lambdai);CHKERRQ(ierr);
1012 
1013     /* z for slack variables; it shares local array with X.z */
1014     ierr = VecCreateMPIWithArray(comm,1,pdipm->nci,pdipm->Nci,Xarr+pdipm->off_z,&pdipm->z);CHKERRQ(ierr);
1015   }
1016 
1017   /* tao->DI which shares local array with X.lambdai_h */
1018   if (pdipm->Nh) {
1019     ierr = VecCreateMPIWithArray(comm,1,pdipm->nh,pdipm->Nh,Xarr+pdipm->off_lambdai,&tao->DI);CHKERRQ(ierr);
1020   }
1021   ierr = VecCreate(comm,&pdipm->lambdai_xb);CHKERRQ(ierr);
1022   ierr = VecSetSizes(pdipm->lambdai_xb,(pdipm->nci - pdipm->nh),PETSC_DECIDE);CHKERRQ(ierr);
1023   ierr = VecSetFromOptions(pdipm->lambdai_xb);CHKERRQ(ierr);
1024 
1025   ierr = VecRestoreArrayRead(pdipm->X,&Xarr);CHKERRQ(ierr);
1026 
1027   /* (5) Create Jacobians Jce_xfixed and Jci */
1028   /* (5.1) PDIPM Jacobian of equality bounds cebound(x) = J_nxfixed */
1029   if (pdipm->Nxfixed) {
1030     /* Create Jce_xfixed */
1031     ierr = MatCreate(comm,&pdipm->Jce_xfixed);CHKERRQ(ierr);
1032     ierr = MatSetSizes(pdipm->Jce_xfixed,pdipm->nxfixed,pdipm->nx,PETSC_DECIDE,pdipm->Nx);CHKERRQ(ierr);
1033     ierr = MatSetFromOptions(pdipm->Jce_xfixed);CHKERRQ(ierr);
1034     ierr = MatSeqAIJSetPreallocation(pdipm->Jce_xfixed,1,NULL);CHKERRQ(ierr);
1035     ierr = MatMPIAIJSetPreallocation(pdipm->Jce_xfixed,1,NULL,1,NULL);CHKERRQ(ierr);
1036 
1037     ierr = MatGetOwnershipRange(pdipm->Jce_xfixed,&Jcrstart,&Jcrend);CHKERRQ(ierr);
1038     ierr = ISGetIndices(pdipm->isxfixed,&cols);CHKERRQ(ierr);
1039     k = 0;
1040     for (row = Jcrstart; row < Jcrend; row++) {
1041       ierr = MatSetValues(pdipm->Jce_xfixed,1,&row,1,cols+k,&one,INSERT_VALUES);CHKERRQ(ierr);
1042       k++;
1043     }
1044     ierr = ISRestoreIndices(pdipm->isxfixed, &cols);CHKERRQ(ierr);
1045     ierr = MatAssemblyBegin(pdipm->Jce_xfixed,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1046     ierr = MatAssemblyEnd(pdipm->Jce_xfixed,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1047   }
1048 
1049   /* (5.2) PDIPM inequality Jacobian Jci = [tao->jacobian_inequality; ...] */
1050   ierr = MatCreate(comm,&pdipm->Jci_xb);CHKERRQ(ierr);
1051   ierr = MatSetSizes(pdipm->Jci_xb,pdipm->nci-pdipm->nh,pdipm->nx,PETSC_DECIDE,pdipm->Nx);CHKERRQ(ierr);
1052   ierr = MatSetFromOptions(pdipm->Jci_xb);CHKERRQ(ierr);
1053   ierr = MatSeqAIJSetPreallocation(pdipm->Jci_xb,1,NULL);CHKERRQ(ierr);
1054   ierr = MatMPIAIJSetPreallocation(pdipm->Jci_xb,1,NULL,1,NULL);CHKERRQ(ierr);
1055 
1056   ierr = MatGetOwnershipRange(pdipm->Jci_xb,&Jcrstart,&Jcrend);CHKERRQ(ierr);
1057   offset = Jcrstart;
1058   if (pdipm->Nxub) {
1059     /* Add xub to Jci_xb */
1060     ierr = ISGetIndices(pdipm->isxub,&cols);CHKERRQ(ierr);
1061     k = 0;
1062     for (row = offset; row < offset + pdipm->nxub; row++) {
1063       ierr = MatSetValues(pdipm->Jci_xb,1,&row,1,cols+k,&neg_one,INSERT_VALUES);CHKERRQ(ierr);
1064       k++;
1065     }
1066     ierr = ISRestoreIndices(pdipm->isxub, &cols);CHKERRQ(ierr);
1067   }
1068 
1069   if (pdipm->Nxlb) {
1070     /* Add xlb to Jci_xb */
1071     ierr = ISGetIndices(pdipm->isxlb,&cols);CHKERRQ(ierr);
1072     k = 0;
1073     offset += pdipm->nxub;
1074     for (row = offset; row < offset + pdipm->nxlb; row++) {
1075       ierr = MatSetValues(pdipm->Jci_xb,1,&row,1,cols+k,&one,INSERT_VALUES);CHKERRQ(ierr);
1076       k++;
1077     }
1078     ierr = ISRestoreIndices(pdipm->isxlb, &cols);CHKERRQ(ierr);
1079   }
1080 
1081   /* Add xbox to Jci_xb */
1082   if (pdipm->Nxbox) {
1083     ierr = ISGetIndices(pdipm->isxbox,&cols);CHKERRQ(ierr);
1084     k = 0;
1085     offset += pdipm->nxlb;
1086     for (row = offset; row < offset + pdipm->nxbox; row++) {
1087       ierr = MatSetValues(pdipm->Jci_xb,1,&row,1,cols+k,&neg_one,INSERT_VALUES);CHKERRQ(ierr);
1088       tmp = row + pdipm->nxbox;
1089       ierr = MatSetValues(pdipm->Jci_xb,1,&tmp,1,cols+k,&one,INSERT_VALUES);CHKERRQ(ierr);
1090       k++;
1091     }
1092     ierr = ISRestoreIndices(pdipm->isxbox, &cols);CHKERRQ(ierr);
1093   }
1094 
1095   ierr = MatAssemblyBegin(pdipm->Jci_xb,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1096   ierr = MatAssemblyEnd(pdipm->Jci_xb,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1097   /* ierr = MatView(pdipm->Jci_xb,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */
1098 
1099   /* (6) Set up ISs for PC Fieldsplit */
1100   if (pdipm->solve_reduced_kkt) {
1101     ierr = PetscMalloc2(pdipm->nx+pdipm->nce,&xa,2*pdipm->nci,&xb);CHKERRQ(ierr);
1102     for (i=0; i < pdipm->nx + pdipm->nce; i++) xa[i] = i;
1103     for (i=0; i < 2*pdipm->nci; i++) xb[i] = pdipm->off_lambdai + i;
1104 
1105     ierr = ISCreateGeneral(comm,pdipm->nx+pdipm->nce,xa,PETSC_OWN_POINTER,&pdipm->is1);CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,2*pdipm->nci,xb,PETSC_OWN_POINTER,&pdipm->is2);CHKERRQ(ierr);
1107   }
1108 
1109   /* (7) Gather offsets from all processes */
1110   ierr = PetscMalloc1(size,&pdipm->nce_all);CHKERRQ(ierr);
1111 
1112   /* Get rstart of KKT matrix */
1113   ierr = MPI_Scan(&pdipm->n,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
1114   rstart -= pdipm->n;
1115 
1116   ierr = MPI_Allgather(&pdipm->nce,1,MPIU_INT,pdipm->nce_all,1,MPIU_INT,comm);CHKERRMPI(ierr);
1117 
1118   ierr = PetscMalloc3(size,&ng_all,size,&nh_all,size,&Jranges);CHKERRQ(ierr);
1119   ierr = MPI_Allgather(&rstart,1,MPIU_INT,Jranges,1,MPIU_INT,comm);CHKERRMPI(ierr);
1120   ierr = MPI_Allgather(&pdipm->nh,1,MPIU_INT,nh_all,1,MPIU_INT,comm);CHKERRMPI(ierr);
1121   ierr = MPI_Allgather(&pdipm->ng,1,MPIU_INT,ng_all,1,MPIU_INT,comm);CHKERRMPI(ierr);
1122 
1123   ierr = MatGetOwnershipRanges(tao->hessian,&rranges);CHKERRQ(ierr);
1124   ierr = MatGetOwnershipRangesColumn(tao->hessian,&cranges);CHKERRQ(ierr);
1125 
1126   if (pdipm->Ng) {
1127     ierr = TaoComputeJacobianEquality(tao,tao->solution,tao->jacobian_equality,tao->jacobian_equality_pre);CHKERRQ(ierr);
1128     ierr = MatTranspose(tao->jacobian_equality,MAT_INITIAL_MATRIX,&pdipm->jac_equality_trans);CHKERRQ(ierr);
1129   }
1130   if (pdipm->Nh) {
1131     ierr = TaoComputeJacobianInequality(tao,tao->solution,tao->jacobian_inequality,tao->jacobian_inequality_pre);CHKERRQ(ierr);
1132     ierr = MatTranspose(tao->jacobian_inequality,MAT_INITIAL_MATRIX,&pdipm->jac_inequality_trans);CHKERRQ(ierr);
1133   }
1134 
1135   /* Count dnz,onz for preallocation of KKT matrix */
1136   jac_equality_trans   = pdipm->jac_equality_trans;
1137   jac_inequality_trans = pdipm->jac_inequality_trans;
1138   nce_all = pdipm->nce_all;
1139 
1140   if (pdipm->Nxfixed) {
1141     ierr = MatTranspose(pdipm->Jce_xfixed,MAT_INITIAL_MATRIX,&Jce_xfixed_trans);CHKERRQ(ierr);
1142   }
1143   ierr = MatTranspose(pdipm->Jci_xb,MAT_INITIAL_MATRIX,&Jci_xb_trans);CHKERRQ(ierr);
1144 
1145   ierr = MatPreallocateInitialize(comm,pdipm->n,pdipm->n,dnz,onz);CHKERRQ(ierr);
1146 
1147   /* 1st row block of KKT matrix: [Wxx; gradCe'; -gradCi'; 0] */
1148   ierr = TaoPDIPMEvaluateFunctionsAndJacobians(tao,pdipm->x);CHKERRQ(ierr);
1149   ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr);
1150 
1151   /* Insert tao->hessian */
1152   ierr = MatGetOwnershipRange(tao->hessian,&rjstart,NULL);CHKERRQ(ierr);
1153   for (i=0; i<pdipm->nx; i++) {
1154     row = rstart + i;
1155 
1156     ierr = MatGetRow(tao->hessian,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1157     proc = 0;
1158     for (j=0; j < nc; j++) {
1159       while (aj[j] >= cranges[proc+1]) proc++;
1160       col = aj[j] - cranges[proc] + Jranges[proc];
1161       ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
1162     }
1163     ierr = MatRestoreRow(tao->hessian,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1164 
1165     if (pdipm->ng) {
1166       /* Insert grad g' */
1167       ierr = MatGetRow(jac_equality_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1168       ierr = MatGetOwnershipRanges(tao->jacobian_equality,&ranges);CHKERRQ(ierr);
1169       proc = 0;
1170       for (j=0; j < nc; j++) {
1171         /* find row ownership of */
1172         while (aj[j] >= ranges[proc+1]) proc++;
1173         nx_all = rranges[proc+1] - rranges[proc];
1174         col = aj[j] - ranges[proc] + Jranges[proc] + nx_all;
1175         ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
1176       }
1177       ierr = MatRestoreRow(jac_equality_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1178     }
1179 
1180     /* Insert Jce_xfixed^T' */
1181     if (pdipm->nxfixed) {
1182       ierr = MatGetRow(Jce_xfixed_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1183       ierr = MatGetOwnershipRanges(pdipm->Jce_xfixed,&ranges);CHKERRQ(ierr);
1184       proc = 0;
1185       for (j=0; j < nc; j++) {
1186         /* find row ownership of */
1187         while (aj[j] >= ranges[proc+1]) proc++;
1188         nx_all = rranges[proc+1] - rranges[proc];
1189         col = aj[j] - ranges[proc] + Jranges[proc] + nx_all + ng_all[proc];
1190         ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
1191       }
1192       ierr = MatRestoreRow(Jce_xfixed_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1193     }
1194 
1195     if (pdipm->nh) {
1196       /* Insert -grad h' */
1197       ierr = MatGetRow(jac_inequality_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1198       ierr = MatGetOwnershipRanges(tao->jacobian_inequality,&ranges);CHKERRQ(ierr);
1199       proc = 0;
1200       for (j=0; j < nc; j++) {
1201         /* find row ownership of */
1202         while (aj[j] >= ranges[proc+1]) proc++;
1203         nx_all = rranges[proc+1] - rranges[proc];
1204         col = aj[j] - ranges[proc] + Jranges[proc] + nx_all + nce_all[proc];
1205         ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
1206       }
1207       ierr = MatRestoreRow(jac_inequality_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1208     }
1209 
1210     /* Insert Jci_xb^T' */
1211     ierr = MatGetRow(Jci_xb_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1212     ierr = MatGetOwnershipRanges(pdipm->Jci_xb,&ranges);CHKERRQ(ierr);
1213     proc = 0;
1214     for (j=0; j < nc; j++) {
1215       /* find row ownership of */
1216       while (aj[j] >= ranges[proc+1]) proc++;
1217       nx_all = rranges[proc+1] - rranges[proc];
1218       col = aj[j] - ranges[proc] + Jranges[proc] + nx_all + nce_all[proc] + nh_all[proc];
1219       ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
1220     }
1221     ierr = MatRestoreRow(Jci_xb_trans,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1222   }
1223 
1224   /* 2nd Row block of KKT matrix: [grad Ce, deltac*I, 0, 0] */
1225   if (pdipm->Ng) {
1226     ierr = MatGetOwnershipRange(tao->jacobian_equality,&rjstart,NULL);CHKERRQ(ierr);
1227     for (i=0; i < pdipm->ng; i++) {
1228       row = rstart + pdipm->off_lambdae + i;
1229 
1230       ierr = MatGetRow(tao->jacobian_equality,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1231       proc = 0;
1232       for (j=0; j < nc; j++) {
1233         while (aj[j] >= cranges[proc+1]) proc++;
1234         col = aj[j] - cranges[proc] + Jranges[proc];
1235         ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr); /* grad g */
1236       }
1237       ierr = MatRestoreRow(tao->jacobian_equality,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1238     }
1239   }
1240   /* Jce_xfixed */
1241   if (pdipm->Nxfixed) {
1242     ierr = MatGetOwnershipRange(pdipm->Jce_xfixed,&Jcrstart,NULL);CHKERRQ(ierr);
1243     for (i=0; i < (pdipm->nce - pdipm->ng); i++) {
1244       row = rstart + pdipm->off_lambdae + pdipm->ng + i;
1245 
1246       ierr = MatGetRow(pdipm->Jce_xfixed,i+Jcrstart,&nc,&cols,NULL);CHKERRQ(ierr);
1247       if (nc != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"nc != 1");
1248 
1249       proc = 0;
1250       j    = 0;
1251       while (cols[j] >= cranges[proc+1]) proc++;
1252       col = cols[j] - cranges[proc] + Jranges[proc];
1253       ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
1254       ierr = MatRestoreRow(pdipm->Jce_xfixed,i+Jcrstart,&nc,&cols,NULL);CHKERRQ(ierr);
1255     }
1256   }
1257 
1258   /* 3rd Row block of KKT matrix: [ gradCi, 0, deltac*I, -I] */
1259   if (pdipm->Nh) {
1260     ierr = MatGetOwnershipRange(tao->jacobian_inequality,&rjstart,NULL);CHKERRQ(ierr);
1261     for (i=0; i < pdipm->nh; i++) {
1262       row = rstart + pdipm->off_lambdai + i;
1263 
1264       ierr = MatGetRow(tao->jacobian_inequality,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1265       proc = 0;
1266       for (j=0; j < nc; j++) {
1267         while (aj[j] >= cranges[proc+1]) proc++;
1268         col = aj[j] - cranges[proc] + Jranges[proc];
1269         ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr); /* grad h */
1270       }
1271       ierr = MatRestoreRow(tao->jacobian_inequality,i+rjstart,&nc,&aj,NULL);CHKERRQ(ierr);
1272     }
1273     /* I */
1274     for (i=0; i < pdipm->nh; i++) {
1275       row = rstart + pdipm->off_lambdai + i;
1276       col = rstart + pdipm->off_z + i;
1277       ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
1278     }
1279   }
1280 
1281   /* Jci_xb */
1282   ierr = MatGetOwnershipRange(pdipm->Jci_xb,&Jcrstart,NULL);CHKERRQ(ierr);
1283   for (i=0; i < (pdipm->nci - pdipm->nh); i++) {
1284     row = rstart + pdipm->off_lambdai + pdipm->nh + i;
1285 
1286     ierr = MatGetRow(pdipm->Jci_xb,i+Jcrstart,&nc,&cols,NULL);CHKERRQ(ierr);
1287     if (nc != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"nc != 1");
1288     proc = 0;
1289     for (j=0; j < nc; j++) {
1290       while (cols[j] >= cranges[proc+1]) proc++;
1291       col = cols[j] - cranges[proc] + Jranges[proc];
1292       ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
1293     }
1294     ierr = MatRestoreRow(pdipm->Jci_xb,i+Jcrstart,&nc,&cols,NULL);CHKERRQ(ierr);
1295     /* I */
1296     col = rstart + pdipm->off_z + pdipm->nh + i;
1297     ierr = MatPreallocateSet(row,1,&col,dnz,onz);CHKERRQ(ierr);
1298   }
1299 
1300   /* 4-th Row block of KKT matrix: Z and Ci */
1301   for (i=0; i < pdipm->nci; i++) {
1302     row     = rstart + pdipm->off_z + i;
1303     cols1[0] = rstart + pdipm->off_lambdai + i;
1304     cols1[1] = row;
1305     ierr = MatPreallocateSet(row,2,cols1,dnz,onz);CHKERRQ(ierr);
1306   }
1307 
1308   /* diagonal entry */
1309   for (i=0; i<pdipm->n; i++) dnz[i]++; /* diagonal entry */
1310 
1311   /* Create KKT matrix */
1312   ierr = MatCreate(comm,&J);CHKERRQ(ierr);
1313   ierr = MatSetSizes(J,pdipm->n,pdipm->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1314   ierr = MatSetFromOptions(J);CHKERRQ(ierr);
1315   ierr = MatSeqAIJSetPreallocation(J,0,dnz);CHKERRQ(ierr);
1316   ierr = MatMPIAIJSetPreallocation(J,0,dnz,0,onz);CHKERRQ(ierr);
1317   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
1318   pdipm->K = J;
1319 
1320   /* (8) Insert constant entries to  K */
1321   /* Set 0.0 to diagonal of K, so that the solver does not complain *about missing diagonal value */
1322   ierr = MatGetOwnershipRange(J,&rstart,&rend);CHKERRQ(ierr);
1323   for (i=rstart; i<rend; i++) {
1324     ierr = MatSetValue(J,i,i,0.0,INSERT_VALUES);CHKERRQ(ierr);
1325   }
1326   /* In case Wxx has no diagonal entries preset set diagonal to deltaw given */
1327   if (pdipm->kkt_pd) {
1328       for (i=0; i<pdipm->nh; i++) {
1329         row  = rstart + i;
1330         ierr = MatSetValue(J,row,row,pdipm->deltaw,INSERT_VALUES);CHKERRQ(ierr);
1331       }
1332   }
1333 
1334   /* Row block of K: [ grad Ce, 0, 0, 0] */
1335   if (pdipm->Nxfixed) {
1336     ierr = MatGetOwnershipRange(pdipm->Jce_xfixed,&Jcrstart,NULL);CHKERRQ(ierr);
1337     for (i=0; i < (pdipm->nce - pdipm->ng); i++) {
1338       row = rstart + pdipm->off_lambdae + pdipm->ng + i;
1339 
1340       ierr = MatGetRow(pdipm->Jce_xfixed,i+Jcrstart,&nc,&cols,&aa);CHKERRQ(ierr);
1341       proc = 0;
1342       for (j=0; j < nc; j++) {
1343         while (cols[j] >= cranges[proc+1]) proc++;
1344         col = cols[j] - cranges[proc] + Jranges[proc];
1345         ierr = MatSetValue(J,row,col,aa[j],INSERT_VALUES);CHKERRQ(ierr); /* grad Ce */
1346         ierr = MatSetValue(J,col,row,aa[j],INSERT_VALUES);CHKERRQ(ierr); /* grad Ce' */
1347       }
1348       ierr = MatRestoreRow(pdipm->Jce_xfixed,i+Jcrstart,&nc,&cols,&aa);CHKERRQ(ierr);
1349     }
1350   }
1351 
1352   /* Row block of K: [ -grad Ci, 0, 0, I] */
1353   ierr = MatGetOwnershipRange(pdipm->Jci_xb,&Jcrstart,NULL);CHKERRQ(ierr);
1354   for (i=0; i < pdipm->nci - pdipm->nh; i++) {
1355     row = rstart + pdipm->off_lambdai + pdipm->nh + i;
1356 
1357     ierr = MatGetRow(pdipm->Jci_xb,i+Jcrstart,&nc,&cols,&aa);CHKERRQ(ierr);
1358     proc = 0;
1359     for (j=0; j < nc; j++) {
1360       while (cols[j] >= cranges[proc+1]) proc++;
1361       col = cols[j] - cranges[proc] + Jranges[proc];
1362       ierr = MatSetValue(J,col,row,-aa[j],INSERT_VALUES);CHKERRQ(ierr);
1363       ierr = MatSetValue(J,row,col,-aa[j],INSERT_VALUES);CHKERRQ(ierr);
1364     }
1365     ierr = MatRestoreRow(pdipm->Jci_xb,i+Jcrstart,&nc,&cols,&aa);CHKERRQ(ierr);
1366 
1367     col = rstart + pdipm->off_z + pdipm->nh + i;
1368     ierr = MatSetValue(J,row,col,1,INSERT_VALUES);CHKERRQ(ierr);
1369   }
1370 
1371   for (i=0; i < pdipm->nh; i++) {
1372     row = rstart + pdipm->off_lambdai + i;
1373     col = rstart + pdipm->off_z + i;
1374     ierr = MatSetValue(J,row,col,1,INSERT_VALUES);CHKERRQ(ierr);
1375   }
1376 
1377   /* Row block of K: [ 0, 0, I, ...] */
1378   for (i=0; i < pdipm->nci; i++) {
1379     row = rstart + pdipm->off_z + i;
1380     col = rstart + pdipm->off_lambdai + i;
1381     ierr = MatSetValue(J,row,col,1,INSERT_VALUES);CHKERRQ(ierr);
1382   }
1383 
1384   if (pdipm->Nxfixed) {
1385     ierr = MatDestroy(&Jce_xfixed_trans);CHKERRQ(ierr);
1386   }
1387   ierr = MatDestroy(&Jci_xb_trans);CHKERRQ(ierr);
1388   ierr = PetscFree3(ng_all,nh_all,Jranges);CHKERRQ(ierr);
1389 
1390   /* (9) Set up nonlinear solver SNES */
1391   ierr = SNESSetFunction(pdipm->snes,NULL,TaoSNESFunction_PDIPM,(void*)tao);CHKERRQ(ierr);
1392   ierr = SNESSetJacobian(pdipm->snes,J,J,TaoSNESJacobian_PDIPM,(void*)tao);CHKERRQ(ierr);
1393 
1394   if (pdipm->solve_reduced_kkt) {
1395     PC pc;
1396     ierr = KSPGetPC(tao->ksp,&pc);CHKERRQ(ierr);
1397     ierr = PCSetType(pc,PCFIELDSPLIT);CHKERRQ(ierr);
1398     ierr = PCFieldSplitSetType(pc,PC_COMPOSITE_SCHUR);CHKERRQ(ierr);
1399     ierr = PCFieldSplitSetIS(pc,"2",pdipm->is2);CHKERRQ(ierr);
1400     ierr = PCFieldSplitSetIS(pc,"1",pdipm->is1);CHKERRQ(ierr);
1401   }
1402   ierr = SNESSetFromOptions(pdipm->snes);CHKERRQ(ierr);
1403 
1404   /* (10) Setup PCPreSolve() for pdipm->solve_symmetric_kkt */
1405   if (pdipm->solve_symmetric_kkt) {
1406     KSP       ksp;
1407     PC        pc;
1408     PetscBool isCHOL;
1409     ierr = SNESGetKSP(pdipm->snes,&ksp);CHKERRQ(ierr);
1410     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
1411     ierr = PCSetPreSolve(pc,PCPreSolve_PDIPM);CHKERRQ(ierr);
1412 
1413     ierr = PetscObjectTypeCompare((PetscObject)pc,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
1414     if (isCHOL) {
1415       Mat        Factor;
1416       PetscBool  isMUMPS;
1417       ierr = PCFactorGetMatrix(pc,&Factor);CHKERRQ(ierr);
1418       ierr = PetscObjectTypeCompare((PetscObject)Factor,"mumps",&isMUMPS);CHKERRQ(ierr);
1419       if (isMUMPS) { /* must set mumps ICNTL(13)=1 and ICNTL(24)=1 to call MatGetInertia() */
1420 #if defined(PETSC_HAVE_MUMPS)
1421         ierr = MatMumpsSetIcntl(Factor,24,1);CHKERRQ(ierr); /* detection of null pivot rows */
1422         if (size > 1) {
1423           ierr = MatMumpsSetIcntl(Factor,13,1);CHKERRQ(ierr); /* parallelism of the root node (enable ScaLAPACK) and its splitting */
1424         }
1425 #else
1426         SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_SUP,"Requires external package MUMPS");
1427 #endif
1428       }
1429     }
1430   }
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 /*
1435    TaoDestroy_PDIPM - Destroys the pdipm object
1436 
1437    Input:
1438    full pdipm
1439 
1440    Output:
1441    Destroyed pdipm
1442 */
1443 PetscErrorCode TaoDestroy_PDIPM(Tao tao)
1444 {
1445   TAO_PDIPM      *pdipm = (TAO_PDIPM*)tao->data;
1446   PetscErrorCode ierr;
1447 
1448   PetscFunctionBegin;
1449   /* Freeing Vectors assocaiated with KKT (X) */
1450   ierr = VecDestroy(&pdipm->x);CHKERRQ(ierr); /* Solution x */
1451   ierr = VecDestroy(&pdipm->lambdae);CHKERRQ(ierr); /* Equality constraints lagrangian multiplier*/
1452   ierr = VecDestroy(&pdipm->lambdai);CHKERRQ(ierr); /* Inequality constraints lagrangian multiplier*/
1453   ierr = VecDestroy(&pdipm->z);CHKERRQ(ierr);       /* Slack variables */
1454   ierr = VecDestroy(&pdipm->X);CHKERRQ(ierr);       /* Big KKT system vector [x; lambdae; lambdai; z] */
1455 
1456   /* work vectors */
1457   ierr = VecDestroy(&pdipm->lambdae_xfixed);CHKERRQ(ierr);
1458   ierr = VecDestroy(&pdipm->lambdai_xb);CHKERRQ(ierr);
1459 
1460   /* Legrangian equality and inequality Vec */
1461   ierr = VecDestroy(&pdipm->ce);CHKERRQ(ierr); /* Vec of equality constraints */
1462   ierr = VecDestroy(&pdipm->ci);CHKERRQ(ierr); /* Vec of inequality constraints */
1463 
1464   /* Matrices */
1465   ierr = MatDestroy(&pdipm->Jce_xfixed);CHKERRQ(ierr);
1466   ierr = MatDestroy(&pdipm->Jci_xb);CHKERRQ(ierr); /* Jacobian of inequality constraints Jci = [tao->jacobian_inequality ; J(nxub); J(nxlb); J(nxbx)] */
1467   ierr = MatDestroy(&pdipm->K);CHKERRQ(ierr);
1468 
1469   /* Index Sets */
1470   if (pdipm->Nxub) {
1471     ierr = ISDestroy(&pdipm->isxub);CHKERRQ(ierr);    /* Finite upper bound only -inf < x < ub */
1472   }
1473 
1474   if (pdipm->Nxlb) {
1475     ierr = ISDestroy(&pdipm->isxlb);CHKERRQ(ierr);    /* Finite lower bound only  lb <= x < inf */
1476   }
1477 
1478   if (pdipm->Nxfixed) {
1479     ierr = ISDestroy(&pdipm->isxfixed);CHKERRQ(ierr); /* Fixed variables         lb =  x = ub */
1480   }
1481 
1482   if (pdipm->Nxbox) {
1483     ierr = ISDestroy(&pdipm->isxbox);CHKERRQ(ierr);   /* Boxed variables         lb <= x <= ub */
1484   }
1485 
1486   if (pdipm->Nxfree) {
1487     ierr = ISDestroy(&pdipm->isxfree);CHKERRQ(ierr);  /* Free variables        -inf <= x <= inf */
1488   }
1489 
1490   if (pdipm->solve_reduced_kkt) {
1491     ierr = ISDestroy(&pdipm->is1);CHKERRQ(ierr);
1492     ierr = ISDestroy(&pdipm->is2);CHKERRQ(ierr);
1493   }
1494 
1495   /* SNES */
1496   ierr = SNESDestroy(&pdipm->snes);CHKERRQ(ierr); /* Nonlinear solver */
1497   ierr = PetscFree(pdipm->nce_all);CHKERRQ(ierr);
1498   ierr = MatDestroy(&pdipm->jac_equality_trans);CHKERRQ(ierr);
1499   ierr = MatDestroy(&pdipm->jac_inequality_trans);CHKERRQ(ierr);
1500 
1501   /* Destroy pdipm */
1502   ierr = PetscFree(tao->data);CHKERRQ(ierr); /* Holding locations of pdipm */
1503 
1504   /* Destroy Dual */
1505   ierr = VecDestroy(&tao->DE);CHKERRQ(ierr); /* equality dual */
1506   ierr = VecDestroy(&tao->DI);CHKERRQ(ierr); /* dinequality dual */
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 PetscErrorCode TaoSetFromOptions_PDIPM(PetscOptionItems *PetscOptionsObject,Tao tao)
1511 {
1512   TAO_PDIPM      *pdipm = (TAO_PDIPM*)tao->data;
1513   PetscErrorCode ierr;
1514 
1515   PetscFunctionBegin;
1516   ierr = PetscOptionsHead(PetscOptionsObject,"PDIPM method for constrained optimization");CHKERRQ(ierr);
1517   ierr = PetscOptionsReal("-tao_pdipm_push_init_slack","parameter to push initial slack variables away from bounds",NULL,pdipm->push_init_slack,&pdipm->push_init_slack,NULL);CHKERRQ(ierr);
1518   ierr = PetscOptionsReal("-tao_pdipm_push_init_lambdai","parameter to push initial (inequality) dual variables away from bounds",NULL,pdipm->push_init_lambdai,&pdipm->push_init_lambdai,NULL);CHKERRQ(ierr);
1519   ierr = PetscOptionsBool("-tao_pdipm_solve_reduced_kkt","Solve reduced KKT system using Schur-complement",NULL,pdipm->solve_reduced_kkt,&pdipm->solve_reduced_kkt,NULL);CHKERRQ(ierr);
1520   ierr = PetscOptionsReal("-tao_pdipm_mu_update_factor","Update scalar for barrier parameter (mu) update",NULL,pdipm->mu_update_factor,&pdipm->mu_update_factor,NULL);CHKERRQ(ierr);
1521   ierr = PetscOptionsBool("-tao_pdipm_symmetric_kkt","Solve non reduced symmetric KKT system",NULL,pdipm->solve_symmetric_kkt,&pdipm->solve_symmetric_kkt,NULL);CHKERRQ(ierr);
1522   ierr = PetscOptionsBool("-tao_pdipm_kkt_shift_pd","Add shifts to make KKT matrix positive definite",NULL,pdipm->kkt_pd,&pdipm->kkt_pd,NULL);CHKERRQ(ierr);
1523   ierr = PetscOptionsTail();CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 /*MC
1528   TAOPDIPM - Barrier-based primal-dual interior point algorithm for generally constrained optimization.
1529 
1530   Option Database Keys:
1531 +   -tao_pdipm_push_init_lambdai - parameter to push initial dual variables away from bounds (> 0)
1532 .   -tao_pdipm_push_init_slack - parameter to push initial slack variables away from bounds (> 0)
1533 .   -tao_pdipm_mu_update_factor - update scalar for barrier parameter (mu) update (> 0)
1534 .   -tao_pdipm_symmetric_kkt - Solve non-reduced symmetric KKT system
1535 -   -tao_pdipm_kkt_shift_pd - Add shifts to make KKT matrix positive definite
1536 
1537   Level: beginner
1538 M*/
1539 PETSC_EXTERN PetscErrorCode TaoCreate_PDIPM(Tao tao)
1540 {
1541   TAO_PDIPM      *pdipm;
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   tao->ops->setup          = TaoSetup_PDIPM;
1546   tao->ops->solve          = TaoSolve_PDIPM;
1547   tao->ops->setfromoptions = TaoSetFromOptions_PDIPM;
1548   tao->ops->view           = TaoView_PDIPM;
1549   tao->ops->destroy        = TaoDestroy_PDIPM;
1550 
1551   ierr = PetscNewLog(tao,&pdipm);CHKERRQ(ierr);
1552   tao->data = (void*)pdipm;
1553 
1554   pdipm->nx      = pdipm->Nx      = 0;
1555   pdipm->nxfixed = pdipm->Nxfixed = 0;
1556   pdipm->nxlb    = pdipm->Nxlb    = 0;
1557   pdipm->nxub    = pdipm->Nxub    = 0;
1558   pdipm->nxbox   = pdipm->Nxbox   = 0;
1559   pdipm->nxfree  = pdipm->Nxfree  = 0;
1560 
1561   pdipm->ng = pdipm->Ng = pdipm->nce = pdipm->Nce = 0;
1562   pdipm->nh = pdipm->Nh = pdipm->nci = pdipm->Nci = 0;
1563   pdipm->n  = pdipm->N  = 0;
1564   pdipm->mu = 1.0;
1565   pdipm->mu_update_factor = 0.1;
1566 
1567   pdipm->deltaw     = 0.0;
1568   pdipm->lastdeltaw = 3*1.e-4;
1569   pdipm->deltac     = 0.0;
1570   pdipm->kkt_pd     = PETSC_FALSE;
1571 
1572   pdipm->push_init_slack     = 1.0;
1573   pdipm->push_init_lambdai   = 1.0;
1574   pdipm->solve_reduced_kkt   = PETSC_FALSE;
1575   pdipm->solve_symmetric_kkt = PETSC_TRUE;
1576 
1577   /* Override default settings (unless already changed) */
1578   if (!tao->max_it_changed) tao->max_it = 200;
1579   if (!tao->max_funcs_changed) tao->max_funcs = 500;
1580 
1581   ierr = SNESCreate(((PetscObject)tao)->comm,&pdipm->snes);CHKERRQ(ierr);
1582   ierr = SNESSetOptionsPrefix(pdipm->snes,tao->hdr.prefix);CHKERRQ(ierr);
1583   ierr = SNESGetKSP(pdipm->snes,&tao->ksp);CHKERRQ(ierr);
1584   ierr = PetscObjectReference((PetscObject)tao->ksp);CHKERRQ(ierr);
1585   ierr = KSPSetApplicationContext(tao->ksp,(void *)tao);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
1588