xref: /petsc/src/tao/pde_constrained/tutorials/elliptic.c (revision c3b5f7ba6bc5ce25a01a67bb37ba5d34b02bbbd7)
1 #include <petsc/private/taoimpl.h>
2 
3 typedef struct {
4   PetscInt n; /* Number of total variables */
5   PetscInt m; /* Number of constraints */
6   PetscInt nstate;
7   PetscInt ndesign;
8   PetscInt mx; /* grid points in each direction */
9   PetscInt ns; /* Number of data samples (1<=ns<=8)
10                   Currently only ns=1 is supported */
11   PetscInt ndata; /* Number of data points per sample */
12   IS       s_is;
13   IS       d_is;
14 
15   VecScatter state_scatter;
16   VecScatter design_scatter;
17   VecScatter *yi_scatter, *di_scatter;
18   Vec        suby,subq,subd;
19   Mat        Js,Jd,JsPrec,JsInv,JsBlock;
20 
21   PetscReal alpha; /* Regularization parameter */
22   PetscReal beta; /* Weight attributed to ||u||^2 in regularization functional */
23   PetscReal noise; /* Amount of noise to add to data */
24   PetscReal *ones;
25   Mat       Q;
26   Mat       MQ;
27   Mat       L;
28 
29   Mat Grad;
30   Mat Av,Avwork;
31   Mat Div, Divwork;
32   Mat DSG;
33   Mat Diag,Ones;
34 
35   Vec q;
36   Vec ur; /* reference */
37 
38   Vec d;
39   Vec dwork;
40 
41   Vec x; /* super vec of y,u */
42 
43   Vec y; /* state variables */
44   Vec ywork;
45 
46   Vec ytrue;
47 
48   Vec u; /* design variables */
49   Vec uwork;
50 
51   Vec utrue;
52 
53   Vec js_diag;
54 
55   Vec c; /* constraint vector */
56   Vec cwork;
57 
58   Vec lwork;
59   Vec S;
60   Vec Swork,Twork,Sdiag,Ywork;
61   Vec Av_u;
62 
63   KSP solver;
64   PC  prec;
65 
66   PetscReal tola,tolb,tolc,told;
67   PetscInt  ksp_its;
68   PetscInt  ksp_its_initial;
69   PetscLogStage stages[10];
70   PetscBool use_ptap;
71   PetscBool use_lrc;
72 } AppCtx;
73 
74 PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
75 PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
76 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
77 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
78 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*);
79 PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
80 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
81 PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter);
82 PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter);
83 PetscErrorCode EllipticInitialize(AppCtx*);
84 PetscErrorCode EllipticDestroy(AppCtx*);
85 PetscErrorCode EllipticMonitor(Tao, void*);
86 
87 PetscErrorCode StateBlockMatMult(Mat,Vec,Vec);
88 PetscErrorCode StateMatMult(Mat,Vec,Vec);
89 
90 PetscErrorCode StateInvMatMult(Mat,Vec,Vec);
91 PetscErrorCode DesignMatMult(Mat,Vec,Vec);
92 PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);
93 
94 PetscErrorCode QMatMult(Mat,Vec,Vec);
95 PetscErrorCode QMatMultTranspose(Mat,Vec,Vec);
96 
97 static  char help[]="";
98 
99 int main(int argc, char **argv)
100 {
101   PetscErrorCode     ierr;
102   Vec                x0;
103   Tao                tao;
104   AppCtx             user;
105   PetscInt           ntests = 1;
106   PetscInt           i;
107 
108   PetscCall(PetscInitialize(&argc, &argv, (char*)0,help));
109   user.mx = 8;
110   ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"elliptic example",NULL);PetscCall(ierr);
111   PetscCall(PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL));
112   user.ns = 6;
113   PetscCall(PetscOptionsInt("-ns","Number of data samples (1<=ns<=8)","",user.ns,&user.ns,NULL));
114   user.ndata = 64;
115   PetscCall(PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL));
116   user.alpha = 0.1;
117   PetscCall(PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL));
118   user.beta = 0.00001;
119   PetscCall(PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL));
120   user.noise = 0.01;
121   PetscCall(PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL));
122 
123   user.use_ptap = PETSC_FALSE;
124   PetscCall(PetscOptionsBool("-use_ptap","Use ptap matrix for DSG","",user.use_ptap,&user.use_ptap,NULL));
125   user.use_lrc = PETSC_FALSE;
126   PetscCall(PetscOptionsBool("-use_lrc","Use lrc matrix for Js","",user.use_lrc,&user.use_lrc,NULL));
127   PetscCall(PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL));
128   ierr = PetscOptionsEnd();PetscCall(ierr);
129 
130   user.m = user.ns*user.mx*user.mx*user.mx; /* number of constraints */
131   user.nstate =  user.m;
132   user.ndesign = user.mx*user.mx*user.mx;
133   user.n = user.nstate + user.ndesign; /* number of variables */
134 
135   /* Create TAO solver and set desired solution method */
136   PetscCall(TaoCreate(PETSC_COMM_WORLD,&tao));
137   PetscCall(TaoSetType(tao,TAOLCL));
138 
139   /* Set up initial vectors and matrices */
140   PetscCall(EllipticInitialize(&user));
141 
142   PetscCall(Gather(user.x,user.y,user.state_scatter,user.u,user.design_scatter));
143   PetscCall(VecDuplicate(user.x,&x0));
144   PetscCall(VecCopy(user.x,x0));
145 
146   /* Set solution vector with an initial guess */
147   PetscCall(TaoSetSolution(tao,user.x));
148   PetscCall(TaoSetObjective(tao, FormFunction, &user));
149   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
150   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
151 
152   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, &user));
153   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
154 
155   PetscCall(TaoSetStateDesignIS(tao,user.s_is,user.d_is));
156   PetscCall(TaoSetFromOptions(tao));
157 
158   /* SOLVE THE APPLICATION */
159   PetscCall(PetscLogStageRegister("Trials",&user.stages[1]));
160   PetscCall(PetscLogStagePush(user.stages[1]));
161   for (i=0; i<ntests; i++) {
162     PetscCall(TaoSolve(tao));
163     PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its));
164     PetscCall(VecCopy(x0,user.x));
165   }
166   PetscCall(PetscLogStagePop());
167   PetscCall(PetscBarrier((PetscObject)user.x));
168   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: "));
169   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial));
170 
171   PetscCall(TaoDestroy(&tao));
172   PetscCall(VecDestroy(&x0));
173   PetscCall(EllipticDestroy(&user));
174   PetscCall(PetscFinalize());
175   return 0;
176 }
177 /* ------------------------------------------------------------------- */
178 /*
179    dwork = Qy - d
180    lwork = L*(u-ur)
181    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
182 */
183 PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
184 {
185   PetscReal      d1=0,d2=0;
186   AppCtx         *user = (AppCtx*)ptr;
187 
188   PetscFunctionBegin;
189   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
190   PetscCall(MatMult(user->MQ,user->y,user->dwork));
191   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
192   PetscCall(VecDot(user->dwork,user->dwork,&d1));
193   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
194   PetscCall(MatMult(user->L,user->uwork,user->lwork));
195   PetscCall(VecDot(user->lwork,user->lwork,&d2));
196   *f = 0.5 * (d1 + user->alpha*d2);
197   PetscFunctionReturn(0);
198 }
199 
200 /* ------------------------------------------------------------------- */
201 /*
202     state: g_s = Q' *(Qy - d)
203     design: g_d = alpha*L'*L*(u-ur)
204 */
205 PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
206 {
207   AppCtx         *user = (AppCtx*)ptr;
208 
209   PetscFunctionBegin;
210   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
211   PetscCall(MatMult(user->MQ,user->y,user->dwork));
212   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
213   PetscCall(MatMultTranspose(user->MQ,user->dwork,user->ywork));
214   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
215   PetscCall(MatMult(user->L,user->uwork,user->lwork));
216   PetscCall(MatMultTranspose(user->L,user->lwork,user->uwork));
217   PetscCall(VecScale(user->uwork, user->alpha));
218   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
219   PetscFunctionReturn(0);
220 }
221 
222 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
223 {
224   PetscReal      d1,d2;
225   AppCtx         *user = (AppCtx*)ptr;
226 
227   PetscFunctionBegin;
228   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
229   PetscCall(MatMult(user->MQ,user->y,user->dwork));
230   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
231   PetscCall(VecDot(user->dwork,user->dwork,&d1));
232   PetscCall(MatMultTranspose(user->MQ,user->dwork,user->ywork));
233 
234   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
235   PetscCall(MatMult(user->L,user->uwork,user->lwork));
236   PetscCall(VecDot(user->lwork,user->lwork,&d2));
237   PetscCall(MatMultTranspose(user->L,user->lwork,user->uwork));
238   PetscCall(VecScale(user->uwork, user->alpha));
239   *f = 0.5 * (d1 + user->alpha*d2);
240   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
241   PetscFunctionReturn(0);
242 }
243 
244 /* ------------------------------------------------------------------- */
245 /* A
246 MatShell object
247 */
248 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
249 {
250   AppCtx         *user = (AppCtx*)ptr;
251 
252   PetscFunctionBegin;
253   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
254   /* DSG = Div * (1/Av_u) * Grad */
255   PetscCall(VecSet(user->uwork,0));
256   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
257   PetscCall(VecExp(user->uwork));
258   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
259   PetscCall(VecCopy(user->Av_u,user->Swork));
260   PetscCall(VecReciprocal(user->Swork));
261   if (user->use_ptap) {
262     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
263     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG));
264   } else {
265     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
266     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
267     PetscCall(MatProductNumeric(user->DSG));
268   }
269   PetscFunctionReturn(0);
270 }
271 /* ------------------------------------------------------------------- */
272 /* B */
273 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
274 {
275   AppCtx         *user = (AppCtx*)ptr;
276 
277   PetscFunctionBegin;
278   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
279   PetscFunctionReturn(0);
280 }
281 
282 PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y)
283 {
284   PetscReal      sum;
285   AppCtx         *user;
286 
287   PetscFunctionBegin;
288   PetscCall(MatShellGetContext(J_shell,&user));
289   PetscCall(MatMult(user->DSG,X,Y));
290   PetscCall(VecSum(X,&sum));
291   sum /= user->ndesign;
292   PetscCall(VecShift(Y,sum));
293   PetscFunctionReturn(0);
294 }
295 
296 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
297 {
298   PetscInt       i;
299   AppCtx         *user;
300 
301   PetscFunctionBegin;
302   PetscCall(MatShellGetContext(J_shell,&user));
303   if (user->ns == 1) {
304     PetscCall(MatMult(user->JsBlock,X,Y));
305   } else {
306     for (i=0;i<user->ns;i++) {
307       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
308       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
309       PetscCall(MatMult(user->JsBlock,user->subq,user->suby));
310       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
311     }
312   }
313   PetscFunctionReturn(0);
314 }
315 
316 PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y)
317 {
318   PetscInt       its,i;
319   AppCtx         *user;
320 
321   PetscFunctionBegin;
322   PetscCall(MatShellGetContext(J_shell,&user));
323   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->DSG));
324   if (Y == user->ytrue) {
325     /* First solve is done using true solution to set up problem */
326     PetscCall(KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
327   } else {
328     PetscCall(KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
329   }
330   if (user->ns == 1) {
331     PetscCall(KSPSolve(user->solver,X,Y));
332     PetscCall(KSPGetIterationNumber(user->solver,&its));
333     user->ksp_its+=its;
334   } else {
335     for (i=0;i<user->ns;i++) {
336       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
337       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
338       PetscCall(KSPSolve(user->solver,user->subq,user->suby));
339       PetscCall(KSPGetIterationNumber(user->solver,&its));
340       user->ksp_its+=its;
341       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
342     }
343   }
344   PetscFunctionReturn(0);
345 }
346 PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
347 {
348   AppCtx         *user;
349   PetscInt       i;
350 
351   PetscFunctionBegin;
352   PetscCall(MatShellGetContext(J_shell,&user));
353   if (user->ns == 1) {
354     PetscCall(MatMult(user->Q,X,Y));
355   } else {
356     for (i=0;i<user->ns;i++) {
357       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
358       PetscCall(Scatter(Y,user->subd,user->di_scatter[i],0,0));
359       PetscCall(MatMult(user->Q,user->subq,user->subd));
360       PetscCall(Gather(Y,user->subd,user->di_scatter[i],0,0));
361     }
362   }
363   PetscFunctionReturn(0);
364 }
365 
366 PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
367 {
368   AppCtx         *user;
369   PetscInt       i;
370 
371   PetscFunctionBegin;
372   PetscCall(MatShellGetContext(J_shell,&user));
373   if (user->ns == 1) {
374     PetscCall(MatMultTranspose(user->Q,X,Y));
375   } else {
376     for (i=0;i<user->ns;i++) {
377       PetscCall(Scatter(X,user->subd,user->di_scatter[i],0,0));
378       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
379       PetscCall(MatMultTranspose(user->Q,user->subd,user->suby));
380       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
381     }
382   }
383   PetscFunctionReturn(0);
384 }
385 
386 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
387 {
388   PetscInt       i;
389   AppCtx         *user;
390 
391   PetscFunctionBegin;
392   PetscCall(MatShellGetContext(J_shell,&user));
393 
394   /* sdiag(1./v) */
395   PetscCall(VecSet(user->uwork,0));
396   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
397   PetscCall(VecExp(user->uwork));
398 
399   /* sdiag(1./((Av*(1./v)).^2)) */
400   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
401   PetscCall(VecPointwiseMult(user->Swork,user->Swork,user->Swork));
402   PetscCall(VecReciprocal(user->Swork));
403 
404   /* (Av * (sdiag(1./v) * b)) */
405   PetscCall(VecPointwiseMult(user->uwork,user->uwork,X));
406   PetscCall(MatMult(user->Av,user->uwork,user->Twork));
407 
408   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
409   PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
410 
411   if (user->ns == 1) {
412     /* (sdiag(Grad*y(:,i)) */
413     PetscCall(MatMult(user->Grad,user->y,user->Twork));
414 
415     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
416     PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
417     PetscCall(MatMultTranspose(user->Grad,user->Swork,Y));
418   } else {
419     for (i=0;i<user->ns;i++) {
420       PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
421       PetscCall(Scatter(Y,user->subq,user->yi_scatter[i],0,0));
422 
423       PetscCall(MatMult(user->Grad,user->suby,user->Twork));
424       PetscCall(VecPointwiseMult(user->Twork,user->Twork,user->Swork));
425       PetscCall(MatMultTranspose(user->Grad,user->Twork,user->subq));
426       PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
427       PetscCall(Gather(Y,user->subq,user->yi_scatter[i],0,0));
428     }
429   }
430   PetscFunctionReturn(0);
431 }
432 
433 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
434 {
435   PetscInt       i;
436   AppCtx         *user;
437 
438   PetscFunctionBegin;
439   PetscCall(MatShellGetContext(J_shell,&user));
440   PetscCall(VecZeroEntries(Y));
441 
442   /* Sdiag = 1./((Av*(1./v)).^2) */
443   PetscCall(VecSet(user->uwork,0));
444   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
445   PetscCall(VecExp(user->uwork));
446   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
447   PetscCall(VecPointwiseMult(user->Sdiag,user->Swork,user->Swork));
448   PetscCall(VecReciprocal(user->Sdiag));
449 
450   for (i=0;i<user->ns;i++) {
451     PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
452     PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
453 
454     /* Swork = (Div' * b(:,i)) */
455     PetscCall(MatMult(user->Grad,user->subq,user->Swork));
456 
457     /* Twork = Grad*y(:,i) */
458     PetscCall(MatMult(user->Grad,user->suby,user->Twork));
459 
460     /* Twork = sdiag(Twork) * Swork */
461     PetscCall(VecPointwiseMult(user->Twork,user->Swork,user->Twork));
462 
463     /* Swork = pointwisemult(Sdiag,Twork) */
464     PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Sdiag));
465 
466     /* Ywork = Av' * Swork */
467     PetscCall(MatMultTranspose(user->Av,user->Swork,user->Ywork));
468 
469     /* Ywork = pointwisemult(uwork,Ywork) */
470     PetscCall(VecPointwiseMult(user->Ywork,user->uwork,user->Ywork));
471     PetscCall(VecAXPY(Y,1.0,user->Ywork));
472     PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
478 {
479    /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
480    PetscReal      sum;
481    PetscInt       i;
482    AppCtx         *user = (AppCtx*)ptr;
483 
484    PetscFunctionBegin;
485    PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
486    if (user->ns == 1) {
487      PetscCall(MatMult(user->Grad,user->y,user->Swork));
488      PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
489      PetscCall(MatMultTranspose(user->Grad,user->Swork,C));
490      PetscCall(VecSum(user->y,&sum));
491      sum /= user->ndesign;
492      PetscCall(VecShift(C,sum));
493    } else {
494      for (i=0;i<user->ns;i++) {
495       PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
496       PetscCall(Scatter(C,user->subq,user->yi_scatter[i],0,0));
497       PetscCall(MatMult(user->Grad,user->suby,user->Swork));
498       PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
499       PetscCall(MatMultTranspose(user->Grad,user->Swork,user->subq));
500 
501       PetscCall(VecSum(user->suby,&sum));
502       sum /= user->ndesign;
503       PetscCall(VecShift(user->subq,sum));
504 
505       PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
506       PetscCall(Gather(C,user->subq,user->yi_scatter[i],0,0));
507      }
508    }
509    PetscCall(VecAXPY(C,-1.0,user->q));
510    PetscFunctionReturn(0);
511 }
512 
513 PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
514 {
515   PetscFunctionBegin;
516   PetscCall(VecScatterBegin(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD));
517   PetscCall(VecScatterEnd(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD));
518   if (sub2) {
519     PetscCall(VecScatterBegin(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD));
520     PetscCall(VecScatterEnd(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD));
521   }
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
526 {
527   PetscFunctionBegin;
528   PetscCall(VecScatterBegin(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE));
529   PetscCall(VecScatterEnd(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE));
530   if (sub2) {
531     PetscCall(VecScatterBegin(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE));
532     PetscCall(VecScatterEnd(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE));
533   }
534   PetscFunctionReturn(0);
535 }
536 
537 PetscErrorCode EllipticInitialize(AppCtx *user)
538 {
539   PetscInt       m,n,i,j,k,l,linear_index,is,js,ks,ls,istart,iend,iblock;
540   Vec            XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork;
541   PetscReal      *x,*y,*z;
542   PetscReal      h,meanut;
543   PetscScalar    hinv,neg_hinv,half = 0.5,sqrt_beta;
544   PetscInt       im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
545   IS             is_alldesign,is_allstate;
546   IS             is_from_d;
547   IS             is_from_y;
548   PetscInt       lo,hi,hi2,lo2,ysubnlocal,dsubnlocal;
549   const PetscInt *ranges, *subranges;
550   PetscMPIInt    size;
551   PetscReal      xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
552   PetscScalar    v,vx,vy,vz;
553   PetscInt       offset,subindex,subvec,nrank,kk;
554 
555   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,
556                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,
557                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,
558                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,
559                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,
560                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,
561                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,
562                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
563 
564   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,
565                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,
566                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,
567                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,
568                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,
569                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,
570                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,
571                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
572 
573   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,
574                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,
575                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,
576                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,
577                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,
578                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,
579                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,
580                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};
581 
582   PetscFunctionBegin;
583   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));
584   PetscCall(PetscLogStageRegister("Elliptic Setup",&user->stages[0]));
585   PetscCall(PetscLogStagePush(user->stages[0]));
586 
587   /* Create u,y,c,x */
588   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->u));
589   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->y));
590   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->c));
591   PetscCall(VecSetSizes(user->u,PETSC_DECIDE,user->ndesign));
592   PetscCall(VecSetFromOptions(user->u));
593   PetscCall(VecGetLocalSize(user->u,&ysubnlocal));
594   PetscCall(VecSetSizes(user->y,ysubnlocal*user->ns,user->nstate));
595   PetscCall(VecSetSizes(user->c,ysubnlocal*user->ns,user->m));
596   PetscCall(VecSetFromOptions(user->y));
597   PetscCall(VecSetFromOptions(user->c));
598 
599   /*
600      *******************************
601      Create scatters for x <-> y,u
602      *******************************
603 
604      If the state vector y and design vector u are partitioned as
605      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
606      then the solution vector x is organized as
607      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
608      The index sets user->s_is and user->d_is correspond to the indices of the
609      state and design variables owned by the current processor.
610   */
611   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->x));
612 
613   PetscCall(VecGetOwnershipRange(user->y,&lo,&hi));
614   PetscCall(VecGetOwnershipRange(user->u,&lo2,&hi2));
615 
616   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate));
617   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user->s_is));
618   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign));
619   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user->d_is));
620 
621   PetscCall(VecSetSizes(user->x,hi-lo+hi2-lo2,user->n));
622   PetscCall(VecSetFromOptions(user->x));
623 
624   PetscCall(VecScatterCreate(user->x,user->s_is,user->y,is_allstate,&user->state_scatter));
625   PetscCall(VecScatterCreate(user->x,user->d_is,user->u,is_alldesign,&user->design_scatter));
626   PetscCall(ISDestroy(&is_alldesign));
627   PetscCall(ISDestroy(&is_allstate));
628   /*
629      *******************************
630      Create scatter from y to y_1,y_2,...,y_ns
631      *******************************
632   */
633   PetscCall(PetscMalloc1(user->ns,&user->yi_scatter));
634   PetscCall(VecDuplicate(user->u,&user->suby));
635   PetscCall(VecDuplicate(user->u,&user->subq));
636 
637   PetscCall(VecGetOwnershipRange(user->y,&lo2,&hi2));
638   istart = 0;
639   for (i=0; i<user->ns; i++) {
640     PetscCall(VecGetOwnershipRange(user->suby,&lo,&hi));
641     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y));
642     PetscCall(VecScatterCreate(user->y,is_from_y,user->suby,NULL,&user->yi_scatter[i]));
643     istart = istart + hi-lo;
644     PetscCall(ISDestroy(&is_from_y));
645   }
646   /*
647      *******************************
648      Create scatter from d to d_1,d_2,...,d_ns
649      *******************************
650   */
651   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->subd));
652   PetscCall(VecSetSizes(user->subd,PETSC_DECIDE,user->ndata));
653   PetscCall(VecSetFromOptions(user->subd));
654   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->d));
655   PetscCall(VecGetLocalSize(user->subd,&dsubnlocal));
656   PetscCall(VecSetSizes(user->d,dsubnlocal*user->ns,user->ndata*user->ns));
657   PetscCall(VecSetFromOptions(user->d));
658   PetscCall(PetscMalloc1(user->ns,&user->di_scatter));
659 
660   PetscCall(VecGetOwnershipRange(user->d,&lo2,&hi2));
661   istart = 0;
662   for (i=0; i<user->ns; i++) {
663     PetscCall(VecGetOwnershipRange(user->subd,&lo,&hi));
664     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d));
665     PetscCall(VecScatterCreate(user->d,is_from_d,user->subd,NULL,&user->di_scatter[i]));
666     istart = istart + hi-lo;
667     PetscCall(ISDestroy(&is_from_d));
668   }
669 
670   PetscCall(PetscMalloc1(user->mx,&x));
671   PetscCall(PetscMalloc1(user->mx,&y));
672   PetscCall(PetscMalloc1(user->mx,&z));
673 
674   user->ksp_its = 0;
675   user->ksp_its_initial = 0;
676 
677   n = user->mx * user->mx * user->mx;
678   m = 3 * user->mx * user->mx * (user->mx-1);
679   sqrt_beta = PetscSqrtScalar(user->beta);
680 
681   PetscCall(VecCreate(PETSC_COMM_WORLD,&XX));
682   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->q));
683   PetscCall(VecSetSizes(XX,ysubnlocal,n));
684   PetscCall(VecSetSizes(user->q,ysubnlocal*user->ns,user->m));
685   PetscCall(VecSetFromOptions(XX));
686   PetscCall(VecSetFromOptions(user->q));
687 
688   PetscCall(VecDuplicate(XX,&YY));
689   PetscCall(VecDuplicate(XX,&ZZ));
690   PetscCall(VecDuplicate(XX,&XXwork));
691   PetscCall(VecDuplicate(XX,&YYwork));
692   PetscCall(VecDuplicate(XX,&ZZwork));
693   PetscCall(VecDuplicate(XX,&UTwork));
694   PetscCall(VecDuplicate(XX,&user->utrue));
695 
696   /* map for striding q */
697   PetscCall(VecGetOwnershipRanges(user->q,&ranges));
698   PetscCall(VecGetOwnershipRanges(user->u,&subranges));
699 
700   PetscCall(VecGetOwnershipRange(user->q,&lo2,&hi2));
701   PetscCall(VecGetOwnershipRange(user->u,&lo,&hi));
702   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
703   h = 1.0/user->mx;
704   hinv = user->mx;
705   neg_hinv = -hinv;
706 
707   PetscCall(VecGetOwnershipRange(XX,&istart,&iend));
708   for (linear_index=istart; linear_index<iend; linear_index++) {
709     i = linear_index % user->mx;
710     j = ((linear_index-i)/user->mx) % user->mx;
711     k = ((linear_index-i)/user->mx-j) / user->mx;
712     vx = h*(i+0.5);
713     vy = h*(j+0.5);
714     vz = h*(k+0.5);
715     PetscCall(VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES));
716     PetscCall(VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES));
717     PetscCall(VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES));
718     for (is=0; is<2; is++) {
719       for (js=0; js<2; js++) {
720         for (ks=0; ks<2; ks++) {
721           ls = is*4 + js*2 + ks;
722           if (ls<user->ns) {
723             l =ls*n + linear_index;
724             /* remap */
725             subindex = l%n;
726             subvec = l/n;
727             nrank=0;
728             while (subindex >= subranges[nrank+1]) nrank++;
729             offset = subindex - subranges[nrank];
730             istart=0;
731             for (kk=0;kk<nrank;kk++) istart+=user->ns*(subranges[kk+1]-subranges[kk]);
732             istart += (subranges[nrank+1]-subranges[nrank])*subvec;
733             l = istart+offset;
734             v = 100*PetscSinScalar(2*PETSC_PI*(vx+0.25*is))*PetscSinScalar(2*PETSC_PI*(vy+0.25*js))*PetscSinScalar(2*PETSC_PI*(vz+0.25*ks));
735             PetscCall(VecSetValues(user->q,1,&l,&v,INSERT_VALUES));
736           }
737         }
738       }
739     }
740   }
741 
742   PetscCall(VecAssemblyBegin(XX));
743   PetscCall(VecAssemblyEnd(XX));
744   PetscCall(VecAssemblyBegin(YY));
745   PetscCall(VecAssemblyEnd(YY));
746   PetscCall(VecAssemblyBegin(ZZ));
747   PetscCall(VecAssemblyEnd(ZZ));
748   PetscCall(VecAssemblyBegin(user->q));
749   PetscCall(VecAssemblyEnd(user->q));
750 
751   /* Compute true parameter function
752      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
753   PetscCall(VecCopy(XX,XXwork));
754   PetscCall(VecCopy(YY,YYwork));
755   PetscCall(VecCopy(ZZ,ZZwork));
756 
757   PetscCall(VecShift(XXwork,-0.25));
758   PetscCall(VecShift(YYwork,-0.25));
759   PetscCall(VecShift(ZZwork,-0.25));
760 
761   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
762   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
763   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
764 
765   PetscCall(VecCopy(XXwork,UTwork));
766   PetscCall(VecAXPY(UTwork,1.0,YYwork));
767   PetscCall(VecAXPY(UTwork,1.0,ZZwork));
768   PetscCall(VecScale(UTwork,-20.0));
769   PetscCall(VecExp(UTwork));
770   PetscCall(VecCopy(UTwork,user->utrue));
771 
772   PetscCall(VecCopy(XX,XXwork));
773   PetscCall(VecCopy(YY,YYwork));
774   PetscCall(VecCopy(ZZ,ZZwork));
775 
776   PetscCall(VecShift(XXwork,-0.75));
777   PetscCall(VecShift(YYwork,-0.75));
778   PetscCall(VecShift(ZZwork,-0.75));
779 
780   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
781   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
782   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
783 
784   PetscCall(VecCopy(XXwork,UTwork));
785   PetscCall(VecAXPY(UTwork,1.0,YYwork));
786   PetscCall(VecAXPY(UTwork,1.0,ZZwork));
787   PetscCall(VecScale(UTwork,-20.0));
788   PetscCall(VecExp(UTwork));
789 
790   PetscCall(VecAXPY(user->utrue,-1.0,UTwork));
791 
792   PetscCall(VecDestroy(&XX));
793   PetscCall(VecDestroy(&YY));
794   PetscCall(VecDestroy(&ZZ));
795   PetscCall(VecDestroy(&XXwork));
796   PetscCall(VecDestroy(&YYwork));
797   PetscCall(VecDestroy(&ZZwork));
798   PetscCall(VecDestroy(&UTwork));
799 
800   /* Initial guess and reference model */
801   PetscCall(VecDuplicate(user->utrue,&user->ur));
802   PetscCall(VecSum(user->utrue,&meanut));
803   meanut = meanut / n;
804   PetscCall(VecSet(user->ur,meanut));
805   PetscCall(VecCopy(user->ur,user->u));
806 
807   /* Generate Grad matrix */
808   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Grad));
809   PetscCall(MatSetSizes(user->Grad,PETSC_DECIDE,ysubnlocal,m,n));
810   PetscCall(MatSetFromOptions(user->Grad));
811   PetscCall(MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL));
812   PetscCall(MatSeqAIJSetPreallocation(user->Grad,2,NULL));
813   PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
814 
815   for (i=istart; i<iend; i++) {
816     if (i<m/3) {
817       iblock = i / (user->mx-1);
818       j = iblock*user->mx + (i % (user->mx-1));
819       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
820       j = j+1;
821       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
822     }
823     if (i>=m/3 && i<2*m/3) {
824       iblock = (i-m/3) / (user->mx*(user->mx-1));
825       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
826       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
827       j = j + user->mx;
828       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
829     }
830     if (i>=2*m/3) {
831       j = i-2*m/3;
832       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
833       j = j + user->mx*user->mx;
834       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
835     }
836   }
837 
838   PetscCall(MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY));
839   PetscCall(MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY));
840 
841   /* Generate arithmetic averaging matrix Av */
842   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Av));
843   PetscCall(MatSetSizes(user->Av,PETSC_DECIDE,ysubnlocal,m,n));
844   PetscCall(MatSetFromOptions(user->Av));
845   PetscCall(MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL));
846   PetscCall(MatSeqAIJSetPreallocation(user->Av,2,NULL));
847   PetscCall(MatGetOwnershipRange(user->Av,&istart,&iend));
848 
849   for (i=istart; i<iend; i++) {
850     if (i<m/3) {
851       iblock = i / (user->mx-1);
852       j = iblock*user->mx + (i % (user->mx-1));
853       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
854       j = j+1;
855       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
856     }
857     if (i>=m/3 && i<2*m/3) {
858       iblock = (i-m/3) / (user->mx*(user->mx-1));
859       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
860       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
861       j = j + user->mx;
862       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
863     }
864     if (i>=2*m/3) {
865       j = i-2*m/3;
866       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
867       j = j + user->mx*user->mx;
868       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
869     }
870   }
871 
872   PetscCall(MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY));
873   PetscCall(MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY));
874 
875   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->L));
876   PetscCall(MatSetSizes(user->L,PETSC_DECIDE,ysubnlocal,m+n,n));
877   PetscCall(MatSetFromOptions(user->L));
878   PetscCall(MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL));
879   PetscCall(MatSeqAIJSetPreallocation(user->L,2,NULL));
880   PetscCall(MatGetOwnershipRange(user->L,&istart,&iend));
881 
882   for (i=istart; i<iend; i++) {
883     if (i<m/3) {
884       iblock = i / (user->mx-1);
885       j = iblock*user->mx + (i % (user->mx-1));
886       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
887       j = j+1;
888       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
889     }
890     if (i>=m/3 && i<2*m/3) {
891       iblock = (i-m/3) / (user->mx*(user->mx-1));
892       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
893       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
894       j = j + user->mx;
895       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
896     }
897     if (i>=2*m/3 && i<m) {
898       j = i-2*m/3;
899       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
900       j = j + user->mx*user->mx;
901       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
902     }
903     if (i>=m) {
904       j = i - m;
905       PetscCall(MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES));
906     }
907   }
908   PetscCall(MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY));
909   PetscCall(MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY));
910   PetscCall(MatScale(user->L,PetscPowScalar(h,1.5)));
911 
912   /* Generate Div matrix */
913   if (!user->use_ptap) {
914     /* Generate Div matrix */
915     PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Div));
916     PetscCall(MatSetSizes(user->Div,ysubnlocal,PETSC_DECIDE,n,m));
917     PetscCall(MatSetFromOptions(user->Div));
918     PetscCall(MatMPIAIJSetPreallocation(user->Div,4,NULL,4,NULL));
919     PetscCall(MatSeqAIJSetPreallocation(user->Div,6,NULL));
920     PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
921 
922     for (i=istart; i<iend; i++) {
923       if (i<m/3) {
924         iblock = i / (user->mx-1);
925         j = iblock*user->mx + (i % (user->mx-1));
926         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
927         j = j+1;
928         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
929       }
930       if (i>=m/3 && i<2*m/3) {
931         iblock = (i-m/3) / (user->mx*(user->mx-1));
932         j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
933         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
934         j = j + user->mx;
935         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
936       }
937       if (i>=2*m/3) {
938         j = i-2*m/3;
939         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
940         j = j + user->mx*user->mx;
941         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
942       }
943     }
944 
945     PetscCall(MatAssemblyBegin(user->Div,MAT_FINAL_ASSEMBLY));
946     PetscCall(MatAssemblyEnd(user->Div,MAT_FINAL_ASSEMBLY));
947     PetscCall(MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork));
948   } else {
949     PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Diag));
950     PetscCall(MatSetSizes(user->Diag,PETSC_DECIDE,PETSC_DECIDE,m,m));
951     PetscCall(MatSetFromOptions(user->Diag));
952     PetscCall(MatMPIAIJSetPreallocation(user->Diag,1,NULL,0,NULL));
953     PetscCall(MatSeqAIJSetPreallocation(user->Diag,1,NULL));
954   }
955 
956   /* Build work vectors and matrices */
957   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->S));
958   PetscCall(VecSetSizes(user->S, PETSC_DECIDE, m));
959   PetscCall(VecSetFromOptions(user->S));
960 
961   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->lwork));
962   PetscCall(VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx));
963   PetscCall(VecSetFromOptions(user->lwork));
964 
965   PetscCall(MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork));
966 
967   PetscCall(VecDuplicate(user->S,&user->Swork));
968   PetscCall(VecDuplicate(user->S,&user->Sdiag));
969   PetscCall(VecDuplicate(user->S,&user->Av_u));
970   PetscCall(VecDuplicate(user->S,&user->Twork));
971   PetscCall(VecDuplicate(user->y,&user->ywork));
972   PetscCall(VecDuplicate(user->u,&user->Ywork));
973   PetscCall(VecDuplicate(user->u,&user->uwork));
974   PetscCall(VecDuplicate(user->u,&user->js_diag));
975   PetscCall(VecDuplicate(user->c,&user->cwork));
976   PetscCall(VecDuplicate(user->d,&user->dwork));
977 
978   /* Create a matrix-free shell user->Jd for computing B*x */
979   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal,user->nstate,user->ndesign,user,&user->Jd));
980   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult));
981   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose));
982 
983   /* Compute true state function ytrue given utrue */
984   PetscCall(VecDuplicate(user->y,&user->ytrue));
985 
986   /* First compute Av_u = Av*exp(-u) */
987   PetscCall(VecSet(user->uwork, 0));
988   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue)); /* Note: user->utrue */
989   PetscCall(VecExp(user->uwork));
990   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
991 
992   /* Next form DSG = Div*S*Grad */
993   PetscCall(VecCopy(user->Av_u,user->Swork));
994   PetscCall(VecReciprocal(user->Swork));
995   if (user->use_ptap) {
996     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
997     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG));
998   } else {
999     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1000     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
1001 
1002     PetscCall(MatMatMult(user->Divwork,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG));
1003   }
1004 
1005   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE));
1006   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1007 
1008   if (user->use_lrc == PETSC_TRUE) {
1009     v=PetscSqrtReal(1.0 /user->ndesign);
1010     PetscCall(PetscMalloc1(user->ndesign,&user->ones));
1011 
1012     for (i=0;i<user->ndesign;i++) {
1013       user->ones[i]=v;
1014     }
1015     PetscCall(MatCreateDense(PETSC_COMM_WORLD,ysubnlocal,PETSC_DECIDE,user->ndesign,1,user->ones,&user->Ones));
1016     PetscCall(MatAssemblyBegin(user->Ones, MAT_FINAL_ASSEMBLY));
1017     PetscCall(MatAssemblyEnd(user->Ones, MAT_FINAL_ASSEMBLY));
1018     PetscCall(MatCreateLRC(user->DSG,user->Ones,NULL,user->Ones,&user->JsBlock));
1019     PetscCall(MatSetUp(user->JsBlock));
1020   } else {
1021     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
1022     PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal,ysubnlocal,user->ndesign,user->ndesign,user,&user->JsBlock));
1023     PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateBlockMatMult));
1024     PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateBlockMatMult));
1025   }
1026   PetscCall(MatSetOption(user->JsBlock,MAT_SYMMETRIC,PETSC_TRUE));
1027   PetscCall(MatSetOption(user->JsBlock,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1028   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->Js));
1029   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult));
1030   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMult));
1031   PetscCall(MatSetOption(user->Js,MAT_SYMMETRIC,PETSC_TRUE));
1032   PetscCall(MatSetOption(user->Js,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1033 
1034   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->JsInv));
1035   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateInvMatMult));
1036   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateInvMatMult));
1037   PetscCall(MatSetOption(user->JsInv,MAT_SYMMETRIC,PETSC_TRUE));
1038   PetscCall(MatSetOption(user->JsInv,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1039 
1040   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE));
1041   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1042   /* Now solve for ytrue */
1043   PetscCall(KSPCreate(PETSC_COMM_WORLD,&user->solver));
1044   PetscCall(KSPSetFromOptions(user->solver));
1045 
1046   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->DSG));
1047 
1048   PetscCall(MatMult(user->JsInv,user->q,user->ytrue));
1049   /* First compute Av_u = Av*exp(-u) */
1050   PetscCall(VecSet(user->uwork,0));
1051   PetscCall(VecAXPY(user->uwork,-1.0,user->u)); /* Note: user->u */
1052   PetscCall(VecExp(user->uwork));
1053   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1054 
1055   /* Next update DSG = Div*S*Grad  with user->u */
1056   PetscCall(VecCopy(user->Av_u,user->Swork));
1057   PetscCall(VecReciprocal(user->Swork));
1058   if (user->use_ptap) {
1059     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
1060     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG));
1061   } else {
1062     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1063     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u));
1064     PetscCall(MatProductNumeric(user->DSG));
1065   }
1066 
1067   /* Now solve for y */
1068 
1069   PetscCall(MatMult(user->JsInv,user->q,user->y));
1070 
1071   user->ksp_its_initial = user->ksp_its;
1072   user->ksp_its = 0;
1073   /* Construct projection matrix Q (blocks) */
1074   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Q));
1075   PetscCall(MatSetSizes(user->Q,dsubnlocal,ysubnlocal,user->ndata,user->ndesign));
1076   PetscCall(MatSetFromOptions(user->Q));
1077   PetscCall(MatMPIAIJSetPreallocation(user->Q,8,NULL,8,NULL));
1078   PetscCall(MatSeqAIJSetPreallocation(user->Q,8,NULL));
1079 
1080   for (i=0; i<user->mx; i++) {
1081     x[i] = h*(i+0.5);
1082     y[i] = h*(i+0.5);
1083     z[i] = h*(i+0.5);
1084   }
1085   PetscCall(MatGetOwnershipRange(user->Q,&istart,&iend));
1086 
1087   nx = user->mx; ny = user->mx; nz = user->mx;
1088   for (i=istart; i<iend; i++) {
1089 
1090     xri = xr[i];
1091     im = 0;
1092     xim = x[im];
1093     while (xri>xim && im<nx) {
1094       im = im+1;
1095       xim = x[im];
1096     }
1097     indx1 = im-1;
1098     indx2 = im;
1099     dx1 = xri - x[indx1];
1100     dx2 = x[indx2] - xri;
1101 
1102     yri = yr[i];
1103     im = 0;
1104     yim = y[im];
1105     while (yri>yim && im<ny) {
1106       im = im+1;
1107       yim = y[im];
1108     }
1109     indy1 = im-1;
1110     indy2 = im;
1111     dy1 = yri - y[indy1];
1112     dy2 = y[indy2] - yri;
1113 
1114     zri = zr[i];
1115     im = 0;
1116     zim = z[im];
1117     while (zri>zim && im<nz) {
1118       im = im+1;
1119       zim = z[im];
1120     }
1121     indz1 = im-1;
1122     indz2 = im;
1123     dz1 = zri - z[indz1];
1124     dz2 = z[indz2] - zri;
1125 
1126     Dx = x[indx2] - x[indx1];
1127     Dy = y[indy2] - y[indy1];
1128     Dz = z[indz2] - z[indz1];
1129 
1130     j = indx1 + indy1*nx + indz1*nx*ny;
1131     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1132     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1133 
1134     j = indx1 + indy1*nx + indz2*nx*ny;
1135     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1136     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1137 
1138     j = indx1 + indy2*nx + indz1*nx*ny;
1139     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1140     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1141 
1142     j = indx1 + indy2*nx + indz2*nx*ny;
1143     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1144     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1145 
1146     j = indx2 + indy1*nx + indz1*nx*ny;
1147     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1148     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1149 
1150     j = indx2 + indy1*nx + indz2*nx*ny;
1151     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1152     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1153 
1154     j = indx2 + indy2*nx + indz1*nx*ny;
1155     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1156     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1157 
1158     j = indx2 + indy2*nx + indz2*nx*ny;
1159     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1160     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1161   }
1162 
1163   PetscCall(MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY));
1164   PetscCall(MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY));
1165   /* Create MQ (composed of blocks of Q */
1166   PetscCall(MatCreateShell(PETSC_COMM_WORLD,dsubnlocal*user->ns,PETSC_DECIDE,user->ndata*user->ns,user->nstate,user,&user->MQ));
1167   PetscCall(MatShellSetOperation(user->MQ,MATOP_MULT,(void(*)(void))QMatMult));
1168   PetscCall(MatShellSetOperation(user->MQ,MATOP_MULT_TRANSPOSE,(void(*)(void))QMatMultTranspose));
1169 
1170   /* Add noise to the measurement data */
1171   PetscCall(VecSet(user->ywork,1.0));
1172   PetscCall(VecAYPX(user->ywork,user->noise,user->ytrue));
1173   PetscCall(MatMult(user->MQ,user->ywork,user->d));
1174 
1175   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1176   PetscCall(PetscFree(x));
1177   PetscCall(PetscFree(y));
1178   PetscCall(PetscFree(z));
1179   PetscCall(PetscLogStagePop());
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 PetscErrorCode EllipticDestroy(AppCtx *user)
1184 {
1185   PetscInt       i;
1186 
1187   PetscFunctionBegin;
1188   PetscCall(MatDestroy(&user->DSG));
1189   PetscCall(KSPDestroy(&user->solver));
1190   PetscCall(MatDestroy(&user->Q));
1191   PetscCall(MatDestroy(&user->MQ));
1192   if (!user->use_ptap) {
1193     PetscCall(MatDestroy(&user->Div));
1194     PetscCall(MatDestroy(&user->Divwork));
1195   } else {
1196     PetscCall(MatDestroy(&user->Diag));
1197   }
1198   if (user->use_lrc) {
1199     PetscCall(MatDestroy(&user->Ones));
1200   }
1201 
1202   PetscCall(MatDestroy(&user->Grad));
1203   PetscCall(MatDestroy(&user->Av));
1204   PetscCall(MatDestroy(&user->Avwork));
1205   PetscCall(MatDestroy(&user->L));
1206   PetscCall(MatDestroy(&user->Js));
1207   PetscCall(MatDestroy(&user->Jd));
1208   PetscCall(MatDestroy(&user->JsBlock));
1209   PetscCall(MatDestroy(&user->JsInv));
1210 
1211   PetscCall(VecDestroy(&user->x));
1212   PetscCall(VecDestroy(&user->u));
1213   PetscCall(VecDestroy(&user->uwork));
1214   PetscCall(VecDestroy(&user->utrue));
1215   PetscCall(VecDestroy(&user->y));
1216   PetscCall(VecDestroy(&user->ywork));
1217   PetscCall(VecDestroy(&user->ytrue));
1218   PetscCall(VecDestroy(&user->c));
1219   PetscCall(VecDestroy(&user->cwork));
1220   PetscCall(VecDestroy(&user->ur));
1221   PetscCall(VecDestroy(&user->q));
1222   PetscCall(VecDestroy(&user->d));
1223   PetscCall(VecDestroy(&user->dwork));
1224   PetscCall(VecDestroy(&user->lwork));
1225   PetscCall(VecDestroy(&user->S));
1226   PetscCall(VecDestroy(&user->Swork));
1227   PetscCall(VecDestroy(&user->Sdiag));
1228   PetscCall(VecDestroy(&user->Ywork));
1229   PetscCall(VecDestroy(&user->Twork));
1230   PetscCall(VecDestroy(&user->Av_u));
1231   PetscCall(VecDestroy(&user->js_diag));
1232   PetscCall(ISDestroy(&user->s_is));
1233   PetscCall(ISDestroy(&user->d_is));
1234   PetscCall(VecDestroy(&user->suby));
1235   PetscCall(VecDestroy(&user->subd));
1236   PetscCall(VecDestroy(&user->subq));
1237   PetscCall(VecScatterDestroy(&user->state_scatter));
1238   PetscCall(VecScatterDestroy(&user->design_scatter));
1239   for (i=0;i<user->ns;i++) {
1240     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1241     PetscCall(VecScatterDestroy(&user->di_scatter[i]));
1242   }
1243   PetscCall(PetscFree(user->yi_scatter));
1244   PetscCall(PetscFree(user->di_scatter));
1245   if (user->use_lrc) {
1246     PetscCall(PetscFree(user->ones));
1247     PetscCall(MatDestroy(&user->Ones));
1248   }
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 PetscErrorCode EllipticMonitor(Tao tao, void *ptr)
1253 {
1254   Vec            X;
1255   PetscReal      unorm,ynorm;
1256   AppCtx         *user = (AppCtx*)ptr;
1257 
1258   PetscFunctionBegin;
1259   PetscCall(TaoGetSolution(tao,&X));
1260   PetscCall(Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
1261   PetscCall(VecAXPY(user->ywork,-1.0,user->ytrue));
1262   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue));
1263   PetscCall(VecNorm(user->uwork,NORM_2,&unorm));
1264   PetscCall(VecNorm(user->ywork,NORM_2,&ynorm));
1265   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm));
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 /*TEST
1270 
1271    build:
1272       requires: !complex
1273 
1274    test:
1275       args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11
1276       requires: !single
1277 
1278    test:
1279       suffix: 2
1280       args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3
1281       requires: !single
1282 
1283 TEST*/
1284