xref: /petsc/src/tao/pde_constrained/tutorials/elliptic.c (revision 2e65eb737b5b07432530db55f6b2a145ebc548b2)
1 #include <petsc/private/taoimpl.h>
2 
3 typedef struct {
4   PetscInt n; /* Number of total variables */
5   PetscInt m; /* Number of constraints */
6   PetscInt nstate;
7   PetscInt ndesign;
8   PetscInt mx; /* grid points in each direction */
9   PetscInt ns; /* Number of data samples (1<=ns<=8)
10                   Currently only ns=1 is supported */
11   PetscInt ndata; /* Number of data points per sample */
12   IS       s_is;
13   IS       d_is;
14 
15   VecScatter state_scatter;
16   VecScatter design_scatter;
17   VecScatter *yi_scatter, *di_scatter;
18   Vec        suby,subq,subd;
19   Mat        Js,Jd,JsPrec,JsInv,JsBlock;
20 
21   PetscReal alpha; /* Regularization parameter */
22   PetscReal beta; /* Weight attributed to ||u||^2 in regularization functional */
23   PetscReal noise; /* Amount of noise to add to data */
24   PetscReal *ones;
25   Mat       Q;
26   Mat       MQ;
27   Mat       L;
28 
29   Mat Grad;
30   Mat Av,Avwork;
31   Mat Div, Divwork;
32   Mat DSG;
33   Mat Diag,Ones;
34 
35   Vec q;
36   Vec ur; /* reference */
37 
38   Vec d;
39   Vec dwork;
40 
41   Vec x; /* super vec of y,u */
42 
43   Vec y; /* state variables */
44   Vec ywork;
45 
46   Vec ytrue;
47 
48   Vec u; /* design variables */
49   Vec uwork;
50 
51   Vec utrue;
52 
53   Vec js_diag;
54 
55   Vec c; /* constraint vector */
56   Vec cwork;
57 
58   Vec lwork;
59   Vec S;
60   Vec Swork,Twork,Sdiag,Ywork;
61   Vec Av_u;
62 
63   KSP solver;
64   PC  prec;
65 
66   PetscReal tola,tolb,tolc,told;
67   PetscInt  ksp_its;
68   PetscInt  ksp_its_initial;
69   PetscLogStage stages[10];
70   PetscBool use_ptap;
71   PetscBool use_lrc;
72 } AppCtx;
73 
74 PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
75 PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
76 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
77 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
78 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*);
79 PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
80 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
81 PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter);
82 PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter);
83 PetscErrorCode EllipticInitialize(AppCtx*);
84 PetscErrorCode EllipticDestroy(AppCtx*);
85 PetscErrorCode EllipticMonitor(Tao, void*);
86 
87 PetscErrorCode StateBlockMatMult(Mat,Vec,Vec);
88 PetscErrorCode StateMatMult(Mat,Vec,Vec);
89 
90 PetscErrorCode StateInvMatMult(Mat,Vec,Vec);
91 PetscErrorCode DesignMatMult(Mat,Vec,Vec);
92 PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);
93 
94 PetscErrorCode QMatMult(Mat,Vec,Vec);
95 PetscErrorCode QMatMultTranspose(Mat,Vec,Vec);
96 
97 static  char help[]="";
98 
99 int main(int argc, char **argv)
100 {
101   Vec                x0;
102   Tao                tao;
103   AppCtx             user;
104   PetscInt           ntests = 1;
105   PetscInt           i;
106 
107   PetscCall(PetscInitialize(&argc, &argv, (char*)0,help));
108   user.mx = 8;
109   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"elliptic example",NULL);
110   PetscCall(PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL));
111   user.ns = 6;
112   PetscCall(PetscOptionsInt("-ns","Number of data samples (1<=ns<=8)","",user.ns,&user.ns,NULL));
113   user.ndata = 64;
114   PetscCall(PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL));
115   user.alpha = 0.1;
116   PetscCall(PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL));
117   user.beta = 0.00001;
118   PetscCall(PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL));
119   user.noise = 0.01;
120   PetscCall(PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL));
121 
122   user.use_ptap = PETSC_FALSE;
123   PetscCall(PetscOptionsBool("-use_ptap","Use ptap matrix for DSG","",user.use_ptap,&user.use_ptap,NULL));
124   user.use_lrc = PETSC_FALSE;
125   PetscCall(PetscOptionsBool("-use_lrc","Use lrc matrix for Js","",user.use_lrc,&user.use_lrc,NULL));
126   PetscCall(PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL));
127   PetscOptionsEnd();
128 
129   user.m = user.ns*user.mx*user.mx*user.mx; /* number of constraints */
130   user.nstate =  user.m;
131   user.ndesign = user.mx*user.mx*user.mx;
132   user.n = user.nstate + user.ndesign; /* number of variables */
133 
134   /* Create TAO solver and set desired solution method */
135   PetscCall(TaoCreate(PETSC_COMM_WORLD,&tao));
136   PetscCall(TaoSetType(tao,TAOLCL));
137 
138   /* Set up initial vectors and matrices */
139   PetscCall(EllipticInitialize(&user));
140 
141   PetscCall(Gather(user.x,user.y,user.state_scatter,user.u,user.design_scatter));
142   PetscCall(VecDuplicate(user.x,&x0));
143   PetscCall(VecCopy(user.x,x0));
144 
145   /* Set solution vector with an initial guess */
146   PetscCall(TaoSetSolution(tao,user.x));
147   PetscCall(TaoSetObjective(tao, FormFunction, &user));
148   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
149   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
150 
151   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, &user));
152   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
153 
154   PetscCall(TaoSetStateDesignIS(tao,user.s_is,user.d_is));
155   PetscCall(TaoSetFromOptions(tao));
156 
157   /* SOLVE THE APPLICATION */
158   PetscCall(PetscLogStageRegister("Trials",&user.stages[1]));
159   PetscCall(PetscLogStagePush(user.stages[1]));
160   for (i=0; i<ntests; i++) {
161     PetscCall(TaoSolve(tao));
162     PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %" PetscInt_FMT "\n",user.ksp_its));
163     PetscCall(VecCopy(x0,user.x));
164   }
165   PetscCall(PetscLogStagePop());
166   PetscCall(PetscBarrier((PetscObject)user.x));
167   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: "));
168   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%" PetscInt_FMT "\n",user.ksp_its_initial));
169 
170   PetscCall(TaoDestroy(&tao));
171   PetscCall(VecDestroy(&x0));
172   PetscCall(EllipticDestroy(&user));
173   PetscCall(PetscFinalize());
174   return 0;
175 }
176 /* ------------------------------------------------------------------- */
177 /*
178    dwork = Qy - d
179    lwork = L*(u-ur)
180    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
181 */
182 PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
183 {
184   PetscReal      d1=0,d2=0;
185   AppCtx         *user = (AppCtx*)ptr;
186 
187   PetscFunctionBegin;
188   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
189   PetscCall(MatMult(user->MQ,user->y,user->dwork));
190   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
191   PetscCall(VecDot(user->dwork,user->dwork,&d1));
192   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
193   PetscCall(MatMult(user->L,user->uwork,user->lwork));
194   PetscCall(VecDot(user->lwork,user->lwork,&d2));
195   *f = 0.5 * (d1 + user->alpha*d2);
196   PetscFunctionReturn(0);
197 }
198 
199 /* ------------------------------------------------------------------- */
200 /*
201     state: g_s = Q' *(Qy - d)
202     design: g_d = alpha*L'*L*(u-ur)
203 */
204 PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
205 {
206   AppCtx         *user = (AppCtx*)ptr;
207 
208   PetscFunctionBegin;
209   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
210   PetscCall(MatMult(user->MQ,user->y,user->dwork));
211   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
212   PetscCall(MatMultTranspose(user->MQ,user->dwork,user->ywork));
213   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
214   PetscCall(MatMult(user->L,user->uwork,user->lwork));
215   PetscCall(MatMultTranspose(user->L,user->lwork,user->uwork));
216   PetscCall(VecScale(user->uwork, user->alpha));
217   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
218   PetscFunctionReturn(0);
219 }
220 
221 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
222 {
223   PetscReal      d1,d2;
224   AppCtx         *user = (AppCtx*)ptr;
225 
226   PetscFunctionBegin;
227   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
228   PetscCall(MatMult(user->MQ,user->y,user->dwork));
229   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
230   PetscCall(VecDot(user->dwork,user->dwork,&d1));
231   PetscCall(MatMultTranspose(user->MQ,user->dwork,user->ywork));
232 
233   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
234   PetscCall(MatMult(user->L,user->uwork,user->lwork));
235   PetscCall(VecDot(user->lwork,user->lwork,&d2));
236   PetscCall(MatMultTranspose(user->L,user->lwork,user->uwork));
237   PetscCall(VecScale(user->uwork, user->alpha));
238   *f = 0.5 * (d1 + user->alpha*d2);
239   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
240   PetscFunctionReturn(0);
241 }
242 
243 /* ------------------------------------------------------------------- */
244 /* A
245 MatShell object
246 */
247 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
248 {
249   AppCtx         *user = (AppCtx*)ptr;
250 
251   PetscFunctionBegin;
252   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
253   /* DSG = Div * (1/Av_u) * Grad */
254   PetscCall(VecSet(user->uwork,0));
255   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
256   PetscCall(VecExp(user->uwork));
257   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
258   PetscCall(VecCopy(user->Av_u,user->Swork));
259   PetscCall(VecReciprocal(user->Swork));
260   if (user->use_ptap) {
261     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
262     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG));
263   } else {
264     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
265     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
266     PetscCall(MatProductNumeric(user->DSG));
267   }
268   PetscFunctionReturn(0);
269 }
270 /* ------------------------------------------------------------------- */
271 /* B */
272 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
273 {
274   AppCtx         *user = (AppCtx*)ptr;
275 
276   PetscFunctionBegin;
277   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
278   PetscFunctionReturn(0);
279 }
280 
281 PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y)
282 {
283   PetscReal      sum;
284   AppCtx         *user;
285 
286   PetscFunctionBegin;
287   PetscCall(MatShellGetContext(J_shell,&user));
288   PetscCall(MatMult(user->DSG,X,Y));
289   PetscCall(VecSum(X,&sum));
290   sum /= user->ndesign;
291   PetscCall(VecShift(Y,sum));
292   PetscFunctionReturn(0);
293 }
294 
295 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
296 {
297   PetscInt       i;
298   AppCtx         *user;
299 
300   PetscFunctionBegin;
301   PetscCall(MatShellGetContext(J_shell,&user));
302   if (user->ns == 1) {
303     PetscCall(MatMult(user->JsBlock,X,Y));
304   } else {
305     for (i=0;i<user->ns;i++) {
306       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
307       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
308       PetscCall(MatMult(user->JsBlock,user->subq,user->suby));
309       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y)
316 {
317   PetscInt       its,i;
318   AppCtx         *user;
319 
320   PetscFunctionBegin;
321   PetscCall(MatShellGetContext(J_shell,&user));
322   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->DSG));
323   if (Y == user->ytrue) {
324     /* First solve is done using true solution to set up problem */
325     PetscCall(KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
326   } else {
327     PetscCall(KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
328   }
329   if (user->ns == 1) {
330     PetscCall(KSPSolve(user->solver,X,Y));
331     PetscCall(KSPGetIterationNumber(user->solver,&its));
332     user->ksp_its+=its;
333   } else {
334     for (i=0;i<user->ns;i++) {
335       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
336       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
337       PetscCall(KSPSolve(user->solver,user->subq,user->suby));
338       PetscCall(KSPGetIterationNumber(user->solver,&its));
339       user->ksp_its+=its;
340       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
341     }
342   }
343   PetscFunctionReturn(0);
344 }
345 PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
346 {
347   AppCtx         *user;
348   PetscInt       i;
349 
350   PetscFunctionBegin;
351   PetscCall(MatShellGetContext(J_shell,&user));
352   if (user->ns == 1) {
353     PetscCall(MatMult(user->Q,X,Y));
354   } else {
355     for (i=0;i<user->ns;i++) {
356       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
357       PetscCall(Scatter(Y,user->subd,user->di_scatter[i],0,0));
358       PetscCall(MatMult(user->Q,user->subq,user->subd));
359       PetscCall(Gather(Y,user->subd,user->di_scatter[i],0,0));
360     }
361   }
362   PetscFunctionReturn(0);
363 }
364 
365 PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
366 {
367   AppCtx         *user;
368   PetscInt       i;
369 
370   PetscFunctionBegin;
371   PetscCall(MatShellGetContext(J_shell,&user));
372   if (user->ns == 1) {
373     PetscCall(MatMultTranspose(user->Q,X,Y));
374   } else {
375     for (i=0;i<user->ns;i++) {
376       PetscCall(Scatter(X,user->subd,user->di_scatter[i],0,0));
377       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
378       PetscCall(MatMultTranspose(user->Q,user->subd,user->suby));
379       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
380     }
381   }
382   PetscFunctionReturn(0);
383 }
384 
385 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
386 {
387   PetscInt       i;
388   AppCtx         *user;
389 
390   PetscFunctionBegin;
391   PetscCall(MatShellGetContext(J_shell,&user));
392 
393   /* sdiag(1./v) */
394   PetscCall(VecSet(user->uwork,0));
395   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
396   PetscCall(VecExp(user->uwork));
397 
398   /* sdiag(1./((Av*(1./v)).^2)) */
399   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
400   PetscCall(VecPointwiseMult(user->Swork,user->Swork,user->Swork));
401   PetscCall(VecReciprocal(user->Swork));
402 
403   /* (Av * (sdiag(1./v) * b)) */
404   PetscCall(VecPointwiseMult(user->uwork,user->uwork,X));
405   PetscCall(MatMult(user->Av,user->uwork,user->Twork));
406 
407   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
408   PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
409 
410   if (user->ns == 1) {
411     /* (sdiag(Grad*y(:,i)) */
412     PetscCall(MatMult(user->Grad,user->y,user->Twork));
413 
414     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
415     PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
416     PetscCall(MatMultTranspose(user->Grad,user->Swork,Y));
417   } else {
418     for (i=0;i<user->ns;i++) {
419       PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
420       PetscCall(Scatter(Y,user->subq,user->yi_scatter[i],0,0));
421 
422       PetscCall(MatMult(user->Grad,user->suby,user->Twork));
423       PetscCall(VecPointwiseMult(user->Twork,user->Twork,user->Swork));
424       PetscCall(MatMultTranspose(user->Grad,user->Twork,user->subq));
425       PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
426       PetscCall(Gather(Y,user->subq,user->yi_scatter[i],0,0));
427     }
428   }
429   PetscFunctionReturn(0);
430 }
431 
432 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
433 {
434   PetscInt       i;
435   AppCtx         *user;
436 
437   PetscFunctionBegin;
438   PetscCall(MatShellGetContext(J_shell,&user));
439   PetscCall(VecZeroEntries(Y));
440 
441   /* Sdiag = 1./((Av*(1./v)).^2) */
442   PetscCall(VecSet(user->uwork,0));
443   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
444   PetscCall(VecExp(user->uwork));
445   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
446   PetscCall(VecPointwiseMult(user->Sdiag,user->Swork,user->Swork));
447   PetscCall(VecReciprocal(user->Sdiag));
448 
449   for (i=0;i<user->ns;i++) {
450     PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
451     PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
452 
453     /* Swork = (Div' * b(:,i)) */
454     PetscCall(MatMult(user->Grad,user->subq,user->Swork));
455 
456     /* Twork = Grad*y(:,i) */
457     PetscCall(MatMult(user->Grad,user->suby,user->Twork));
458 
459     /* Twork = sdiag(Twork) * Swork */
460     PetscCall(VecPointwiseMult(user->Twork,user->Swork,user->Twork));
461 
462     /* Swork = pointwisemult(Sdiag,Twork) */
463     PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Sdiag));
464 
465     /* Ywork = Av' * Swork */
466     PetscCall(MatMultTranspose(user->Av,user->Swork,user->Ywork));
467 
468     /* Ywork = pointwisemult(uwork,Ywork) */
469     PetscCall(VecPointwiseMult(user->Ywork,user->uwork,user->Ywork));
470     PetscCall(VecAXPY(Y,1.0,user->Ywork));
471     PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
472   }
473   PetscFunctionReturn(0);
474 }
475 
476 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
477 {
478    /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
479    PetscReal      sum;
480    PetscInt       i;
481    AppCtx         *user = (AppCtx*)ptr;
482 
483    PetscFunctionBegin;
484    PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
485    if (user->ns == 1) {
486      PetscCall(MatMult(user->Grad,user->y,user->Swork));
487      PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
488      PetscCall(MatMultTranspose(user->Grad,user->Swork,C));
489      PetscCall(VecSum(user->y,&sum));
490      sum /= user->ndesign;
491      PetscCall(VecShift(C,sum));
492    } else {
493      for (i=0;i<user->ns;i++) {
494       PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
495       PetscCall(Scatter(C,user->subq,user->yi_scatter[i],0,0));
496       PetscCall(MatMult(user->Grad,user->suby,user->Swork));
497       PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
498       PetscCall(MatMultTranspose(user->Grad,user->Swork,user->subq));
499 
500       PetscCall(VecSum(user->suby,&sum));
501       sum /= user->ndesign;
502       PetscCall(VecShift(user->subq,sum));
503 
504       PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
505       PetscCall(Gather(C,user->subq,user->yi_scatter[i],0,0));
506      }
507    }
508    PetscCall(VecAXPY(C,-1.0,user->q));
509    PetscFunctionReturn(0);
510 }
511 
512 PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
513 {
514   PetscFunctionBegin;
515   PetscCall(VecScatterBegin(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD));
516   PetscCall(VecScatterEnd(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD));
517   if (sub2) {
518     PetscCall(VecScatterBegin(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD));
519     PetscCall(VecScatterEnd(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD));
520   }
521   PetscFunctionReturn(0);
522 }
523 
524 PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
525 {
526   PetscFunctionBegin;
527   PetscCall(VecScatterBegin(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE));
528   PetscCall(VecScatterEnd(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE));
529   if (sub2) {
530     PetscCall(VecScatterBegin(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE));
531     PetscCall(VecScatterEnd(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE));
532   }
533   PetscFunctionReturn(0);
534 }
535 
536 PetscErrorCode EllipticInitialize(AppCtx *user)
537 {
538   PetscInt       m,n,i,j,k,l,linear_index,is,js,ks,ls,istart,iend,iblock;
539   Vec            XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork;
540   PetscReal      *x,*y,*z;
541   PetscReal      h,meanut;
542   PetscScalar    hinv,neg_hinv,half = 0.5,sqrt_beta;
543   PetscInt       im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
544   IS             is_alldesign,is_allstate;
545   IS             is_from_d;
546   IS             is_from_y;
547   PetscInt       lo,hi,hi2,lo2,ysubnlocal,dsubnlocal;
548   const PetscInt *ranges, *subranges;
549   PetscMPIInt    size;
550   PetscReal      xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
551   PetscScalar    v,vx,vy,vz;
552   PetscInt       offset,subindex,subvec,nrank,kk;
553 
554   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,
555                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,
556                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,
557                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,
558                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,
559                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,
560                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,
561                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
562 
563   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,
564                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,
565                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,
566                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,
567                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,
568                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,
569                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,
570                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
571 
572   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,
573                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,
574                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,
575                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,
576                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,
577                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,
578                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,
579                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};
580 
581   PetscFunctionBegin;
582   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));
583   PetscCall(PetscLogStageRegister("Elliptic Setup",&user->stages[0]));
584   PetscCall(PetscLogStagePush(user->stages[0]));
585 
586   /* Create u,y,c,x */
587   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->u));
588   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->y));
589   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->c));
590   PetscCall(VecSetSizes(user->u,PETSC_DECIDE,user->ndesign));
591   PetscCall(VecSetFromOptions(user->u));
592   PetscCall(VecGetLocalSize(user->u,&ysubnlocal));
593   PetscCall(VecSetSizes(user->y,ysubnlocal*user->ns,user->nstate));
594   PetscCall(VecSetSizes(user->c,ysubnlocal*user->ns,user->m));
595   PetscCall(VecSetFromOptions(user->y));
596   PetscCall(VecSetFromOptions(user->c));
597 
598   /*
599      *******************************
600      Create scatters for x <-> y,u
601      *******************************
602 
603      If the state vector y and design vector u are partitioned as
604      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
605      then the solution vector x is organized as
606      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
607      The index sets user->s_is and user->d_is correspond to the indices of the
608      state and design variables owned by the current processor.
609   */
610   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->x));
611 
612   PetscCall(VecGetOwnershipRange(user->y,&lo,&hi));
613   PetscCall(VecGetOwnershipRange(user->u,&lo2,&hi2));
614 
615   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate));
616   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user->s_is));
617   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign));
618   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user->d_is));
619 
620   PetscCall(VecSetSizes(user->x,hi-lo+hi2-lo2,user->n));
621   PetscCall(VecSetFromOptions(user->x));
622 
623   PetscCall(VecScatterCreate(user->x,user->s_is,user->y,is_allstate,&user->state_scatter));
624   PetscCall(VecScatterCreate(user->x,user->d_is,user->u,is_alldesign,&user->design_scatter));
625   PetscCall(ISDestroy(&is_alldesign));
626   PetscCall(ISDestroy(&is_allstate));
627   /*
628      *******************************
629      Create scatter from y to y_1,y_2,...,y_ns
630      *******************************
631   */
632   PetscCall(PetscMalloc1(user->ns,&user->yi_scatter));
633   PetscCall(VecDuplicate(user->u,&user->suby));
634   PetscCall(VecDuplicate(user->u,&user->subq));
635 
636   PetscCall(VecGetOwnershipRange(user->y,&lo2,&hi2));
637   istart = 0;
638   for (i=0; i<user->ns; i++) {
639     PetscCall(VecGetOwnershipRange(user->suby,&lo,&hi));
640     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y));
641     PetscCall(VecScatterCreate(user->y,is_from_y,user->suby,NULL,&user->yi_scatter[i]));
642     istart = istart + hi-lo;
643     PetscCall(ISDestroy(&is_from_y));
644   }
645   /*
646      *******************************
647      Create scatter from d to d_1,d_2,...,d_ns
648      *******************************
649   */
650   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->subd));
651   PetscCall(VecSetSizes(user->subd,PETSC_DECIDE,user->ndata));
652   PetscCall(VecSetFromOptions(user->subd));
653   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->d));
654   PetscCall(VecGetLocalSize(user->subd,&dsubnlocal));
655   PetscCall(VecSetSizes(user->d,dsubnlocal*user->ns,user->ndata*user->ns));
656   PetscCall(VecSetFromOptions(user->d));
657   PetscCall(PetscMalloc1(user->ns,&user->di_scatter));
658 
659   PetscCall(VecGetOwnershipRange(user->d,&lo2,&hi2));
660   istart = 0;
661   for (i=0; i<user->ns; i++) {
662     PetscCall(VecGetOwnershipRange(user->subd,&lo,&hi));
663     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d));
664     PetscCall(VecScatterCreate(user->d,is_from_d,user->subd,NULL,&user->di_scatter[i]));
665     istart = istart + hi-lo;
666     PetscCall(ISDestroy(&is_from_d));
667   }
668 
669   PetscCall(PetscMalloc1(user->mx,&x));
670   PetscCall(PetscMalloc1(user->mx,&y));
671   PetscCall(PetscMalloc1(user->mx,&z));
672 
673   user->ksp_its = 0;
674   user->ksp_its_initial = 0;
675 
676   n = user->mx * user->mx * user->mx;
677   m = 3 * user->mx * user->mx * (user->mx-1);
678   sqrt_beta = PetscSqrtScalar(user->beta);
679 
680   PetscCall(VecCreate(PETSC_COMM_WORLD,&XX));
681   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->q));
682   PetscCall(VecSetSizes(XX,ysubnlocal,n));
683   PetscCall(VecSetSizes(user->q,ysubnlocal*user->ns,user->m));
684   PetscCall(VecSetFromOptions(XX));
685   PetscCall(VecSetFromOptions(user->q));
686 
687   PetscCall(VecDuplicate(XX,&YY));
688   PetscCall(VecDuplicate(XX,&ZZ));
689   PetscCall(VecDuplicate(XX,&XXwork));
690   PetscCall(VecDuplicate(XX,&YYwork));
691   PetscCall(VecDuplicate(XX,&ZZwork));
692   PetscCall(VecDuplicate(XX,&UTwork));
693   PetscCall(VecDuplicate(XX,&user->utrue));
694 
695   /* map for striding q */
696   PetscCall(VecGetOwnershipRanges(user->q,&ranges));
697   PetscCall(VecGetOwnershipRanges(user->u,&subranges));
698 
699   PetscCall(VecGetOwnershipRange(user->q,&lo2,&hi2));
700   PetscCall(VecGetOwnershipRange(user->u,&lo,&hi));
701   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
702   h = 1.0/user->mx;
703   hinv = user->mx;
704   neg_hinv = -hinv;
705 
706   PetscCall(VecGetOwnershipRange(XX,&istart,&iend));
707   for (linear_index=istart; linear_index<iend; linear_index++) {
708     i = linear_index % user->mx;
709     j = ((linear_index-i)/user->mx) % user->mx;
710     k = ((linear_index-i)/user->mx-j) / user->mx;
711     vx = h*(i+0.5);
712     vy = h*(j+0.5);
713     vz = h*(k+0.5);
714     PetscCall(VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES));
715     PetscCall(VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES));
716     PetscCall(VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES));
717     for (is=0; is<2; is++) {
718       for (js=0; js<2; js++) {
719         for (ks=0; ks<2; ks++) {
720           ls = is*4 + js*2 + ks;
721           if (ls<user->ns) {
722             l =ls*n + linear_index;
723             /* remap */
724             subindex = l%n;
725             subvec = l/n;
726             nrank=0;
727             while (subindex >= subranges[nrank+1]) nrank++;
728             offset = subindex - subranges[nrank];
729             istart=0;
730             for (kk=0;kk<nrank;kk++) istart+=user->ns*(subranges[kk+1]-subranges[kk]);
731             istart += (subranges[nrank+1]-subranges[nrank])*subvec;
732             l = istart+offset;
733             v = 100*PetscSinScalar(2*PETSC_PI*(vx+0.25*is))*PetscSinScalar(2*PETSC_PI*(vy+0.25*js))*PetscSinScalar(2*PETSC_PI*(vz+0.25*ks));
734             PetscCall(VecSetValues(user->q,1,&l,&v,INSERT_VALUES));
735           }
736         }
737       }
738     }
739   }
740 
741   PetscCall(VecAssemblyBegin(XX));
742   PetscCall(VecAssemblyEnd(XX));
743   PetscCall(VecAssemblyBegin(YY));
744   PetscCall(VecAssemblyEnd(YY));
745   PetscCall(VecAssemblyBegin(ZZ));
746   PetscCall(VecAssemblyEnd(ZZ));
747   PetscCall(VecAssemblyBegin(user->q));
748   PetscCall(VecAssemblyEnd(user->q));
749 
750   /* Compute true parameter function
751      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
752   PetscCall(VecCopy(XX,XXwork));
753   PetscCall(VecCopy(YY,YYwork));
754   PetscCall(VecCopy(ZZ,ZZwork));
755 
756   PetscCall(VecShift(XXwork,-0.25));
757   PetscCall(VecShift(YYwork,-0.25));
758   PetscCall(VecShift(ZZwork,-0.25));
759 
760   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
761   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
762   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
763 
764   PetscCall(VecCopy(XXwork,UTwork));
765   PetscCall(VecAXPY(UTwork,1.0,YYwork));
766   PetscCall(VecAXPY(UTwork,1.0,ZZwork));
767   PetscCall(VecScale(UTwork,-20.0));
768   PetscCall(VecExp(UTwork));
769   PetscCall(VecCopy(UTwork,user->utrue));
770 
771   PetscCall(VecCopy(XX,XXwork));
772   PetscCall(VecCopy(YY,YYwork));
773   PetscCall(VecCopy(ZZ,ZZwork));
774 
775   PetscCall(VecShift(XXwork,-0.75));
776   PetscCall(VecShift(YYwork,-0.75));
777   PetscCall(VecShift(ZZwork,-0.75));
778 
779   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
780   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
781   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
782 
783   PetscCall(VecCopy(XXwork,UTwork));
784   PetscCall(VecAXPY(UTwork,1.0,YYwork));
785   PetscCall(VecAXPY(UTwork,1.0,ZZwork));
786   PetscCall(VecScale(UTwork,-20.0));
787   PetscCall(VecExp(UTwork));
788 
789   PetscCall(VecAXPY(user->utrue,-1.0,UTwork));
790 
791   PetscCall(VecDestroy(&XX));
792   PetscCall(VecDestroy(&YY));
793   PetscCall(VecDestroy(&ZZ));
794   PetscCall(VecDestroy(&XXwork));
795   PetscCall(VecDestroy(&YYwork));
796   PetscCall(VecDestroy(&ZZwork));
797   PetscCall(VecDestroy(&UTwork));
798 
799   /* Initial guess and reference model */
800   PetscCall(VecDuplicate(user->utrue,&user->ur));
801   PetscCall(VecSum(user->utrue,&meanut));
802   meanut = meanut / n;
803   PetscCall(VecSet(user->ur,meanut));
804   PetscCall(VecCopy(user->ur,user->u));
805 
806   /* Generate Grad matrix */
807   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Grad));
808   PetscCall(MatSetSizes(user->Grad,PETSC_DECIDE,ysubnlocal,m,n));
809   PetscCall(MatSetFromOptions(user->Grad));
810   PetscCall(MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL));
811   PetscCall(MatSeqAIJSetPreallocation(user->Grad,2,NULL));
812   PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
813 
814   for (i=istart; i<iend; i++) {
815     if (i<m/3) {
816       iblock = i / (user->mx-1);
817       j = iblock*user->mx + (i % (user->mx-1));
818       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
819       j = j+1;
820       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
821     }
822     if (i>=m/3 && i<2*m/3) {
823       iblock = (i-m/3) / (user->mx*(user->mx-1));
824       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
825       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
826       j = j + user->mx;
827       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
828     }
829     if (i>=2*m/3) {
830       j = i-2*m/3;
831       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
832       j = j + user->mx*user->mx;
833       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
834     }
835   }
836 
837   PetscCall(MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY));
838   PetscCall(MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY));
839 
840   /* Generate arithmetic averaging matrix Av */
841   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Av));
842   PetscCall(MatSetSizes(user->Av,PETSC_DECIDE,ysubnlocal,m,n));
843   PetscCall(MatSetFromOptions(user->Av));
844   PetscCall(MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL));
845   PetscCall(MatSeqAIJSetPreallocation(user->Av,2,NULL));
846   PetscCall(MatGetOwnershipRange(user->Av,&istart,&iend));
847 
848   for (i=istart; i<iend; i++) {
849     if (i<m/3) {
850       iblock = i / (user->mx-1);
851       j = iblock*user->mx + (i % (user->mx-1));
852       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
853       j = j+1;
854       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
855     }
856     if (i>=m/3 && i<2*m/3) {
857       iblock = (i-m/3) / (user->mx*(user->mx-1));
858       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
859       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
860       j = j + user->mx;
861       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
862     }
863     if (i>=2*m/3) {
864       j = i-2*m/3;
865       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
866       j = j + user->mx*user->mx;
867       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
868     }
869   }
870 
871   PetscCall(MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY));
872   PetscCall(MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY));
873 
874   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->L));
875   PetscCall(MatSetSizes(user->L,PETSC_DECIDE,ysubnlocal,m+n,n));
876   PetscCall(MatSetFromOptions(user->L));
877   PetscCall(MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL));
878   PetscCall(MatSeqAIJSetPreallocation(user->L,2,NULL));
879   PetscCall(MatGetOwnershipRange(user->L,&istart,&iend));
880 
881   for (i=istart; i<iend; i++) {
882     if (i<m/3) {
883       iblock = i / (user->mx-1);
884       j = iblock*user->mx + (i % (user->mx-1));
885       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
886       j = j+1;
887       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
888     }
889     if (i>=m/3 && i<2*m/3) {
890       iblock = (i-m/3) / (user->mx*(user->mx-1));
891       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
892       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
893       j = j + user->mx;
894       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
895     }
896     if (i>=2*m/3 && i<m) {
897       j = i-2*m/3;
898       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
899       j = j + user->mx*user->mx;
900       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
901     }
902     if (i>=m) {
903       j = i - m;
904       PetscCall(MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES));
905     }
906   }
907   PetscCall(MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY));
908   PetscCall(MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY));
909   PetscCall(MatScale(user->L,PetscPowScalar(h,1.5)));
910 
911   /* Generate Div matrix */
912   if (!user->use_ptap) {
913     /* Generate Div matrix */
914     PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Div));
915     PetscCall(MatSetSizes(user->Div,ysubnlocal,PETSC_DECIDE,n,m));
916     PetscCall(MatSetFromOptions(user->Div));
917     PetscCall(MatMPIAIJSetPreallocation(user->Div,4,NULL,4,NULL));
918     PetscCall(MatSeqAIJSetPreallocation(user->Div,6,NULL));
919     PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
920 
921     for (i=istart; i<iend; i++) {
922       if (i<m/3) {
923         iblock = i / (user->mx-1);
924         j = iblock*user->mx + (i % (user->mx-1));
925         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
926         j = j+1;
927         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
928       }
929       if (i>=m/3 && i<2*m/3) {
930         iblock = (i-m/3) / (user->mx*(user->mx-1));
931         j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
932         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
933         j = j + user->mx;
934         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
935       }
936       if (i>=2*m/3) {
937         j = i-2*m/3;
938         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
939         j = j + user->mx*user->mx;
940         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
941       }
942     }
943 
944     PetscCall(MatAssemblyBegin(user->Div,MAT_FINAL_ASSEMBLY));
945     PetscCall(MatAssemblyEnd(user->Div,MAT_FINAL_ASSEMBLY));
946     PetscCall(MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork));
947   } else {
948     PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Diag));
949     PetscCall(MatSetSizes(user->Diag,PETSC_DECIDE,PETSC_DECIDE,m,m));
950     PetscCall(MatSetFromOptions(user->Diag));
951     PetscCall(MatMPIAIJSetPreallocation(user->Diag,1,NULL,0,NULL));
952     PetscCall(MatSeqAIJSetPreallocation(user->Diag,1,NULL));
953   }
954 
955   /* Build work vectors and matrices */
956   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->S));
957   PetscCall(VecSetSizes(user->S, PETSC_DECIDE, m));
958   PetscCall(VecSetFromOptions(user->S));
959 
960   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->lwork));
961   PetscCall(VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx));
962   PetscCall(VecSetFromOptions(user->lwork));
963 
964   PetscCall(MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork));
965 
966   PetscCall(VecDuplicate(user->S,&user->Swork));
967   PetscCall(VecDuplicate(user->S,&user->Sdiag));
968   PetscCall(VecDuplicate(user->S,&user->Av_u));
969   PetscCall(VecDuplicate(user->S,&user->Twork));
970   PetscCall(VecDuplicate(user->y,&user->ywork));
971   PetscCall(VecDuplicate(user->u,&user->Ywork));
972   PetscCall(VecDuplicate(user->u,&user->uwork));
973   PetscCall(VecDuplicate(user->u,&user->js_diag));
974   PetscCall(VecDuplicate(user->c,&user->cwork));
975   PetscCall(VecDuplicate(user->d,&user->dwork));
976 
977   /* Create a matrix-free shell user->Jd for computing B*x */
978   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal,user->nstate,user->ndesign,user,&user->Jd));
979   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult));
980   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose));
981 
982   /* Compute true state function ytrue given utrue */
983   PetscCall(VecDuplicate(user->y,&user->ytrue));
984 
985   /* First compute Av_u = Av*exp(-u) */
986   PetscCall(VecSet(user->uwork, 0));
987   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue)); /* Note: user->utrue */
988   PetscCall(VecExp(user->uwork));
989   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
990 
991   /* Next form DSG = Div*S*Grad */
992   PetscCall(VecCopy(user->Av_u,user->Swork));
993   PetscCall(VecReciprocal(user->Swork));
994   if (user->use_ptap) {
995     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
996     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG));
997   } else {
998     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
999     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
1000 
1001     PetscCall(MatMatMult(user->Divwork,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG));
1002   }
1003 
1004   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE));
1005   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1006 
1007   if (user->use_lrc == PETSC_TRUE) {
1008     v=PetscSqrtReal(1.0 /user->ndesign);
1009     PetscCall(PetscMalloc1(user->ndesign,&user->ones));
1010 
1011     for (i=0;i<user->ndesign;i++) {
1012       user->ones[i]=v;
1013     }
1014     PetscCall(MatCreateDense(PETSC_COMM_WORLD,ysubnlocal,PETSC_DECIDE,user->ndesign,1,user->ones,&user->Ones));
1015     PetscCall(MatAssemblyBegin(user->Ones, MAT_FINAL_ASSEMBLY));
1016     PetscCall(MatAssemblyEnd(user->Ones, MAT_FINAL_ASSEMBLY));
1017     PetscCall(MatCreateLRC(user->DSG,user->Ones,NULL,user->Ones,&user->JsBlock));
1018     PetscCall(MatSetUp(user->JsBlock));
1019   } else {
1020     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
1021     PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal,ysubnlocal,user->ndesign,user->ndesign,user,&user->JsBlock));
1022     PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateBlockMatMult));
1023     PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateBlockMatMult));
1024   }
1025   PetscCall(MatSetOption(user->JsBlock,MAT_SYMMETRIC,PETSC_TRUE));
1026   PetscCall(MatSetOption(user->JsBlock,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1027   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->Js));
1028   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult));
1029   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMult));
1030   PetscCall(MatSetOption(user->Js,MAT_SYMMETRIC,PETSC_TRUE));
1031   PetscCall(MatSetOption(user->Js,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1032 
1033   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->JsInv));
1034   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateInvMatMult));
1035   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateInvMatMult));
1036   PetscCall(MatSetOption(user->JsInv,MAT_SYMMETRIC,PETSC_TRUE));
1037   PetscCall(MatSetOption(user->JsInv,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1038 
1039   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE));
1040   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1041   /* Now solve for ytrue */
1042   PetscCall(KSPCreate(PETSC_COMM_WORLD,&user->solver));
1043   PetscCall(KSPSetFromOptions(user->solver));
1044 
1045   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->DSG));
1046 
1047   PetscCall(MatMult(user->JsInv,user->q,user->ytrue));
1048   /* First compute Av_u = Av*exp(-u) */
1049   PetscCall(VecSet(user->uwork,0));
1050   PetscCall(VecAXPY(user->uwork,-1.0,user->u)); /* Note: user->u */
1051   PetscCall(VecExp(user->uwork));
1052   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1053 
1054   /* Next update DSG = Div*S*Grad  with user->u */
1055   PetscCall(VecCopy(user->Av_u,user->Swork));
1056   PetscCall(VecReciprocal(user->Swork));
1057   if (user->use_ptap) {
1058     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
1059     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG));
1060   } else {
1061     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1062     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u));
1063     PetscCall(MatProductNumeric(user->DSG));
1064   }
1065 
1066   /* Now solve for y */
1067 
1068   PetscCall(MatMult(user->JsInv,user->q,user->y));
1069 
1070   user->ksp_its_initial = user->ksp_its;
1071   user->ksp_its = 0;
1072   /* Construct projection matrix Q (blocks) */
1073   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Q));
1074   PetscCall(MatSetSizes(user->Q,dsubnlocal,ysubnlocal,user->ndata,user->ndesign));
1075   PetscCall(MatSetFromOptions(user->Q));
1076   PetscCall(MatMPIAIJSetPreallocation(user->Q,8,NULL,8,NULL));
1077   PetscCall(MatSeqAIJSetPreallocation(user->Q,8,NULL));
1078 
1079   for (i=0; i<user->mx; i++) {
1080     x[i] = h*(i+0.5);
1081     y[i] = h*(i+0.5);
1082     z[i] = h*(i+0.5);
1083   }
1084   PetscCall(MatGetOwnershipRange(user->Q,&istart,&iend));
1085 
1086   nx = user->mx; ny = user->mx; nz = user->mx;
1087   for (i=istart; i<iend; i++) {
1088 
1089     xri = xr[i];
1090     im = 0;
1091     xim = x[im];
1092     while (xri>xim && im<nx) {
1093       im = im+1;
1094       xim = x[im];
1095     }
1096     indx1 = im-1;
1097     indx2 = im;
1098     dx1 = xri - x[indx1];
1099     dx2 = x[indx2] - xri;
1100 
1101     yri = yr[i];
1102     im = 0;
1103     yim = y[im];
1104     while (yri>yim && im<ny) {
1105       im = im+1;
1106       yim = y[im];
1107     }
1108     indy1 = im-1;
1109     indy2 = im;
1110     dy1 = yri - y[indy1];
1111     dy2 = y[indy2] - yri;
1112 
1113     zri = zr[i];
1114     im = 0;
1115     zim = z[im];
1116     while (zri>zim && im<nz) {
1117       im = im+1;
1118       zim = z[im];
1119     }
1120     indz1 = im-1;
1121     indz2 = im;
1122     dz1 = zri - z[indz1];
1123     dz2 = z[indz2] - zri;
1124 
1125     Dx = x[indx2] - x[indx1];
1126     Dy = y[indy2] - y[indy1];
1127     Dz = z[indz2] - z[indz1];
1128 
1129     j = indx1 + indy1*nx + indz1*nx*ny;
1130     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1131     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1132 
1133     j = indx1 + indy1*nx + indz2*nx*ny;
1134     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1135     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1136 
1137     j = indx1 + indy2*nx + indz1*nx*ny;
1138     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1139     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1140 
1141     j = indx1 + indy2*nx + indz2*nx*ny;
1142     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1143     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1144 
1145     j = indx2 + indy1*nx + indz1*nx*ny;
1146     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1147     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1148 
1149     j = indx2 + indy1*nx + indz2*nx*ny;
1150     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1151     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1152 
1153     j = indx2 + indy2*nx + indz1*nx*ny;
1154     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1155     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1156 
1157     j = indx2 + indy2*nx + indz2*nx*ny;
1158     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1159     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1160   }
1161 
1162   PetscCall(MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY));
1163   PetscCall(MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY));
1164   /* Create MQ (composed of blocks of Q */
1165   PetscCall(MatCreateShell(PETSC_COMM_WORLD,dsubnlocal*user->ns,PETSC_DECIDE,user->ndata*user->ns,user->nstate,user,&user->MQ));
1166   PetscCall(MatShellSetOperation(user->MQ,MATOP_MULT,(void(*)(void))QMatMult));
1167   PetscCall(MatShellSetOperation(user->MQ,MATOP_MULT_TRANSPOSE,(void(*)(void))QMatMultTranspose));
1168 
1169   /* Add noise to the measurement data */
1170   PetscCall(VecSet(user->ywork,1.0));
1171   PetscCall(VecAYPX(user->ywork,user->noise,user->ytrue));
1172   PetscCall(MatMult(user->MQ,user->ywork,user->d));
1173 
1174   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1175   PetscCall(PetscFree(x));
1176   PetscCall(PetscFree(y));
1177   PetscCall(PetscFree(z));
1178   PetscCall(PetscLogStagePop());
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 PetscErrorCode EllipticDestroy(AppCtx *user)
1183 {
1184   PetscInt       i;
1185 
1186   PetscFunctionBegin;
1187   PetscCall(MatDestroy(&user->DSG));
1188   PetscCall(KSPDestroy(&user->solver));
1189   PetscCall(MatDestroy(&user->Q));
1190   PetscCall(MatDestroy(&user->MQ));
1191   if (!user->use_ptap) {
1192     PetscCall(MatDestroy(&user->Div));
1193     PetscCall(MatDestroy(&user->Divwork));
1194   } else {
1195     PetscCall(MatDestroy(&user->Diag));
1196   }
1197   if (user->use_lrc) {
1198     PetscCall(MatDestroy(&user->Ones));
1199   }
1200 
1201   PetscCall(MatDestroy(&user->Grad));
1202   PetscCall(MatDestroy(&user->Av));
1203   PetscCall(MatDestroy(&user->Avwork));
1204   PetscCall(MatDestroy(&user->L));
1205   PetscCall(MatDestroy(&user->Js));
1206   PetscCall(MatDestroy(&user->Jd));
1207   PetscCall(MatDestroy(&user->JsBlock));
1208   PetscCall(MatDestroy(&user->JsInv));
1209 
1210   PetscCall(VecDestroy(&user->x));
1211   PetscCall(VecDestroy(&user->u));
1212   PetscCall(VecDestroy(&user->uwork));
1213   PetscCall(VecDestroy(&user->utrue));
1214   PetscCall(VecDestroy(&user->y));
1215   PetscCall(VecDestroy(&user->ywork));
1216   PetscCall(VecDestroy(&user->ytrue));
1217   PetscCall(VecDestroy(&user->c));
1218   PetscCall(VecDestroy(&user->cwork));
1219   PetscCall(VecDestroy(&user->ur));
1220   PetscCall(VecDestroy(&user->q));
1221   PetscCall(VecDestroy(&user->d));
1222   PetscCall(VecDestroy(&user->dwork));
1223   PetscCall(VecDestroy(&user->lwork));
1224   PetscCall(VecDestroy(&user->S));
1225   PetscCall(VecDestroy(&user->Swork));
1226   PetscCall(VecDestroy(&user->Sdiag));
1227   PetscCall(VecDestroy(&user->Ywork));
1228   PetscCall(VecDestroy(&user->Twork));
1229   PetscCall(VecDestroy(&user->Av_u));
1230   PetscCall(VecDestroy(&user->js_diag));
1231   PetscCall(ISDestroy(&user->s_is));
1232   PetscCall(ISDestroy(&user->d_is));
1233   PetscCall(VecDestroy(&user->suby));
1234   PetscCall(VecDestroy(&user->subd));
1235   PetscCall(VecDestroy(&user->subq));
1236   PetscCall(VecScatterDestroy(&user->state_scatter));
1237   PetscCall(VecScatterDestroy(&user->design_scatter));
1238   for (i=0;i<user->ns;i++) {
1239     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1240     PetscCall(VecScatterDestroy(&user->di_scatter[i]));
1241   }
1242   PetscCall(PetscFree(user->yi_scatter));
1243   PetscCall(PetscFree(user->di_scatter));
1244   if (user->use_lrc) {
1245     PetscCall(PetscFree(user->ones));
1246     PetscCall(MatDestroy(&user->Ones));
1247   }
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 PetscErrorCode EllipticMonitor(Tao tao, void *ptr)
1252 {
1253   Vec            X;
1254   PetscReal      unorm,ynorm;
1255   AppCtx         *user = (AppCtx*)ptr;
1256 
1257   PetscFunctionBegin;
1258   PetscCall(TaoGetSolution(tao,&X));
1259   PetscCall(Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
1260   PetscCall(VecAXPY(user->ywork,-1.0,user->ytrue));
1261   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue));
1262   PetscCall(VecNorm(user->uwork,NORM_2,&unorm));
1263   PetscCall(VecNorm(user->ywork,NORM_2,&ynorm));
1264   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm));
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 /*TEST
1269 
1270    build:
1271       requires: !complex
1272 
1273    test:
1274       args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11
1275       requires: !single
1276 
1277    test:
1278       suffix: 2
1279       args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3
1280       requires: !single
1281 
1282 TEST*/
1283