xref: /petsc/src/tao/pde_constrained/tutorials/elliptic.c (revision ffa8c5705e8ab2cf85ee1d14dbe507a6e2eb5283)
1 #include <petsc/private/taoimpl.h>
2 
3 /*T
4    Concepts: TAO^Solving a system of nonlinear equations, nonlinear least squares
5    Routines: TaoCreate();
6    Routines: TaoSetType();
7    Routines: TaoSetSolution();
8    Routines: TaoSetObjective();
9    Routines: TaoSetGradient();
10    Routines: TaoSetConstraintsRoutine();
11    Routines: TaoSetJacobianStateRoutine();
12    Routines: TaoSetJacobianDesignRoutine();
13    Routines: TaoSetStateDesignIS();
14    Routines: TaoSetFromOptions();
15    Routines: TaoSolve();
16    Routines: TaoDestroy();
17    Processors: n
18 T*/
19 
20 typedef struct {
21   PetscInt n; /* Number of total variables */
22   PetscInt m; /* Number of constraints */
23   PetscInt nstate;
24   PetscInt ndesign;
25   PetscInt mx; /* grid points in each direction */
26   PetscInt ns; /* Number of data samples (1<=ns<=8)
27                   Currently only ns=1 is supported */
28   PetscInt ndata; /* Number of data points per sample */
29   IS       s_is;
30   IS       d_is;
31 
32   VecScatter state_scatter;
33   VecScatter design_scatter;
34   VecScatter *yi_scatter, *di_scatter;
35   Vec        suby,subq,subd;
36   Mat        Js,Jd,JsPrec,JsInv,JsBlock;
37 
38   PetscReal alpha; /* Regularization parameter */
39   PetscReal beta; /* Weight attributed to ||u||^2 in regularization functional */
40   PetscReal noise; /* Amount of noise to add to data */
41   PetscReal *ones;
42   Mat       Q;
43   Mat       MQ;
44   Mat       L;
45 
46   Mat Grad;
47   Mat Av,Avwork;
48   Mat Div, Divwork;
49   Mat DSG;
50   Mat Diag,Ones;
51 
52   Vec q;
53   Vec ur; /* reference */
54 
55   Vec d;
56   Vec dwork;
57 
58   Vec x; /* super vec of y,u */
59 
60   Vec y; /* state variables */
61   Vec ywork;
62 
63   Vec ytrue;
64 
65   Vec u; /* design variables */
66   Vec uwork;
67 
68   Vec utrue;
69 
70   Vec js_diag;
71 
72   Vec c; /* constraint vector */
73   Vec cwork;
74 
75   Vec lwork;
76   Vec S;
77   Vec Swork,Twork,Sdiag,Ywork;
78   Vec Av_u;
79 
80   KSP solver;
81   PC  prec;
82 
83   PetscReal tola,tolb,tolc,told;
84   PetscInt  ksp_its;
85   PetscInt  ksp_its_initial;
86   PetscLogStage stages[10];
87   PetscBool use_ptap;
88   PetscBool use_lrc;
89 } AppCtx;
90 
91 PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
92 PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
93 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
94 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
95 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*);
96 PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
97 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
98 PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter);
99 PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter);
100 PetscErrorCode EllipticInitialize(AppCtx*);
101 PetscErrorCode EllipticDestroy(AppCtx*);
102 PetscErrorCode EllipticMonitor(Tao, void*);
103 
104 PetscErrorCode StateBlockMatMult(Mat,Vec,Vec);
105 PetscErrorCode StateMatMult(Mat,Vec,Vec);
106 
107 PetscErrorCode StateInvMatMult(Mat,Vec,Vec);
108 PetscErrorCode DesignMatMult(Mat,Vec,Vec);
109 PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);
110 
111 PetscErrorCode QMatMult(Mat,Vec,Vec);
112 PetscErrorCode QMatMultTranspose(Mat,Vec,Vec);
113 
114 static  char help[]="";
115 
116 int main(int argc, char **argv)
117 {
118   PetscErrorCode     ierr;
119   Vec                x0;
120   Tao                tao;
121   AppCtx             user;
122   PetscInt           ntests = 1;
123   PetscInt           i;
124 
125   PetscCall(PetscInitialize(&argc, &argv, (char*)0,help));
126   user.mx = 8;
127   ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"elliptic example",NULL);PetscCall(ierr);
128   PetscCall(PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL));
129   user.ns = 6;
130   PetscCall(PetscOptionsInt("-ns","Number of data samples (1<=ns<=8)","",user.ns,&user.ns,NULL));
131   user.ndata = 64;
132   PetscCall(PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL));
133   user.alpha = 0.1;
134   PetscCall(PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL));
135   user.beta = 0.00001;
136   PetscCall(PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL));
137   user.noise = 0.01;
138   PetscCall(PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL));
139 
140   user.use_ptap = PETSC_FALSE;
141   PetscCall(PetscOptionsBool("-use_ptap","Use ptap matrix for DSG","",user.use_ptap,&user.use_ptap,NULL));
142   user.use_lrc = PETSC_FALSE;
143   PetscCall(PetscOptionsBool("-use_lrc","Use lrc matrix for Js","",user.use_lrc,&user.use_lrc,NULL));
144   PetscCall(PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL));
145   ierr = PetscOptionsEnd();PetscCall(ierr);
146 
147   user.m = user.ns*user.mx*user.mx*user.mx; /* number of constraints */
148   user.nstate =  user.m;
149   user.ndesign = user.mx*user.mx*user.mx;
150   user.n = user.nstate + user.ndesign; /* number of variables */
151 
152   /* Create TAO solver and set desired solution method */
153   PetscCall(TaoCreate(PETSC_COMM_WORLD,&tao));
154   PetscCall(TaoSetType(tao,TAOLCL));
155 
156   /* Set up initial vectors and matrices */
157   PetscCall(EllipticInitialize(&user));
158 
159   PetscCall(Gather(user.x,user.y,user.state_scatter,user.u,user.design_scatter));
160   PetscCall(VecDuplicate(user.x,&x0));
161   PetscCall(VecCopy(user.x,x0));
162 
163   /* Set solution vector with an initial guess */
164   PetscCall(TaoSetSolution(tao,user.x));
165   PetscCall(TaoSetObjective(tao, FormFunction, &user));
166   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
167   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
168 
169   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, &user));
170   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
171 
172   PetscCall(TaoSetStateDesignIS(tao,user.s_is,user.d_is));
173   PetscCall(TaoSetFromOptions(tao));
174 
175   /* SOLVE THE APPLICATION */
176   PetscCall(PetscLogStageRegister("Trials",&user.stages[1]));
177   PetscCall(PetscLogStagePush(user.stages[1]));
178   for (i=0; i<ntests; i++) {
179     PetscCall(TaoSolve(tao));
180     PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its));
181     PetscCall(VecCopy(x0,user.x));
182   }
183   PetscCall(PetscLogStagePop());
184   PetscCall(PetscBarrier((PetscObject)user.x));
185   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: "));
186   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial));
187 
188   PetscCall(TaoDestroy(&tao));
189   PetscCall(VecDestroy(&x0));
190   PetscCall(EllipticDestroy(&user));
191   PetscCall(PetscFinalize());
192   return 0;
193 }
194 /* ------------------------------------------------------------------- */
195 /*
196    dwork = Qy - d
197    lwork = L*(u-ur)
198    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
199 */
200 PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
201 {
202   PetscReal      d1=0,d2=0;
203   AppCtx         *user = (AppCtx*)ptr;
204 
205   PetscFunctionBegin;
206   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
207   PetscCall(MatMult(user->MQ,user->y,user->dwork));
208   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
209   PetscCall(VecDot(user->dwork,user->dwork,&d1));
210   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
211   PetscCall(MatMult(user->L,user->uwork,user->lwork));
212   PetscCall(VecDot(user->lwork,user->lwork,&d2));
213   *f = 0.5 * (d1 + user->alpha*d2);
214   PetscFunctionReturn(0);
215 }
216 
217 /* ------------------------------------------------------------------- */
218 /*
219     state: g_s = Q' *(Qy - d)
220     design: g_d = alpha*L'*L*(u-ur)
221 */
222 PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
223 {
224   AppCtx         *user = (AppCtx*)ptr;
225 
226   PetscFunctionBegin;
227   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
228   PetscCall(MatMult(user->MQ,user->y,user->dwork));
229   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
230   PetscCall(MatMultTranspose(user->MQ,user->dwork,user->ywork));
231   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
232   PetscCall(MatMult(user->L,user->uwork,user->lwork));
233   PetscCall(MatMultTranspose(user->L,user->lwork,user->uwork));
234   PetscCall(VecScale(user->uwork, user->alpha));
235   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
236   PetscFunctionReturn(0);
237 }
238 
239 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
240 {
241   PetscReal      d1,d2;
242   AppCtx         *user = (AppCtx*)ptr;
243 
244   PetscFunctionBegin;
245   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
246   PetscCall(MatMult(user->MQ,user->y,user->dwork));
247   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
248   PetscCall(VecDot(user->dwork,user->dwork,&d1));
249   PetscCall(MatMultTranspose(user->MQ,user->dwork,user->ywork));
250 
251   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
252   PetscCall(MatMult(user->L,user->uwork,user->lwork));
253   PetscCall(VecDot(user->lwork,user->lwork,&d2));
254   PetscCall(MatMultTranspose(user->L,user->lwork,user->uwork));
255   PetscCall(VecScale(user->uwork, user->alpha));
256   *f = 0.5 * (d1 + user->alpha*d2);
257   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
258   PetscFunctionReturn(0);
259 }
260 
261 /* ------------------------------------------------------------------- */
262 /* A
263 MatShell object
264 */
265 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
266 {
267   AppCtx         *user = (AppCtx*)ptr;
268 
269   PetscFunctionBegin;
270   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
271   /* DSG = Div * (1/Av_u) * Grad */
272   PetscCall(VecSet(user->uwork,0));
273   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
274   PetscCall(VecExp(user->uwork));
275   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
276   PetscCall(VecCopy(user->Av_u,user->Swork));
277   PetscCall(VecReciprocal(user->Swork));
278   if (user->use_ptap) {
279     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
280     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG));
281   } else {
282     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
283     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
284     PetscCall(MatProductNumeric(user->DSG));
285   }
286   PetscFunctionReturn(0);
287 }
288 /* ------------------------------------------------------------------- */
289 /* B */
290 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
291 {
292   AppCtx         *user = (AppCtx*)ptr;
293 
294   PetscFunctionBegin;
295   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
296   PetscFunctionReturn(0);
297 }
298 
299 PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y)
300 {
301   PetscReal      sum;
302   AppCtx         *user;
303 
304   PetscFunctionBegin;
305   PetscCall(MatShellGetContext(J_shell,&user));
306   PetscCall(MatMult(user->DSG,X,Y));
307   PetscCall(VecSum(X,&sum));
308   sum /= user->ndesign;
309   PetscCall(VecShift(Y,sum));
310   PetscFunctionReturn(0);
311 }
312 
313 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
314 {
315   PetscInt       i;
316   AppCtx         *user;
317 
318   PetscFunctionBegin;
319   PetscCall(MatShellGetContext(J_shell,&user));
320   if (user->ns == 1) {
321     PetscCall(MatMult(user->JsBlock,X,Y));
322   } else {
323     for (i=0;i<user->ns;i++) {
324       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
325       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
326       PetscCall(MatMult(user->JsBlock,user->subq,user->suby));
327       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
328     }
329   }
330   PetscFunctionReturn(0);
331 }
332 
333 PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y)
334 {
335   PetscInt       its,i;
336   AppCtx         *user;
337 
338   PetscFunctionBegin;
339   PetscCall(MatShellGetContext(J_shell,&user));
340   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->DSG));
341   if (Y == user->ytrue) {
342     /* First solve is done using true solution to set up problem */
343     PetscCall(KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
344   } else {
345     PetscCall(KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
346   }
347   if (user->ns == 1) {
348     PetscCall(KSPSolve(user->solver,X,Y));
349     PetscCall(KSPGetIterationNumber(user->solver,&its));
350     user->ksp_its+=its;
351   } else {
352     for (i=0;i<user->ns;i++) {
353       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
354       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
355       PetscCall(KSPSolve(user->solver,user->subq,user->suby));
356       PetscCall(KSPGetIterationNumber(user->solver,&its));
357       user->ksp_its+=its;
358       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
359     }
360   }
361   PetscFunctionReturn(0);
362 }
363 PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
364 {
365   AppCtx         *user;
366   PetscInt       i;
367 
368   PetscFunctionBegin;
369   PetscCall(MatShellGetContext(J_shell,&user));
370   if (user->ns == 1) {
371     PetscCall(MatMult(user->Q,X,Y));
372   } else {
373     for (i=0;i<user->ns;i++) {
374       PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
375       PetscCall(Scatter(Y,user->subd,user->di_scatter[i],0,0));
376       PetscCall(MatMult(user->Q,user->subq,user->subd));
377       PetscCall(Gather(Y,user->subd,user->di_scatter[i],0,0));
378     }
379   }
380   PetscFunctionReturn(0);
381 }
382 
383 PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
384 {
385   AppCtx         *user;
386   PetscInt       i;
387 
388   PetscFunctionBegin;
389   PetscCall(MatShellGetContext(J_shell,&user));
390   if (user->ns == 1) {
391     PetscCall(MatMultTranspose(user->Q,X,Y));
392   } else {
393     for (i=0;i<user->ns;i++) {
394       PetscCall(Scatter(X,user->subd,user->di_scatter[i],0,0));
395       PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0));
396       PetscCall(MatMultTranspose(user->Q,user->subd,user->suby));
397       PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0));
398     }
399   }
400   PetscFunctionReturn(0);
401 }
402 
403 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
404 {
405   PetscInt       i;
406   AppCtx         *user;
407 
408   PetscFunctionBegin;
409   PetscCall(MatShellGetContext(J_shell,&user));
410 
411   /* sdiag(1./v) */
412   PetscCall(VecSet(user->uwork,0));
413   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
414   PetscCall(VecExp(user->uwork));
415 
416   /* sdiag(1./((Av*(1./v)).^2)) */
417   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
418   PetscCall(VecPointwiseMult(user->Swork,user->Swork,user->Swork));
419   PetscCall(VecReciprocal(user->Swork));
420 
421   /* (Av * (sdiag(1./v) * b)) */
422   PetscCall(VecPointwiseMult(user->uwork,user->uwork,X));
423   PetscCall(MatMult(user->Av,user->uwork,user->Twork));
424 
425   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
426   PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
427 
428   if (user->ns == 1) {
429     /* (sdiag(Grad*y(:,i)) */
430     PetscCall(MatMult(user->Grad,user->y,user->Twork));
431 
432     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
433     PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
434     PetscCall(MatMultTranspose(user->Grad,user->Swork,Y));
435   } else {
436     for (i=0;i<user->ns;i++) {
437       PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
438       PetscCall(Scatter(Y,user->subq,user->yi_scatter[i],0,0));
439 
440       PetscCall(MatMult(user->Grad,user->suby,user->Twork));
441       PetscCall(VecPointwiseMult(user->Twork,user->Twork,user->Swork));
442       PetscCall(MatMultTranspose(user->Grad,user->Twork,user->subq));
443       PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
444       PetscCall(Gather(Y,user->subq,user->yi_scatter[i],0,0));
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
451 {
452   PetscInt       i;
453   AppCtx         *user;
454 
455   PetscFunctionBegin;
456   PetscCall(MatShellGetContext(J_shell,&user));
457   PetscCall(VecZeroEntries(Y));
458 
459   /* Sdiag = 1./((Av*(1./v)).^2) */
460   PetscCall(VecSet(user->uwork,0));
461   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
462   PetscCall(VecExp(user->uwork));
463   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
464   PetscCall(VecPointwiseMult(user->Sdiag,user->Swork,user->Swork));
465   PetscCall(VecReciprocal(user->Sdiag));
466 
467   for (i=0;i<user->ns;i++) {
468     PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0));
469     PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
470 
471     /* Swork = (Div' * b(:,i)) */
472     PetscCall(MatMult(user->Grad,user->subq,user->Swork));
473 
474     /* Twork = Grad*y(:,i) */
475     PetscCall(MatMult(user->Grad,user->suby,user->Twork));
476 
477     /* Twork = sdiag(Twork) * Swork */
478     PetscCall(VecPointwiseMult(user->Twork,user->Swork,user->Twork));
479 
480     /* Swork = pointwisemult(Sdiag,Twork) */
481     PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Sdiag));
482 
483     /* Ywork = Av' * Swork */
484     PetscCall(MatMultTranspose(user->Av,user->Swork,user->Ywork));
485 
486     /* Ywork = pointwisemult(uwork,Ywork) */
487     PetscCall(VecPointwiseMult(user->Ywork,user->uwork,user->Ywork));
488     PetscCall(VecAXPY(Y,1.0,user->Ywork));
489     PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
490   }
491   PetscFunctionReturn(0);
492 }
493 
494 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
495 {
496    /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
497    PetscReal      sum;
498    PetscInt       i;
499    AppCtx         *user = (AppCtx*)ptr;
500 
501    PetscFunctionBegin;
502    PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
503    if (user->ns == 1) {
504      PetscCall(MatMult(user->Grad,user->y,user->Swork));
505      PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
506      PetscCall(MatMultTranspose(user->Grad,user->Swork,C));
507      PetscCall(VecSum(user->y,&sum));
508      sum /= user->ndesign;
509      PetscCall(VecShift(C,sum));
510    } else {
511      for (i=0;i<user->ns;i++) {
512       PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0));
513       PetscCall(Scatter(C,user->subq,user->yi_scatter[i],0,0));
514       PetscCall(MatMult(user->Grad,user->suby,user->Swork));
515       PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
516       PetscCall(MatMultTranspose(user->Grad,user->Swork,user->subq));
517 
518       PetscCall(VecSum(user->suby,&sum));
519       sum /= user->ndesign;
520       PetscCall(VecShift(user->subq,sum));
521 
522       PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0));
523       PetscCall(Gather(C,user->subq,user->yi_scatter[i],0,0));
524      }
525    }
526    PetscCall(VecAXPY(C,-1.0,user->q));
527    PetscFunctionReturn(0);
528 }
529 
530 PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
531 {
532   PetscFunctionBegin;
533   PetscCall(VecScatterBegin(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD));
534   PetscCall(VecScatterEnd(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD));
535   if (sub2) {
536     PetscCall(VecScatterBegin(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD));
537     PetscCall(VecScatterEnd(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD));
538   }
539   PetscFunctionReturn(0);
540 }
541 
542 PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
543 {
544   PetscFunctionBegin;
545   PetscCall(VecScatterBegin(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE));
546   PetscCall(VecScatterEnd(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE));
547   if (sub2) {
548     PetscCall(VecScatterBegin(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE));
549     PetscCall(VecScatterEnd(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE));
550   }
551   PetscFunctionReturn(0);
552 }
553 
554 PetscErrorCode EllipticInitialize(AppCtx *user)
555 {
556   PetscInt       m,n,i,j,k,l,linear_index,is,js,ks,ls,istart,iend,iblock;
557   Vec            XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork;
558   PetscReal      *x,*y,*z;
559   PetscReal      h,meanut;
560   PetscScalar    hinv,neg_hinv,half = 0.5,sqrt_beta;
561   PetscInt       im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
562   IS             is_alldesign,is_allstate;
563   IS             is_from_d;
564   IS             is_from_y;
565   PetscInt       lo,hi,hi2,lo2,ysubnlocal,dsubnlocal;
566   const PetscInt *ranges, *subranges;
567   PetscMPIInt    size;
568   PetscReal      xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
569   PetscScalar    v,vx,vy,vz;
570   PetscInt       offset,subindex,subvec,nrank,kk;
571 
572   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,
573                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,
574                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,
575                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,
576                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,
577                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,
578                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,
579                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
580 
581   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,
582                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,
583                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,
584                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,
585                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,
586                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,
587                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,
588                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
589 
590   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,
591                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,
592                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,
593                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,
594                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,
595                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,
596                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,
597                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};
598 
599   PetscFunctionBegin;
600   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));
601   PetscCall(PetscLogStageRegister("Elliptic Setup",&user->stages[0]));
602   PetscCall(PetscLogStagePush(user->stages[0]));
603 
604   /* Create u,y,c,x */
605   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->u));
606   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->y));
607   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->c));
608   PetscCall(VecSetSizes(user->u,PETSC_DECIDE,user->ndesign));
609   PetscCall(VecSetFromOptions(user->u));
610   PetscCall(VecGetLocalSize(user->u,&ysubnlocal));
611   PetscCall(VecSetSizes(user->y,ysubnlocal*user->ns,user->nstate));
612   PetscCall(VecSetSizes(user->c,ysubnlocal*user->ns,user->m));
613   PetscCall(VecSetFromOptions(user->y));
614   PetscCall(VecSetFromOptions(user->c));
615 
616   /*
617      *******************************
618      Create scatters for x <-> y,u
619      *******************************
620 
621      If the state vector y and design vector u are partitioned as
622      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
623      then the solution vector x is organized as
624      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
625      The index sets user->s_is and user->d_is correspond to the indices of the
626      state and design variables owned by the current processor.
627   */
628   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->x));
629 
630   PetscCall(VecGetOwnershipRange(user->y,&lo,&hi));
631   PetscCall(VecGetOwnershipRange(user->u,&lo2,&hi2));
632 
633   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate));
634   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user->s_is));
635   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign));
636   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user->d_is));
637 
638   PetscCall(VecSetSizes(user->x,hi-lo+hi2-lo2,user->n));
639   PetscCall(VecSetFromOptions(user->x));
640 
641   PetscCall(VecScatterCreate(user->x,user->s_is,user->y,is_allstate,&user->state_scatter));
642   PetscCall(VecScatterCreate(user->x,user->d_is,user->u,is_alldesign,&user->design_scatter));
643   PetscCall(ISDestroy(&is_alldesign));
644   PetscCall(ISDestroy(&is_allstate));
645   /*
646      *******************************
647      Create scatter from y to y_1,y_2,...,y_ns
648      *******************************
649   */
650   PetscCall(PetscMalloc1(user->ns,&user->yi_scatter));
651   PetscCall(VecDuplicate(user->u,&user->suby));
652   PetscCall(VecDuplicate(user->u,&user->subq));
653 
654   PetscCall(VecGetOwnershipRange(user->y,&lo2,&hi2));
655   istart = 0;
656   for (i=0; i<user->ns; i++) {
657     PetscCall(VecGetOwnershipRange(user->suby,&lo,&hi));
658     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y));
659     PetscCall(VecScatterCreate(user->y,is_from_y,user->suby,NULL,&user->yi_scatter[i]));
660     istart = istart + hi-lo;
661     PetscCall(ISDestroy(&is_from_y));
662   }
663   /*
664      *******************************
665      Create scatter from d to d_1,d_2,...,d_ns
666      *******************************
667   */
668   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->subd));
669   PetscCall(VecSetSizes(user->subd,PETSC_DECIDE,user->ndata));
670   PetscCall(VecSetFromOptions(user->subd));
671   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->d));
672   PetscCall(VecGetLocalSize(user->subd,&dsubnlocal));
673   PetscCall(VecSetSizes(user->d,dsubnlocal*user->ns,user->ndata*user->ns));
674   PetscCall(VecSetFromOptions(user->d));
675   PetscCall(PetscMalloc1(user->ns,&user->di_scatter));
676 
677   PetscCall(VecGetOwnershipRange(user->d,&lo2,&hi2));
678   istart = 0;
679   for (i=0; i<user->ns; i++) {
680     PetscCall(VecGetOwnershipRange(user->subd,&lo,&hi));
681     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d));
682     PetscCall(VecScatterCreate(user->d,is_from_d,user->subd,NULL,&user->di_scatter[i]));
683     istart = istart + hi-lo;
684     PetscCall(ISDestroy(&is_from_d));
685   }
686 
687   PetscCall(PetscMalloc1(user->mx,&x));
688   PetscCall(PetscMalloc1(user->mx,&y));
689   PetscCall(PetscMalloc1(user->mx,&z));
690 
691   user->ksp_its = 0;
692   user->ksp_its_initial = 0;
693 
694   n = user->mx * user->mx * user->mx;
695   m = 3 * user->mx * user->mx * (user->mx-1);
696   sqrt_beta = PetscSqrtScalar(user->beta);
697 
698   PetscCall(VecCreate(PETSC_COMM_WORLD,&XX));
699   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->q));
700   PetscCall(VecSetSizes(XX,ysubnlocal,n));
701   PetscCall(VecSetSizes(user->q,ysubnlocal*user->ns,user->m));
702   PetscCall(VecSetFromOptions(XX));
703   PetscCall(VecSetFromOptions(user->q));
704 
705   PetscCall(VecDuplicate(XX,&YY));
706   PetscCall(VecDuplicate(XX,&ZZ));
707   PetscCall(VecDuplicate(XX,&XXwork));
708   PetscCall(VecDuplicate(XX,&YYwork));
709   PetscCall(VecDuplicate(XX,&ZZwork));
710   PetscCall(VecDuplicate(XX,&UTwork));
711   PetscCall(VecDuplicate(XX,&user->utrue));
712 
713   /* map for striding q */
714   PetscCall(VecGetOwnershipRanges(user->q,&ranges));
715   PetscCall(VecGetOwnershipRanges(user->u,&subranges));
716 
717   PetscCall(VecGetOwnershipRange(user->q,&lo2,&hi2));
718   PetscCall(VecGetOwnershipRange(user->u,&lo,&hi));
719   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
720   h = 1.0/user->mx;
721   hinv = user->mx;
722   neg_hinv = -hinv;
723 
724   PetscCall(VecGetOwnershipRange(XX,&istart,&iend));
725   for (linear_index=istart; linear_index<iend; linear_index++) {
726     i = linear_index % user->mx;
727     j = ((linear_index-i)/user->mx) % user->mx;
728     k = ((linear_index-i)/user->mx-j) / user->mx;
729     vx = h*(i+0.5);
730     vy = h*(j+0.5);
731     vz = h*(k+0.5);
732     PetscCall(VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES));
733     PetscCall(VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES));
734     PetscCall(VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES));
735     for (is=0; is<2; is++) {
736       for (js=0; js<2; js++) {
737         for (ks=0; ks<2; ks++) {
738           ls = is*4 + js*2 + ks;
739           if (ls<user->ns) {
740             l =ls*n + linear_index;
741             /* remap */
742             subindex = l%n;
743             subvec = l/n;
744             nrank=0;
745             while (subindex >= subranges[nrank+1]) nrank++;
746             offset = subindex - subranges[nrank];
747             istart=0;
748             for (kk=0;kk<nrank;kk++) istart+=user->ns*(subranges[kk+1]-subranges[kk]);
749             istart += (subranges[nrank+1]-subranges[nrank])*subvec;
750             l = istart+offset;
751             v = 100*PetscSinScalar(2*PETSC_PI*(vx+0.25*is))*PetscSinScalar(2*PETSC_PI*(vy+0.25*js))*PetscSinScalar(2*PETSC_PI*(vz+0.25*ks));
752             PetscCall(VecSetValues(user->q,1,&l,&v,INSERT_VALUES));
753           }
754         }
755       }
756     }
757   }
758 
759   PetscCall(VecAssemblyBegin(XX));
760   PetscCall(VecAssemblyEnd(XX));
761   PetscCall(VecAssemblyBegin(YY));
762   PetscCall(VecAssemblyEnd(YY));
763   PetscCall(VecAssemblyBegin(ZZ));
764   PetscCall(VecAssemblyEnd(ZZ));
765   PetscCall(VecAssemblyBegin(user->q));
766   PetscCall(VecAssemblyEnd(user->q));
767 
768   /* Compute true parameter function
769      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
770   PetscCall(VecCopy(XX,XXwork));
771   PetscCall(VecCopy(YY,YYwork));
772   PetscCall(VecCopy(ZZ,ZZwork));
773 
774   PetscCall(VecShift(XXwork,-0.25));
775   PetscCall(VecShift(YYwork,-0.25));
776   PetscCall(VecShift(ZZwork,-0.25));
777 
778   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
779   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
780   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
781 
782   PetscCall(VecCopy(XXwork,UTwork));
783   PetscCall(VecAXPY(UTwork,1.0,YYwork));
784   PetscCall(VecAXPY(UTwork,1.0,ZZwork));
785   PetscCall(VecScale(UTwork,-20.0));
786   PetscCall(VecExp(UTwork));
787   PetscCall(VecCopy(UTwork,user->utrue));
788 
789   PetscCall(VecCopy(XX,XXwork));
790   PetscCall(VecCopy(YY,YYwork));
791   PetscCall(VecCopy(ZZ,ZZwork));
792 
793   PetscCall(VecShift(XXwork,-0.75));
794   PetscCall(VecShift(YYwork,-0.75));
795   PetscCall(VecShift(ZZwork,-0.75));
796 
797   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
798   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
799   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
800 
801   PetscCall(VecCopy(XXwork,UTwork));
802   PetscCall(VecAXPY(UTwork,1.0,YYwork));
803   PetscCall(VecAXPY(UTwork,1.0,ZZwork));
804   PetscCall(VecScale(UTwork,-20.0));
805   PetscCall(VecExp(UTwork));
806 
807   PetscCall(VecAXPY(user->utrue,-1.0,UTwork));
808 
809   PetscCall(VecDestroy(&XX));
810   PetscCall(VecDestroy(&YY));
811   PetscCall(VecDestroy(&ZZ));
812   PetscCall(VecDestroy(&XXwork));
813   PetscCall(VecDestroy(&YYwork));
814   PetscCall(VecDestroy(&ZZwork));
815   PetscCall(VecDestroy(&UTwork));
816 
817   /* Initial guess and reference model */
818   PetscCall(VecDuplicate(user->utrue,&user->ur));
819   PetscCall(VecSum(user->utrue,&meanut));
820   meanut = meanut / n;
821   PetscCall(VecSet(user->ur,meanut));
822   PetscCall(VecCopy(user->ur,user->u));
823 
824   /* Generate Grad matrix */
825   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Grad));
826   PetscCall(MatSetSizes(user->Grad,PETSC_DECIDE,ysubnlocal,m,n));
827   PetscCall(MatSetFromOptions(user->Grad));
828   PetscCall(MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL));
829   PetscCall(MatSeqAIJSetPreallocation(user->Grad,2,NULL));
830   PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
831 
832   for (i=istart; i<iend; i++) {
833     if (i<m/3) {
834       iblock = i / (user->mx-1);
835       j = iblock*user->mx + (i % (user->mx-1));
836       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
837       j = j+1;
838       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
839     }
840     if (i>=m/3 && i<2*m/3) {
841       iblock = (i-m/3) / (user->mx*(user->mx-1));
842       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
843       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
844       j = j + user->mx;
845       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
846     }
847     if (i>=2*m/3) {
848       j = i-2*m/3;
849       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
850       j = j + user->mx*user->mx;
851       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
852     }
853   }
854 
855   PetscCall(MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY));
856   PetscCall(MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY));
857 
858   /* Generate arithmetic averaging matrix Av */
859   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Av));
860   PetscCall(MatSetSizes(user->Av,PETSC_DECIDE,ysubnlocal,m,n));
861   PetscCall(MatSetFromOptions(user->Av));
862   PetscCall(MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL));
863   PetscCall(MatSeqAIJSetPreallocation(user->Av,2,NULL));
864   PetscCall(MatGetOwnershipRange(user->Av,&istart,&iend));
865 
866   for (i=istart; i<iend; i++) {
867     if (i<m/3) {
868       iblock = i / (user->mx-1);
869       j = iblock*user->mx + (i % (user->mx-1));
870       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
871       j = j+1;
872       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
873     }
874     if (i>=m/3 && i<2*m/3) {
875       iblock = (i-m/3) / (user->mx*(user->mx-1));
876       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
877       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
878       j = j + user->mx;
879       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
880     }
881     if (i>=2*m/3) {
882       j = i-2*m/3;
883       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
884       j = j + user->mx*user->mx;
885       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
886     }
887   }
888 
889   PetscCall(MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY));
890   PetscCall(MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY));
891 
892   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->L));
893   PetscCall(MatSetSizes(user->L,PETSC_DECIDE,ysubnlocal,m+n,n));
894   PetscCall(MatSetFromOptions(user->L));
895   PetscCall(MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL));
896   PetscCall(MatSeqAIJSetPreallocation(user->L,2,NULL));
897   PetscCall(MatGetOwnershipRange(user->L,&istart,&iend));
898 
899   for (i=istart; i<iend; i++) {
900     if (i<m/3) {
901       iblock = i / (user->mx-1);
902       j = iblock*user->mx + (i % (user->mx-1));
903       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
904       j = j+1;
905       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
906     }
907     if (i>=m/3 && i<2*m/3) {
908       iblock = (i-m/3) / (user->mx*(user->mx-1));
909       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
910       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
911       j = j + user->mx;
912       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
913     }
914     if (i>=2*m/3 && i<m) {
915       j = i-2*m/3;
916       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
917       j = j + user->mx*user->mx;
918       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
919     }
920     if (i>=m) {
921       j = i - m;
922       PetscCall(MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES));
923     }
924   }
925   PetscCall(MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY));
926   PetscCall(MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY));
927   PetscCall(MatScale(user->L,PetscPowScalar(h,1.5)));
928 
929   /* Generate Div matrix */
930   if (!user->use_ptap) {
931     /* Generate Div matrix */
932     PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Div));
933     PetscCall(MatSetSizes(user->Div,ysubnlocal,PETSC_DECIDE,n,m));
934     PetscCall(MatSetFromOptions(user->Div));
935     PetscCall(MatMPIAIJSetPreallocation(user->Div,4,NULL,4,NULL));
936     PetscCall(MatSeqAIJSetPreallocation(user->Div,6,NULL));
937     PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
938 
939     for (i=istart; i<iend; i++) {
940       if (i<m/3) {
941         iblock = i / (user->mx-1);
942         j = iblock*user->mx + (i % (user->mx-1));
943         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
944         j = j+1;
945         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
946       }
947       if (i>=m/3 && i<2*m/3) {
948         iblock = (i-m/3) / (user->mx*(user->mx-1));
949         j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
950         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
951         j = j + user->mx;
952         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
953       }
954       if (i>=2*m/3) {
955         j = i-2*m/3;
956         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES));
957         j = j + user->mx*user->mx;
958         PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES));
959       }
960     }
961 
962     PetscCall(MatAssemblyBegin(user->Div,MAT_FINAL_ASSEMBLY));
963     PetscCall(MatAssemblyEnd(user->Div,MAT_FINAL_ASSEMBLY));
964     PetscCall(MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork));
965   } else {
966     PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Diag));
967     PetscCall(MatSetSizes(user->Diag,PETSC_DECIDE,PETSC_DECIDE,m,m));
968     PetscCall(MatSetFromOptions(user->Diag));
969     PetscCall(MatMPIAIJSetPreallocation(user->Diag,1,NULL,0,NULL));
970     PetscCall(MatSeqAIJSetPreallocation(user->Diag,1,NULL));
971   }
972 
973   /* Build work vectors and matrices */
974   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->S));
975   PetscCall(VecSetSizes(user->S, PETSC_DECIDE, m));
976   PetscCall(VecSetFromOptions(user->S));
977 
978   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->lwork));
979   PetscCall(VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx));
980   PetscCall(VecSetFromOptions(user->lwork));
981 
982   PetscCall(MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork));
983 
984   PetscCall(VecDuplicate(user->S,&user->Swork));
985   PetscCall(VecDuplicate(user->S,&user->Sdiag));
986   PetscCall(VecDuplicate(user->S,&user->Av_u));
987   PetscCall(VecDuplicate(user->S,&user->Twork));
988   PetscCall(VecDuplicate(user->y,&user->ywork));
989   PetscCall(VecDuplicate(user->u,&user->Ywork));
990   PetscCall(VecDuplicate(user->u,&user->uwork));
991   PetscCall(VecDuplicate(user->u,&user->js_diag));
992   PetscCall(VecDuplicate(user->c,&user->cwork));
993   PetscCall(VecDuplicate(user->d,&user->dwork));
994 
995   /* Create a matrix-free shell user->Jd for computing B*x */
996   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal,user->nstate,user->ndesign,user,&user->Jd));
997   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult));
998   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose));
999 
1000   /* Compute true state function ytrue given utrue */
1001   PetscCall(VecDuplicate(user->y,&user->ytrue));
1002 
1003   /* First compute Av_u = Av*exp(-u) */
1004   PetscCall(VecSet(user->uwork, 0));
1005   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue)); /* Note: user->utrue */
1006   PetscCall(VecExp(user->uwork));
1007   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1008 
1009   /* Next form DSG = Div*S*Grad */
1010   PetscCall(VecCopy(user->Av_u,user->Swork));
1011   PetscCall(VecReciprocal(user->Swork));
1012   if (user->use_ptap) {
1013     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
1014     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG));
1015   } else {
1016     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1017     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
1018 
1019     PetscCall(MatMatMult(user->Divwork,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG));
1020   }
1021 
1022   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE));
1023   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1024 
1025   if (user->use_lrc == PETSC_TRUE) {
1026     v=PetscSqrtReal(1.0 /user->ndesign);
1027     PetscCall(PetscMalloc1(user->ndesign,&user->ones));
1028 
1029     for (i=0;i<user->ndesign;i++) {
1030       user->ones[i]=v;
1031     }
1032     PetscCall(MatCreateDense(PETSC_COMM_WORLD,ysubnlocal,PETSC_DECIDE,user->ndesign,1,user->ones,&user->Ones));
1033     PetscCall(MatAssemblyBegin(user->Ones, MAT_FINAL_ASSEMBLY));
1034     PetscCall(MatAssemblyEnd(user->Ones, MAT_FINAL_ASSEMBLY));
1035     PetscCall(MatCreateLRC(user->DSG,user->Ones,NULL,user->Ones,&user->JsBlock));
1036     PetscCall(MatSetUp(user->JsBlock));
1037   } else {
1038     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
1039     PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal,ysubnlocal,user->ndesign,user->ndesign,user,&user->JsBlock));
1040     PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateBlockMatMult));
1041     PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateBlockMatMult));
1042   }
1043   PetscCall(MatSetOption(user->JsBlock,MAT_SYMMETRIC,PETSC_TRUE));
1044   PetscCall(MatSetOption(user->JsBlock,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1045   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->Js));
1046   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult));
1047   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMult));
1048   PetscCall(MatSetOption(user->Js,MAT_SYMMETRIC,PETSC_TRUE));
1049   PetscCall(MatSetOption(user->Js,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1050 
1051   PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->JsInv));
1052   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateInvMatMult));
1053   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateInvMatMult));
1054   PetscCall(MatSetOption(user->JsInv,MAT_SYMMETRIC,PETSC_TRUE));
1055   PetscCall(MatSetOption(user->JsInv,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1056 
1057   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE));
1058   PetscCall(MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
1059   /* Now solve for ytrue */
1060   PetscCall(KSPCreate(PETSC_COMM_WORLD,&user->solver));
1061   PetscCall(KSPSetFromOptions(user->solver));
1062 
1063   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->DSG));
1064 
1065   PetscCall(MatMult(user->JsInv,user->q,user->ytrue));
1066   /* First compute Av_u = Av*exp(-u) */
1067   PetscCall(VecSet(user->uwork,0));
1068   PetscCall(VecAXPY(user->uwork,-1.0,user->u)); /* Note: user->u */
1069   PetscCall(VecExp(user->uwork));
1070   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1071 
1072   /* Next update DSG = Div*S*Grad  with user->u */
1073   PetscCall(VecCopy(user->Av_u,user->Swork));
1074   PetscCall(VecReciprocal(user->Swork));
1075   if (user->use_ptap) {
1076     PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES));
1077     PetscCall(MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG));
1078   } else {
1079     PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1080     PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u));
1081     PetscCall(MatProductNumeric(user->DSG));
1082   }
1083 
1084   /* Now solve for y */
1085 
1086   PetscCall(MatMult(user->JsInv,user->q,user->y));
1087 
1088   user->ksp_its_initial = user->ksp_its;
1089   user->ksp_its = 0;
1090   /* Construct projection matrix Q (blocks) */
1091   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Q));
1092   PetscCall(MatSetSizes(user->Q,dsubnlocal,ysubnlocal,user->ndata,user->ndesign));
1093   PetscCall(MatSetFromOptions(user->Q));
1094   PetscCall(MatMPIAIJSetPreallocation(user->Q,8,NULL,8,NULL));
1095   PetscCall(MatSeqAIJSetPreallocation(user->Q,8,NULL));
1096 
1097   for (i=0; i<user->mx; i++) {
1098     x[i] = h*(i+0.5);
1099     y[i] = h*(i+0.5);
1100     z[i] = h*(i+0.5);
1101   }
1102   PetscCall(MatGetOwnershipRange(user->Q,&istart,&iend));
1103 
1104   nx = user->mx; ny = user->mx; nz = user->mx;
1105   for (i=istart; i<iend; i++) {
1106 
1107     xri = xr[i];
1108     im = 0;
1109     xim = x[im];
1110     while (xri>xim && im<nx) {
1111       im = im+1;
1112       xim = x[im];
1113     }
1114     indx1 = im-1;
1115     indx2 = im;
1116     dx1 = xri - x[indx1];
1117     dx2 = x[indx2] - xri;
1118 
1119     yri = yr[i];
1120     im = 0;
1121     yim = y[im];
1122     while (yri>yim && im<ny) {
1123       im = im+1;
1124       yim = y[im];
1125     }
1126     indy1 = im-1;
1127     indy2 = im;
1128     dy1 = yri - y[indy1];
1129     dy2 = y[indy2] - yri;
1130 
1131     zri = zr[i];
1132     im = 0;
1133     zim = z[im];
1134     while (zri>zim && im<nz) {
1135       im = im+1;
1136       zim = z[im];
1137     }
1138     indz1 = im-1;
1139     indz2 = im;
1140     dz1 = zri - z[indz1];
1141     dz2 = z[indz2] - zri;
1142 
1143     Dx = x[indx2] - x[indx1];
1144     Dy = y[indy2] - y[indy1];
1145     Dz = z[indz2] - z[indz1];
1146 
1147     j = indx1 + indy1*nx + indz1*nx*ny;
1148     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1149     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1150 
1151     j = indx1 + indy1*nx + indz2*nx*ny;
1152     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1153     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1154 
1155     j = indx1 + indy2*nx + indz1*nx*ny;
1156     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1157     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1158 
1159     j = indx1 + indy2*nx + indz2*nx*ny;
1160     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1161     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1162 
1163     j = indx2 + indy1*nx + indz1*nx*ny;
1164     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1165     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1166 
1167     j = indx2 + indy1*nx + indz2*nx*ny;
1168     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1169     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1170 
1171     j = indx2 + indy2*nx + indz1*nx*ny;
1172     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1173     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1174 
1175     j = indx2 + indy2*nx + indz2*nx*ny;
1176     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1177     PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES));
1178   }
1179 
1180   PetscCall(MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY));
1181   PetscCall(MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY));
1182   /* Create MQ (composed of blocks of Q */
1183   PetscCall(MatCreateShell(PETSC_COMM_WORLD,dsubnlocal*user->ns,PETSC_DECIDE,user->ndata*user->ns,user->nstate,user,&user->MQ));
1184   PetscCall(MatShellSetOperation(user->MQ,MATOP_MULT,(void(*)(void))QMatMult));
1185   PetscCall(MatShellSetOperation(user->MQ,MATOP_MULT_TRANSPOSE,(void(*)(void))QMatMultTranspose));
1186 
1187   /* Add noise to the measurement data */
1188   PetscCall(VecSet(user->ywork,1.0));
1189   PetscCall(VecAYPX(user->ywork,user->noise,user->ytrue));
1190   PetscCall(MatMult(user->MQ,user->ywork,user->d));
1191 
1192   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1193   PetscCall(PetscFree(x));
1194   PetscCall(PetscFree(y));
1195   PetscCall(PetscFree(z));
1196   PetscCall(PetscLogStagePop());
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode EllipticDestroy(AppCtx *user)
1201 {
1202   PetscInt       i;
1203 
1204   PetscFunctionBegin;
1205   PetscCall(MatDestroy(&user->DSG));
1206   PetscCall(KSPDestroy(&user->solver));
1207   PetscCall(MatDestroy(&user->Q));
1208   PetscCall(MatDestroy(&user->MQ));
1209   if (!user->use_ptap) {
1210     PetscCall(MatDestroy(&user->Div));
1211     PetscCall(MatDestroy(&user->Divwork));
1212   } else {
1213     PetscCall(MatDestroy(&user->Diag));
1214   }
1215   if (user->use_lrc) {
1216     PetscCall(MatDestroy(&user->Ones));
1217   }
1218 
1219   PetscCall(MatDestroy(&user->Grad));
1220   PetscCall(MatDestroy(&user->Av));
1221   PetscCall(MatDestroy(&user->Avwork));
1222   PetscCall(MatDestroy(&user->L));
1223   PetscCall(MatDestroy(&user->Js));
1224   PetscCall(MatDestroy(&user->Jd));
1225   PetscCall(MatDestroy(&user->JsBlock));
1226   PetscCall(MatDestroy(&user->JsInv));
1227 
1228   PetscCall(VecDestroy(&user->x));
1229   PetscCall(VecDestroy(&user->u));
1230   PetscCall(VecDestroy(&user->uwork));
1231   PetscCall(VecDestroy(&user->utrue));
1232   PetscCall(VecDestroy(&user->y));
1233   PetscCall(VecDestroy(&user->ywork));
1234   PetscCall(VecDestroy(&user->ytrue));
1235   PetscCall(VecDestroy(&user->c));
1236   PetscCall(VecDestroy(&user->cwork));
1237   PetscCall(VecDestroy(&user->ur));
1238   PetscCall(VecDestroy(&user->q));
1239   PetscCall(VecDestroy(&user->d));
1240   PetscCall(VecDestroy(&user->dwork));
1241   PetscCall(VecDestroy(&user->lwork));
1242   PetscCall(VecDestroy(&user->S));
1243   PetscCall(VecDestroy(&user->Swork));
1244   PetscCall(VecDestroy(&user->Sdiag));
1245   PetscCall(VecDestroy(&user->Ywork));
1246   PetscCall(VecDestroy(&user->Twork));
1247   PetscCall(VecDestroy(&user->Av_u));
1248   PetscCall(VecDestroy(&user->js_diag));
1249   PetscCall(ISDestroy(&user->s_is));
1250   PetscCall(ISDestroy(&user->d_is));
1251   PetscCall(VecDestroy(&user->suby));
1252   PetscCall(VecDestroy(&user->subd));
1253   PetscCall(VecDestroy(&user->subq));
1254   PetscCall(VecScatterDestroy(&user->state_scatter));
1255   PetscCall(VecScatterDestroy(&user->design_scatter));
1256   for (i=0;i<user->ns;i++) {
1257     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1258     PetscCall(VecScatterDestroy(&user->di_scatter[i]));
1259   }
1260   PetscCall(PetscFree(user->yi_scatter));
1261   PetscCall(PetscFree(user->di_scatter));
1262   if (user->use_lrc) {
1263     PetscCall(PetscFree(user->ones));
1264     PetscCall(MatDestroy(&user->Ones));
1265   }
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 PetscErrorCode EllipticMonitor(Tao tao, void *ptr)
1270 {
1271   Vec            X;
1272   PetscReal      unorm,ynorm;
1273   AppCtx         *user = (AppCtx*)ptr;
1274 
1275   PetscFunctionBegin;
1276   PetscCall(TaoGetSolution(tao,&X));
1277   PetscCall(Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
1278   PetscCall(VecAXPY(user->ywork,-1.0,user->ytrue));
1279   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue));
1280   PetscCall(VecNorm(user->uwork,NORM_2,&unorm));
1281   PetscCall(VecNorm(user->ywork,NORM_2,&ynorm));
1282   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm));
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 /*TEST
1287 
1288    build:
1289       requires: !complex
1290 
1291    test:
1292       args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11
1293       requires: !single
1294 
1295    test:
1296       suffix: 2
1297       args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3
1298       requires: !single
1299 
1300 TEST*/
1301