xref: /petsc/src/tao/pde_constrained/tutorials/elliptic.c (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 #include <petsc/private/taoimpl.h>
2 
3 typedef struct {
4   PetscInt n; /* Number of total variables */
5   PetscInt m; /* Number of constraints */
6   PetscInt nstate;
7   PetscInt ndesign;
8   PetscInt mx;    /* grid points in each direction */
9   PetscInt ns;    /* Number of data samples (1<=ns<=8)
10                   Currently only ns=1 is supported */
11   PetscInt ndata; /* Number of data points per sample */
12   IS       s_is;
13   IS       d_is;
14 
15   VecScatter  state_scatter;
16   VecScatter  design_scatter;
17   VecScatter *yi_scatter, *di_scatter;
18   Vec         suby, subq, subd;
19   Mat         Js, Jd, JsPrec, JsInv, JsBlock;
20 
21   PetscReal  alpha; /* Regularization parameter */
22   PetscReal  beta;  /* Weight attributed to ||u||^2 in regularization functional */
23   PetscReal  noise; /* Amount of noise to add to data */
24   PetscReal *ones;
25   Mat        Q;
26   Mat        MQ;
27   Mat        L;
28 
29   Mat Grad;
30   Mat Av, Avwork;
31   Mat Div, Divwork;
32   Mat DSG;
33   Mat Diag, Ones;
34 
35   Vec q;
36   Vec ur; /* reference */
37 
38   Vec d;
39   Vec dwork;
40 
41   Vec x; /* super vec of y,u */
42 
43   Vec y; /* state variables */
44   Vec ywork;
45 
46   Vec ytrue;
47 
48   Vec u; /* design variables */
49   Vec uwork;
50 
51   Vec utrue;
52 
53   Vec js_diag;
54 
55   Vec c; /* constraint vector */
56   Vec cwork;
57 
58   Vec lwork;
59   Vec S;
60   Vec Swork, Twork, Sdiag, Ywork;
61   Vec Av_u;
62 
63   KSP solver;
64   PC  prec;
65 
66   PetscReal     tola, tolb, tolc, told;
67   PetscInt      ksp_its;
68   PetscInt      ksp_its_initial;
69   PetscLogStage stages[10];
70   PetscBool     use_ptap;
71   PetscBool     use_lrc;
72 } AppCtx;
73 
74 PetscErrorCode FormFunction(Tao, Vec, PetscReal *, void *);
75 PetscErrorCode FormGradient(Tao, Vec, Vec, void *);
76 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal *, Vec, void *);
77 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void *);
78 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat, void *);
79 PetscErrorCode FormConstraints(Tao, Vec, Vec, void *);
80 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void *);
81 PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter);
82 PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter);
83 PetscErrorCode EllipticInitialize(AppCtx *);
84 PetscErrorCode EllipticDestroy(AppCtx *);
85 PetscErrorCode EllipticMonitor(Tao, void *);
86 
87 PetscErrorCode StateBlockMatMult(Mat, Vec, Vec);
88 PetscErrorCode StateMatMult(Mat, Vec, Vec);
89 
90 PetscErrorCode StateInvMatMult(Mat, Vec, Vec);
91 PetscErrorCode DesignMatMult(Mat, Vec, Vec);
92 PetscErrorCode DesignMatMultTranspose(Mat, Vec, Vec);
93 
94 PetscErrorCode QMatMult(Mat, Vec, Vec);
95 PetscErrorCode QMatMultTranspose(Mat, Vec, Vec);
96 
97 static char help[] = "";
98 
99 int main(int argc, char **argv) {
100   Vec      x0;
101   Tao      tao;
102   AppCtx   user;
103   PetscInt ntests = 1;
104   PetscInt i;
105 
106   PetscFunctionBeginUser;
107   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
108   user.mx = 8;
109   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "elliptic example", NULL);
110   PetscCall(PetscOptionsInt("-mx", "Number of grid points in each direction", "", user.mx, &user.mx, NULL));
111   user.ns = 6;
112   PetscCall(PetscOptionsInt("-ns", "Number of data samples (1<=ns<=8)", "", user.ns, &user.ns, NULL));
113   user.ndata = 64;
114   PetscCall(PetscOptionsInt("-ndata", "Numbers of data points per sample", "", user.ndata, &user.ndata, NULL));
115   user.alpha = 0.1;
116   PetscCall(PetscOptionsReal("-alpha", "Regularization parameter", "", user.alpha, &user.alpha, NULL));
117   user.beta = 0.00001;
118   PetscCall(PetscOptionsReal("-beta", "Weight attributed to ||u||^2 in regularization functional", "", user.beta, &user.beta, NULL));
119   user.noise = 0.01;
120   PetscCall(PetscOptionsReal("-noise", "Amount of noise to add to data", "", user.noise, &user.noise, NULL));
121 
122   user.use_ptap = PETSC_FALSE;
123   PetscCall(PetscOptionsBool("-use_ptap", "Use ptap matrix for DSG", "", user.use_ptap, &user.use_ptap, NULL));
124   user.use_lrc = PETSC_FALSE;
125   PetscCall(PetscOptionsBool("-use_lrc", "Use lrc matrix for Js", "", user.use_lrc, &user.use_lrc, NULL));
126   PetscCall(PetscOptionsInt("-ntests", "Number of times to repeat TaoSolve", "", ntests, &ntests, NULL));
127   PetscOptionsEnd();
128 
129   user.m       = user.ns * user.mx * user.mx * user.mx; /* number of constraints */
130   user.nstate  = user.m;
131   user.ndesign = user.mx * user.mx * user.mx;
132   user.n       = user.nstate + user.ndesign; /* number of variables */
133 
134   /* Create TAO solver and set desired solution method */
135   PetscCall(TaoCreate(PETSC_COMM_WORLD, &tao));
136   PetscCall(TaoSetType(tao, TAOLCL));
137 
138   /* Set up initial vectors and matrices */
139   PetscCall(EllipticInitialize(&user));
140 
141   PetscCall(Gather(user.x, user.y, user.state_scatter, user.u, user.design_scatter));
142   PetscCall(VecDuplicate(user.x, &x0));
143   PetscCall(VecCopy(user.x, x0));
144 
145   /* Set solution vector with an initial guess */
146   PetscCall(TaoSetSolution(tao, user.x));
147   PetscCall(TaoSetObjective(tao, FormFunction, &user));
148   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
149   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
150 
151   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, &user));
152   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
153 
154   PetscCall(TaoSetStateDesignIS(tao, user.s_is, user.d_is));
155   PetscCall(TaoSetFromOptions(tao));
156 
157   /* SOLVE THE APPLICATION */
158   PetscCall(PetscLogStageRegister("Trials", &user.stages[1]));
159   PetscCall(PetscLogStagePush(user.stages[1]));
160   for (i = 0; i < ntests; i++) {
161     PetscCall(TaoSolve(tao));
162     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "KSP Iterations = %" PetscInt_FMT "\n", user.ksp_its));
163     PetscCall(VecCopy(x0, user.x));
164   }
165   PetscCall(PetscLogStagePop());
166   PetscCall(PetscBarrier((PetscObject)user.x));
167   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "KSP iterations within initialization: "));
168   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", user.ksp_its_initial));
169 
170   PetscCall(TaoDestroy(&tao));
171   PetscCall(VecDestroy(&x0));
172   PetscCall(EllipticDestroy(&user));
173   PetscCall(PetscFinalize());
174   return 0;
175 }
176 /* ------------------------------------------------------------------- */
177 /*
178    dwork = Qy - d
179    lwork = L*(u-ur)
180    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
181 */
182 PetscErrorCode FormFunction(Tao tao, Vec X, PetscReal *f, void *ptr) {
183   PetscReal d1 = 0, d2 = 0;
184   AppCtx   *user = (AppCtx *)ptr;
185 
186   PetscFunctionBegin;
187   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
188   PetscCall(MatMult(user->MQ, user->y, user->dwork));
189   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
190   PetscCall(VecDot(user->dwork, user->dwork, &d1));
191   PetscCall(VecWAXPY(user->uwork, -1.0, user->ur, user->u));
192   PetscCall(MatMult(user->L, user->uwork, user->lwork));
193   PetscCall(VecDot(user->lwork, user->lwork, &d2));
194   *f = 0.5 * (d1 + user->alpha * d2);
195   PetscFunctionReturn(0);
196 }
197 
198 /* ------------------------------------------------------------------- */
199 /*
200     state: g_s = Q' *(Qy - d)
201     design: g_d = alpha*L'*L*(u-ur)
202 */
203 PetscErrorCode FormGradient(Tao tao, Vec X, Vec G, void *ptr) {
204   AppCtx *user = (AppCtx *)ptr;
205 
206   PetscFunctionBegin;
207   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
208   PetscCall(MatMult(user->MQ, user->y, user->dwork));
209   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
210   PetscCall(MatMultTranspose(user->MQ, user->dwork, user->ywork));
211   PetscCall(VecWAXPY(user->uwork, -1.0, user->ur, user->u));
212   PetscCall(MatMult(user->L, user->uwork, user->lwork));
213   PetscCall(MatMultTranspose(user->L, user->lwork, user->uwork));
214   PetscCall(VecScale(user->uwork, user->alpha));
215   PetscCall(Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
216   PetscFunctionReturn(0);
217 }
218 
219 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr) {
220   PetscReal d1, d2;
221   AppCtx   *user = (AppCtx *)ptr;
222 
223   PetscFunctionBegin;
224   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
225   PetscCall(MatMult(user->MQ, user->y, user->dwork));
226   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
227   PetscCall(VecDot(user->dwork, user->dwork, &d1));
228   PetscCall(MatMultTranspose(user->MQ, user->dwork, user->ywork));
229 
230   PetscCall(VecWAXPY(user->uwork, -1.0, user->ur, user->u));
231   PetscCall(MatMult(user->L, user->uwork, user->lwork));
232   PetscCall(VecDot(user->lwork, user->lwork, &d2));
233   PetscCall(MatMultTranspose(user->L, user->lwork, user->uwork));
234   PetscCall(VecScale(user->uwork, user->alpha));
235   *f = 0.5 * (d1 + user->alpha * d2);
236   PetscCall(Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
237   PetscFunctionReturn(0);
238 }
239 
240 /* ------------------------------------------------------------------- */
241 /* A
242 MatShell object
243 */
244 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr) {
245   AppCtx *user = (AppCtx *)ptr;
246 
247   PetscFunctionBegin;
248   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
249   /* DSG = Div * (1/Av_u) * Grad */
250   PetscCall(VecSet(user->uwork, 0));
251   PetscCall(VecAXPY(user->uwork, -1.0, user->u));
252   PetscCall(VecExp(user->uwork));
253   PetscCall(MatMult(user->Av, user->uwork, user->Av_u));
254   PetscCall(VecCopy(user->Av_u, user->Swork));
255   PetscCall(VecReciprocal(user->Swork));
256   if (user->use_ptap) {
257     PetscCall(MatDiagonalSet(user->Diag, user->Swork, INSERT_VALUES));
258     PetscCall(MatPtAP(user->Diag, user->Grad, MAT_REUSE_MATRIX, 1.0, &user->DSG));
259   } else {
260     PetscCall(MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN));
261     PetscCall(MatDiagonalScale(user->Divwork, NULL, user->Swork));
262     PetscCall(MatProductNumeric(user->DSG));
263   }
264   PetscFunctionReturn(0);
265 }
266 /* ------------------------------------------------------------------- */
267 /* B */
268 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr) {
269   AppCtx *user = (AppCtx *)ptr;
270 
271   PetscFunctionBegin;
272   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
273   PetscFunctionReturn(0);
274 }
275 
276 PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y) {
277   PetscReal sum;
278   AppCtx   *user;
279 
280   PetscFunctionBegin;
281   PetscCall(MatShellGetContext(J_shell, &user));
282   PetscCall(MatMult(user->DSG, X, Y));
283   PetscCall(VecSum(X, &sum));
284   sum /= user->ndesign;
285   PetscCall(VecShift(Y, sum));
286   PetscFunctionReturn(0);
287 }
288 
289 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y) {
290   PetscInt i;
291   AppCtx  *user;
292 
293   PetscFunctionBegin;
294   PetscCall(MatShellGetContext(J_shell, &user));
295   if (user->ns == 1) {
296     PetscCall(MatMult(user->JsBlock, X, Y));
297   } else {
298     for (i = 0; i < user->ns; i++) {
299       PetscCall(Scatter(X, user->subq, user->yi_scatter[i], 0, 0));
300       PetscCall(Scatter(Y, user->suby, user->yi_scatter[i], 0, 0));
301       PetscCall(MatMult(user->JsBlock, user->subq, user->suby));
302       PetscCall(Gather(Y, user->suby, user->yi_scatter[i], 0, 0));
303     }
304   }
305   PetscFunctionReturn(0);
306 }
307 
308 PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y) {
309   PetscInt its, i;
310   AppCtx  *user;
311 
312   PetscFunctionBegin;
313   PetscCall(MatShellGetContext(J_shell, &user));
314   PetscCall(KSPSetOperators(user->solver, user->JsBlock, user->DSG));
315   if (Y == user->ytrue) {
316     /* First solve is done using true solution to set up problem */
317     PetscCall(KSPSetTolerances(user->solver, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
318   } else {
319     PetscCall(KSPSetTolerances(user->solver, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
320   }
321   if (user->ns == 1) {
322     PetscCall(KSPSolve(user->solver, X, Y));
323     PetscCall(KSPGetIterationNumber(user->solver, &its));
324     user->ksp_its += its;
325   } else {
326     for (i = 0; i < user->ns; i++) {
327       PetscCall(Scatter(X, user->subq, user->yi_scatter[i], 0, 0));
328       PetscCall(Scatter(Y, user->suby, user->yi_scatter[i], 0, 0));
329       PetscCall(KSPSolve(user->solver, user->subq, user->suby));
330       PetscCall(KSPGetIterationNumber(user->solver, &its));
331       user->ksp_its += its;
332       PetscCall(Gather(Y, user->suby, user->yi_scatter[i], 0, 0));
333     }
334   }
335   PetscFunctionReturn(0);
336 }
337 PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y) {
338   AppCtx  *user;
339   PetscInt i;
340 
341   PetscFunctionBegin;
342   PetscCall(MatShellGetContext(J_shell, &user));
343   if (user->ns == 1) {
344     PetscCall(MatMult(user->Q, X, Y));
345   } else {
346     for (i = 0; i < user->ns; i++) {
347       PetscCall(Scatter(X, user->subq, user->yi_scatter[i], 0, 0));
348       PetscCall(Scatter(Y, user->subd, user->di_scatter[i], 0, 0));
349       PetscCall(MatMult(user->Q, user->subq, user->subd));
350       PetscCall(Gather(Y, user->subd, user->di_scatter[i], 0, 0));
351     }
352   }
353   PetscFunctionReturn(0);
354 }
355 
356 PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y) {
357   AppCtx  *user;
358   PetscInt i;
359 
360   PetscFunctionBegin;
361   PetscCall(MatShellGetContext(J_shell, &user));
362   if (user->ns == 1) {
363     PetscCall(MatMultTranspose(user->Q, X, Y));
364   } else {
365     for (i = 0; i < user->ns; i++) {
366       PetscCall(Scatter(X, user->subd, user->di_scatter[i], 0, 0));
367       PetscCall(Scatter(Y, user->suby, user->yi_scatter[i], 0, 0));
368       PetscCall(MatMultTranspose(user->Q, user->subd, user->suby));
369       PetscCall(Gather(Y, user->suby, user->yi_scatter[i], 0, 0));
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 
375 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y) {
376   PetscInt i;
377   AppCtx  *user;
378 
379   PetscFunctionBegin;
380   PetscCall(MatShellGetContext(J_shell, &user));
381 
382   /* sdiag(1./v) */
383   PetscCall(VecSet(user->uwork, 0));
384   PetscCall(VecAXPY(user->uwork, -1.0, user->u));
385   PetscCall(VecExp(user->uwork));
386 
387   /* sdiag(1./((Av*(1./v)).^2)) */
388   PetscCall(MatMult(user->Av, user->uwork, user->Swork));
389   PetscCall(VecPointwiseMult(user->Swork, user->Swork, user->Swork));
390   PetscCall(VecReciprocal(user->Swork));
391 
392   /* (Av * (sdiag(1./v) * b)) */
393   PetscCall(VecPointwiseMult(user->uwork, user->uwork, X));
394   PetscCall(MatMult(user->Av, user->uwork, user->Twork));
395 
396   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
397   PetscCall(VecPointwiseMult(user->Swork, user->Twork, user->Swork));
398 
399   if (user->ns == 1) {
400     /* (sdiag(Grad*y(:,i)) */
401     PetscCall(MatMult(user->Grad, user->y, user->Twork));
402 
403     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
404     PetscCall(VecPointwiseMult(user->Swork, user->Twork, user->Swork));
405     PetscCall(MatMultTranspose(user->Grad, user->Swork, Y));
406   } else {
407     for (i = 0; i < user->ns; i++) {
408       PetscCall(Scatter(user->y, user->suby, user->yi_scatter[i], 0, 0));
409       PetscCall(Scatter(Y, user->subq, user->yi_scatter[i], 0, 0));
410 
411       PetscCall(MatMult(user->Grad, user->suby, user->Twork));
412       PetscCall(VecPointwiseMult(user->Twork, user->Twork, user->Swork));
413       PetscCall(MatMultTranspose(user->Grad, user->Twork, user->subq));
414       PetscCall(Gather(user->y, user->suby, user->yi_scatter[i], 0, 0));
415       PetscCall(Gather(Y, user->subq, user->yi_scatter[i], 0, 0));
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y) {
422   PetscInt i;
423   AppCtx  *user;
424 
425   PetscFunctionBegin;
426   PetscCall(MatShellGetContext(J_shell, &user));
427   PetscCall(VecZeroEntries(Y));
428 
429   /* Sdiag = 1./((Av*(1./v)).^2) */
430   PetscCall(VecSet(user->uwork, 0));
431   PetscCall(VecAXPY(user->uwork, -1.0, user->u));
432   PetscCall(VecExp(user->uwork));
433   PetscCall(MatMult(user->Av, user->uwork, user->Swork));
434   PetscCall(VecPointwiseMult(user->Sdiag, user->Swork, user->Swork));
435   PetscCall(VecReciprocal(user->Sdiag));
436 
437   for (i = 0; i < user->ns; i++) {
438     PetscCall(Scatter(X, user->subq, user->yi_scatter[i], 0, 0));
439     PetscCall(Scatter(user->y, user->suby, user->yi_scatter[i], 0, 0));
440 
441     /* Swork = (Div' * b(:,i)) */
442     PetscCall(MatMult(user->Grad, user->subq, user->Swork));
443 
444     /* Twork = Grad*y(:,i) */
445     PetscCall(MatMult(user->Grad, user->suby, user->Twork));
446 
447     /* Twork = sdiag(Twork) * Swork */
448     PetscCall(VecPointwiseMult(user->Twork, user->Swork, user->Twork));
449 
450     /* Swork = pointwisemult(Sdiag,Twork) */
451     PetscCall(VecPointwiseMult(user->Swork, user->Twork, user->Sdiag));
452 
453     /* Ywork = Av' * Swork */
454     PetscCall(MatMultTranspose(user->Av, user->Swork, user->Ywork));
455 
456     /* Ywork = pointwisemult(uwork,Ywork) */
457     PetscCall(VecPointwiseMult(user->Ywork, user->uwork, user->Ywork));
458     PetscCall(VecAXPY(Y, 1.0, user->Ywork));
459     PetscCall(Gather(user->y, user->suby, user->yi_scatter[i], 0, 0));
460   }
461   PetscFunctionReturn(0);
462 }
463 
464 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr) {
465   /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
466   PetscReal sum;
467   PetscInt  i;
468   AppCtx   *user = (AppCtx *)ptr;
469 
470   PetscFunctionBegin;
471   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
472   if (user->ns == 1) {
473     PetscCall(MatMult(user->Grad, user->y, user->Swork));
474     PetscCall(VecPointwiseDivide(user->Swork, user->Swork, user->Av_u));
475     PetscCall(MatMultTranspose(user->Grad, user->Swork, C));
476     PetscCall(VecSum(user->y, &sum));
477     sum /= user->ndesign;
478     PetscCall(VecShift(C, sum));
479   } else {
480     for (i = 0; i < user->ns; i++) {
481       PetscCall(Scatter(user->y, user->suby, user->yi_scatter[i], 0, 0));
482       PetscCall(Scatter(C, user->subq, user->yi_scatter[i], 0, 0));
483       PetscCall(MatMult(user->Grad, user->suby, user->Swork));
484       PetscCall(VecPointwiseDivide(user->Swork, user->Swork, user->Av_u));
485       PetscCall(MatMultTranspose(user->Grad, user->Swork, user->subq));
486 
487       PetscCall(VecSum(user->suby, &sum));
488       sum /= user->ndesign;
489       PetscCall(VecShift(user->subq, sum));
490 
491       PetscCall(Gather(user->y, user->suby, user->yi_scatter[i], 0, 0));
492       PetscCall(Gather(C, user->subq, user->yi_scatter[i], 0, 0));
493     }
494   }
495   PetscCall(VecAXPY(C, -1.0, user->q));
496   PetscFunctionReturn(0);
497 }
498 
499 PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2) {
500   PetscFunctionBegin;
501   PetscCall(VecScatterBegin(scat1, x, sub1, INSERT_VALUES, SCATTER_FORWARD));
502   PetscCall(VecScatterEnd(scat1, x, sub1, INSERT_VALUES, SCATTER_FORWARD));
503   if (sub2) {
504     PetscCall(VecScatterBegin(scat2, x, sub2, INSERT_VALUES, SCATTER_FORWARD));
505     PetscCall(VecScatterEnd(scat2, x, sub2, INSERT_VALUES, SCATTER_FORWARD));
506   }
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2) {
511   PetscFunctionBegin;
512   PetscCall(VecScatterBegin(scat1, sub1, x, INSERT_VALUES, SCATTER_REVERSE));
513   PetscCall(VecScatterEnd(scat1, sub1, x, INSERT_VALUES, SCATTER_REVERSE));
514   if (sub2) {
515     PetscCall(VecScatterBegin(scat2, sub2, x, INSERT_VALUES, SCATTER_REVERSE));
516     PetscCall(VecScatterEnd(scat2, sub2, x, INSERT_VALUES, SCATTER_REVERSE));
517   }
518   PetscFunctionReturn(0);
519 }
520 
521 PetscErrorCode EllipticInitialize(AppCtx *user) {
522   PetscInt        m, n, i, j, k, l, linear_index, is, js, ks, ls, istart, iend, iblock;
523   Vec             XX, YY, ZZ, XXwork, YYwork, ZZwork, UTwork;
524   PetscReal      *x, *y, *z;
525   PetscReal       h, meanut;
526   PetscScalar     hinv, neg_hinv, half = 0.5, sqrt_beta;
527   PetscInt        im, indx1, indx2, indy1, indy2, indz1, indz2, nx, ny, nz;
528   IS              is_alldesign, is_allstate;
529   IS              is_from_d;
530   IS              is_from_y;
531   PetscInt        lo, hi, hi2, lo2, ysubnlocal, dsubnlocal;
532   const PetscInt *ranges, *subranges;
533   PetscMPIInt     size;
534   PetscReal       xri, yri, zri, xim, yim, zim, dx1, dx2, dy1, dy2, dz1, dz2, Dx, Dy, Dz;
535   PetscScalar     v, vx, vy, vz;
536   PetscInt        offset, subindex, subvec, nrank, kk;
537 
538   PetscScalar xr[64] = {0.4970, 0.8498, 0.7814, 0.6268, 0.7782, 0.6402, 0.3617, 0.3160, 0.3610, 0.5298, 0.6987, 0.3331, 0.7962, 0.5596, 0.3866, 0.6774, 0.5407, 0.4518, 0.6702, 0.6061, 0.7580, 0.8997,
539                         0.5198, 0.8326, 0.2138, 0.9198, 0.3000, 0.2833, 0.8288, 0.7076, 0.1820, 0.0728, 0.8447, 0.2367, 0.3239, 0.6413, 0.3114, 0.4731, 0.1192, 0.9273, 0.5724, 0.4331, 0.5136, 0.3547,
540                         0.4413, 0.2602, 0.5698, 0.7278, 0.5261, 0.6230, 0.2454, 0.3948, 0.7479, 0.6582, 0.4660, 0.5594, 0.7574, 0.1143, 0.5900, 0.1065, 0.4260, 0.3294, 0.8276, 0.0756};
541 
542   PetscScalar yr[64] = {0.7345, 0.9120, 0.9288, 0.7528, 0.4463, 0.4985, 0.2497, 0.6256, 0.3425, 0.9026, 0.6983, 0.4230, 0.7140, 0.2970, 0.4474, 0.8792, 0.6604, 0.2485, 0.7968, 0.6127, 0.1796, 0.2437,
543                         0.5938, 0.6137, 0.3867, 0.5658, 0.4575, 0.1009, 0.0863, 0.3361, 0.0738, 0.3985, 0.6602, 0.1437, 0.0934, 0.5983, 0.5950, 0.0763, 0.0768, 0.2288, 0.5761, 0.1129, 0.3841, 0.6150,
544                         0.6904, 0.6686, 0.1361, 0.4601, 0.4491, 0.3716, 0.1969, 0.6537, 0.6743, 0.6991, 0.4811, 0.5480, 0.1684, 0.4569, 0.6889, 0.8437, 0.3015, 0.2854, 0.8199, 0.2658};
545 
546   PetscScalar zr[64] = {0.7668, 0.8573, 0.2654, 0.2719, 0.1060, 0.1311, 0.6232, 0.2295, 0.8009, 0.2147, 0.2119, 0.9325, 0.4473, 0.3600, 0.3374, 0.3819, 0.4066, 0.5801, 0.1673, 0.0959, 0.4638, 0.8236,
547                         0.8800, 0.2939, 0.2028, 0.8262, 0.2706, 0.6276, 0.9085, 0.6443, 0.8241, 0.0712, 0.1824, 0.7789, 0.4389, 0.8415, 0.7055, 0.6639, 0.3653, 0.2078, 0.1987, 0.2297, 0.4321, 0.8115,
548                         0.4915, 0.7764, 0.4657, 0.4627, 0.4569, 0.4232, 0.8514, 0.0674, 0.3227, 0.1055, 0.6690, 0.6313, 0.9226, 0.5461, 0.4126, 0.2364, 0.6096, 0.7042, 0.3914, 0.0711};
549 
550   PetscFunctionBegin;
551   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
552   PetscCall(PetscLogStageRegister("Elliptic Setup", &user->stages[0]));
553   PetscCall(PetscLogStagePush(user->stages[0]));
554 
555   /* Create u,y,c,x */
556   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->u));
557   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->y));
558   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->c));
559   PetscCall(VecSetSizes(user->u, PETSC_DECIDE, user->ndesign));
560   PetscCall(VecSetFromOptions(user->u));
561   PetscCall(VecGetLocalSize(user->u, &ysubnlocal));
562   PetscCall(VecSetSizes(user->y, ysubnlocal * user->ns, user->nstate));
563   PetscCall(VecSetSizes(user->c, ysubnlocal * user->ns, user->m));
564   PetscCall(VecSetFromOptions(user->y));
565   PetscCall(VecSetFromOptions(user->c));
566 
567   /*
568      *******************************
569      Create scatters for x <-> y,u
570      *******************************
571 
572      If the state vector y and design vector u are partitioned as
573      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
574      then the solution vector x is organized as
575      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
576      The index sets user->s_is and user->d_is correspond to the indices of the
577      state and design variables owned by the current processor.
578   */
579   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->x));
580 
581   PetscCall(VecGetOwnershipRange(user->y, &lo, &hi));
582   PetscCall(VecGetOwnershipRange(user->u, &lo2, &hi2));
583 
584   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_allstate));
585   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + lo2, 1, &user->s_is));
586   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, lo2, 1, &is_alldesign));
587   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, hi + lo2, 1, &user->d_is));
588 
589   PetscCall(VecSetSizes(user->x, hi - lo + hi2 - lo2, user->n));
590   PetscCall(VecSetFromOptions(user->x));
591 
592   PetscCall(VecScatterCreate(user->x, user->s_is, user->y, is_allstate, &user->state_scatter));
593   PetscCall(VecScatterCreate(user->x, user->d_is, user->u, is_alldesign, &user->design_scatter));
594   PetscCall(ISDestroy(&is_alldesign));
595   PetscCall(ISDestroy(&is_allstate));
596   /*
597      *******************************
598      Create scatter from y to y_1,y_2,...,y_ns
599      *******************************
600   */
601   PetscCall(PetscMalloc1(user->ns, &user->yi_scatter));
602   PetscCall(VecDuplicate(user->u, &user->suby));
603   PetscCall(VecDuplicate(user->u, &user->subq));
604 
605   PetscCall(VecGetOwnershipRange(user->y, &lo2, &hi2));
606   istart = 0;
607   for (i = 0; i < user->ns; i++) {
608     PetscCall(VecGetOwnershipRange(user->suby, &lo, &hi));
609     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo2 + istart, 1, &is_from_y));
610     PetscCall(VecScatterCreate(user->y, is_from_y, user->suby, NULL, &user->yi_scatter[i]));
611     istart = istart + hi - lo;
612     PetscCall(ISDestroy(&is_from_y));
613   }
614   /*
615      *******************************
616      Create scatter from d to d_1,d_2,...,d_ns
617      *******************************
618   */
619   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->subd));
620   PetscCall(VecSetSizes(user->subd, PETSC_DECIDE, user->ndata));
621   PetscCall(VecSetFromOptions(user->subd));
622   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->d));
623   PetscCall(VecGetLocalSize(user->subd, &dsubnlocal));
624   PetscCall(VecSetSizes(user->d, dsubnlocal * user->ns, user->ndata * user->ns));
625   PetscCall(VecSetFromOptions(user->d));
626   PetscCall(PetscMalloc1(user->ns, &user->di_scatter));
627 
628   PetscCall(VecGetOwnershipRange(user->d, &lo2, &hi2));
629   istart = 0;
630   for (i = 0; i < user->ns; i++) {
631     PetscCall(VecGetOwnershipRange(user->subd, &lo, &hi));
632     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo2 + istart, 1, &is_from_d));
633     PetscCall(VecScatterCreate(user->d, is_from_d, user->subd, NULL, &user->di_scatter[i]));
634     istart = istart + hi - lo;
635     PetscCall(ISDestroy(&is_from_d));
636   }
637 
638   PetscCall(PetscMalloc1(user->mx, &x));
639   PetscCall(PetscMalloc1(user->mx, &y));
640   PetscCall(PetscMalloc1(user->mx, &z));
641 
642   user->ksp_its         = 0;
643   user->ksp_its_initial = 0;
644 
645   n         = user->mx * user->mx * user->mx;
646   m         = 3 * user->mx * user->mx * (user->mx - 1);
647   sqrt_beta = PetscSqrtScalar(user->beta);
648 
649   PetscCall(VecCreate(PETSC_COMM_WORLD, &XX));
650   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->q));
651   PetscCall(VecSetSizes(XX, ysubnlocal, n));
652   PetscCall(VecSetSizes(user->q, ysubnlocal * user->ns, user->m));
653   PetscCall(VecSetFromOptions(XX));
654   PetscCall(VecSetFromOptions(user->q));
655 
656   PetscCall(VecDuplicate(XX, &YY));
657   PetscCall(VecDuplicate(XX, &ZZ));
658   PetscCall(VecDuplicate(XX, &XXwork));
659   PetscCall(VecDuplicate(XX, &YYwork));
660   PetscCall(VecDuplicate(XX, &ZZwork));
661   PetscCall(VecDuplicate(XX, &UTwork));
662   PetscCall(VecDuplicate(XX, &user->utrue));
663 
664   /* map for striding q */
665   PetscCall(VecGetOwnershipRanges(user->q, &ranges));
666   PetscCall(VecGetOwnershipRanges(user->u, &subranges));
667 
668   PetscCall(VecGetOwnershipRange(user->q, &lo2, &hi2));
669   PetscCall(VecGetOwnershipRange(user->u, &lo, &hi));
670   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
671   h        = 1.0 / user->mx;
672   hinv     = user->mx;
673   neg_hinv = -hinv;
674 
675   PetscCall(VecGetOwnershipRange(XX, &istart, &iend));
676   for (linear_index = istart; linear_index < iend; linear_index++) {
677     i  = linear_index % user->mx;
678     j  = ((linear_index - i) / user->mx) % user->mx;
679     k  = ((linear_index - i) / user->mx - j) / user->mx;
680     vx = h * (i + 0.5);
681     vy = h * (j + 0.5);
682     vz = h * (k + 0.5);
683     PetscCall(VecSetValues(XX, 1, &linear_index, &vx, INSERT_VALUES));
684     PetscCall(VecSetValues(YY, 1, &linear_index, &vy, INSERT_VALUES));
685     PetscCall(VecSetValues(ZZ, 1, &linear_index, &vz, INSERT_VALUES));
686     for (is = 0; is < 2; is++) {
687       for (js = 0; js < 2; js++) {
688         for (ks = 0; ks < 2; ks++) {
689           ls = is * 4 + js * 2 + ks;
690           if (ls < user->ns) {
691             l        = ls * n + linear_index;
692             /* remap */
693             subindex = l % n;
694             subvec   = l / n;
695             nrank    = 0;
696             while (subindex >= subranges[nrank + 1]) nrank++;
697             offset = subindex - subranges[nrank];
698             istart = 0;
699             for (kk = 0; kk < nrank; kk++) istart += user->ns * (subranges[kk + 1] - subranges[kk]);
700             istart += (subranges[nrank + 1] - subranges[nrank]) * subvec;
701             l = istart + offset;
702             v = 100 * PetscSinScalar(2 * PETSC_PI * (vx + 0.25 * is)) * PetscSinScalar(2 * PETSC_PI * (vy + 0.25 * js)) * PetscSinScalar(2 * PETSC_PI * (vz + 0.25 * ks));
703             PetscCall(VecSetValues(user->q, 1, &l, &v, INSERT_VALUES));
704           }
705         }
706       }
707     }
708   }
709 
710   PetscCall(VecAssemblyBegin(XX));
711   PetscCall(VecAssemblyEnd(XX));
712   PetscCall(VecAssemblyBegin(YY));
713   PetscCall(VecAssemblyEnd(YY));
714   PetscCall(VecAssemblyBegin(ZZ));
715   PetscCall(VecAssemblyEnd(ZZ));
716   PetscCall(VecAssemblyBegin(user->q));
717   PetscCall(VecAssemblyEnd(user->q));
718 
719   /* Compute true parameter function
720      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
721   PetscCall(VecCopy(XX, XXwork));
722   PetscCall(VecCopy(YY, YYwork));
723   PetscCall(VecCopy(ZZ, ZZwork));
724 
725   PetscCall(VecShift(XXwork, -0.25));
726   PetscCall(VecShift(YYwork, -0.25));
727   PetscCall(VecShift(ZZwork, -0.25));
728 
729   PetscCall(VecPointwiseMult(XXwork, XXwork, XXwork));
730   PetscCall(VecPointwiseMult(YYwork, YYwork, YYwork));
731   PetscCall(VecPointwiseMult(ZZwork, ZZwork, ZZwork));
732 
733   PetscCall(VecCopy(XXwork, UTwork));
734   PetscCall(VecAXPY(UTwork, 1.0, YYwork));
735   PetscCall(VecAXPY(UTwork, 1.0, ZZwork));
736   PetscCall(VecScale(UTwork, -20.0));
737   PetscCall(VecExp(UTwork));
738   PetscCall(VecCopy(UTwork, user->utrue));
739 
740   PetscCall(VecCopy(XX, XXwork));
741   PetscCall(VecCopy(YY, YYwork));
742   PetscCall(VecCopy(ZZ, ZZwork));
743 
744   PetscCall(VecShift(XXwork, -0.75));
745   PetscCall(VecShift(YYwork, -0.75));
746   PetscCall(VecShift(ZZwork, -0.75));
747 
748   PetscCall(VecPointwiseMult(XXwork, XXwork, XXwork));
749   PetscCall(VecPointwiseMult(YYwork, YYwork, YYwork));
750   PetscCall(VecPointwiseMult(ZZwork, ZZwork, ZZwork));
751 
752   PetscCall(VecCopy(XXwork, UTwork));
753   PetscCall(VecAXPY(UTwork, 1.0, YYwork));
754   PetscCall(VecAXPY(UTwork, 1.0, ZZwork));
755   PetscCall(VecScale(UTwork, -20.0));
756   PetscCall(VecExp(UTwork));
757 
758   PetscCall(VecAXPY(user->utrue, -1.0, UTwork));
759 
760   PetscCall(VecDestroy(&XX));
761   PetscCall(VecDestroy(&YY));
762   PetscCall(VecDestroy(&ZZ));
763   PetscCall(VecDestroy(&XXwork));
764   PetscCall(VecDestroy(&YYwork));
765   PetscCall(VecDestroy(&ZZwork));
766   PetscCall(VecDestroy(&UTwork));
767 
768   /* Initial guess and reference model */
769   PetscCall(VecDuplicate(user->utrue, &user->ur));
770   PetscCall(VecSum(user->utrue, &meanut));
771   meanut = meanut / n;
772   PetscCall(VecSet(user->ur, meanut));
773   PetscCall(VecCopy(user->ur, user->u));
774 
775   /* Generate Grad matrix */
776   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Grad));
777   PetscCall(MatSetSizes(user->Grad, PETSC_DECIDE, ysubnlocal, m, n));
778   PetscCall(MatSetFromOptions(user->Grad));
779   PetscCall(MatMPIAIJSetPreallocation(user->Grad, 2, NULL, 2, NULL));
780   PetscCall(MatSeqAIJSetPreallocation(user->Grad, 2, NULL));
781   PetscCall(MatGetOwnershipRange(user->Grad, &istart, &iend));
782 
783   for (i = istart; i < iend; i++) {
784     if (i < m / 3) {
785       iblock = i / (user->mx - 1);
786       j      = iblock * user->mx + (i % (user->mx - 1));
787       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
788       j = j + 1;
789       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES));
790     }
791     if (i >= m / 3 && i < 2 * m / 3) {
792       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
793       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
794       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
795       j = j + user->mx;
796       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES));
797     }
798     if (i >= 2 * m / 3) {
799       j = i - 2 * m / 3;
800       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
801       j = j + user->mx * user->mx;
802       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES));
803     }
804   }
805 
806   PetscCall(MatAssemblyBegin(user->Grad, MAT_FINAL_ASSEMBLY));
807   PetscCall(MatAssemblyEnd(user->Grad, MAT_FINAL_ASSEMBLY));
808 
809   /* Generate arithmetic averaging matrix Av */
810   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Av));
811   PetscCall(MatSetSizes(user->Av, PETSC_DECIDE, ysubnlocal, m, n));
812   PetscCall(MatSetFromOptions(user->Av));
813   PetscCall(MatMPIAIJSetPreallocation(user->Av, 2, NULL, 2, NULL));
814   PetscCall(MatSeqAIJSetPreallocation(user->Av, 2, NULL));
815   PetscCall(MatGetOwnershipRange(user->Av, &istart, &iend));
816 
817   for (i = istart; i < iend; i++) {
818     if (i < m / 3) {
819       iblock = i / (user->mx - 1);
820       j      = iblock * user->mx + (i % (user->mx - 1));
821       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
822       j = j + 1;
823       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
824     }
825     if (i >= m / 3 && i < 2 * m / 3) {
826       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
827       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
828       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
829       j = j + user->mx;
830       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
831     }
832     if (i >= 2 * m / 3) {
833       j = i - 2 * m / 3;
834       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
835       j = j + user->mx * user->mx;
836       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
837     }
838   }
839 
840   PetscCall(MatAssemblyBegin(user->Av, MAT_FINAL_ASSEMBLY));
841   PetscCall(MatAssemblyEnd(user->Av, MAT_FINAL_ASSEMBLY));
842 
843   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->L));
844   PetscCall(MatSetSizes(user->L, PETSC_DECIDE, ysubnlocal, m + n, n));
845   PetscCall(MatSetFromOptions(user->L));
846   PetscCall(MatMPIAIJSetPreallocation(user->L, 2, NULL, 2, NULL));
847   PetscCall(MatSeqAIJSetPreallocation(user->L, 2, NULL));
848   PetscCall(MatGetOwnershipRange(user->L, &istart, &iend));
849 
850   for (i = istart; i < iend; i++) {
851     if (i < m / 3) {
852       iblock = i / (user->mx - 1);
853       j      = iblock * user->mx + (i % (user->mx - 1));
854       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
855       j = j + 1;
856       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES));
857     }
858     if (i >= m / 3 && i < 2 * m / 3) {
859       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
860       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
861       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
862       j = j + user->mx;
863       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES));
864     }
865     if (i >= 2 * m / 3 && i < m) {
866       j = i - 2 * m / 3;
867       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
868       j = j + user->mx * user->mx;
869       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES));
870     }
871     if (i >= m) {
872       j = i - m;
873       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &sqrt_beta, INSERT_VALUES));
874     }
875   }
876   PetscCall(MatAssemblyBegin(user->L, MAT_FINAL_ASSEMBLY));
877   PetscCall(MatAssemblyEnd(user->L, MAT_FINAL_ASSEMBLY));
878   PetscCall(MatScale(user->L, PetscPowScalar(h, 1.5)));
879 
880   /* Generate Div matrix */
881   if (!user->use_ptap) {
882     /* Generate Div matrix */
883     PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Div));
884     PetscCall(MatSetSizes(user->Div, ysubnlocal, PETSC_DECIDE, n, m));
885     PetscCall(MatSetFromOptions(user->Div));
886     PetscCall(MatMPIAIJSetPreallocation(user->Div, 4, NULL, 4, NULL));
887     PetscCall(MatSeqAIJSetPreallocation(user->Div, 6, NULL));
888     PetscCall(MatGetOwnershipRange(user->Grad, &istart, &iend));
889 
890     for (i = istart; i < iend; i++) {
891       if (i < m / 3) {
892         iblock = i / (user->mx - 1);
893         j      = iblock * user->mx + (i % (user->mx - 1));
894         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &neg_hinv, INSERT_VALUES));
895         j = j + 1;
896         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &hinv, INSERT_VALUES));
897       }
898       if (i >= m / 3 && i < 2 * m / 3) {
899         iblock = (i - m / 3) / (user->mx * (user->mx - 1));
900         j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
901         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &neg_hinv, INSERT_VALUES));
902         j = j + user->mx;
903         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &hinv, INSERT_VALUES));
904       }
905       if (i >= 2 * m / 3) {
906         j = i - 2 * m / 3;
907         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &neg_hinv, INSERT_VALUES));
908         j = j + user->mx * user->mx;
909         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &hinv, INSERT_VALUES));
910       }
911     }
912 
913     PetscCall(MatAssemblyBegin(user->Div, MAT_FINAL_ASSEMBLY));
914     PetscCall(MatAssemblyEnd(user->Div, MAT_FINAL_ASSEMBLY));
915     PetscCall(MatDuplicate(user->Div, MAT_SHARE_NONZERO_PATTERN, &user->Divwork));
916   } else {
917     PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Diag));
918     PetscCall(MatSetSizes(user->Diag, PETSC_DECIDE, PETSC_DECIDE, m, m));
919     PetscCall(MatSetFromOptions(user->Diag));
920     PetscCall(MatMPIAIJSetPreallocation(user->Diag, 1, NULL, 0, NULL));
921     PetscCall(MatSeqAIJSetPreallocation(user->Diag, 1, NULL));
922   }
923 
924   /* Build work vectors and matrices */
925   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->S));
926   PetscCall(VecSetSizes(user->S, PETSC_DECIDE, m));
927   PetscCall(VecSetFromOptions(user->S));
928 
929   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->lwork));
930   PetscCall(VecSetSizes(user->lwork, PETSC_DECIDE, m + user->mx * user->mx * user->mx));
931   PetscCall(VecSetFromOptions(user->lwork));
932 
933   PetscCall(MatDuplicate(user->Av, MAT_SHARE_NONZERO_PATTERN, &user->Avwork));
934 
935   PetscCall(VecDuplicate(user->S, &user->Swork));
936   PetscCall(VecDuplicate(user->S, &user->Sdiag));
937   PetscCall(VecDuplicate(user->S, &user->Av_u));
938   PetscCall(VecDuplicate(user->S, &user->Twork));
939   PetscCall(VecDuplicate(user->y, &user->ywork));
940   PetscCall(VecDuplicate(user->u, &user->Ywork));
941   PetscCall(VecDuplicate(user->u, &user->uwork));
942   PetscCall(VecDuplicate(user->u, &user->js_diag));
943   PetscCall(VecDuplicate(user->c, &user->cwork));
944   PetscCall(VecDuplicate(user->d, &user->dwork));
945 
946   /* Create a matrix-free shell user->Jd for computing B*x */
947   PetscCall(MatCreateShell(PETSC_COMM_WORLD, ysubnlocal * user->ns, ysubnlocal, user->nstate, user->ndesign, user, &user->Jd));
948   PetscCall(MatShellSetOperation(user->Jd, MATOP_MULT, (void (*)(void))DesignMatMult));
949   PetscCall(MatShellSetOperation(user->Jd, MATOP_MULT_TRANSPOSE, (void (*)(void))DesignMatMultTranspose));
950 
951   /* Compute true state function ytrue given utrue */
952   PetscCall(VecDuplicate(user->y, &user->ytrue));
953 
954   /* First compute Av_u = Av*exp(-u) */
955   PetscCall(VecSet(user->uwork, 0));
956   PetscCall(VecAXPY(user->uwork, -1.0, user->utrue)); /* Note: user->utrue */
957   PetscCall(VecExp(user->uwork));
958   PetscCall(MatMult(user->Av, user->uwork, user->Av_u));
959 
960   /* Next form DSG = Div*S*Grad */
961   PetscCall(VecCopy(user->Av_u, user->Swork));
962   PetscCall(VecReciprocal(user->Swork));
963   if (user->use_ptap) {
964     PetscCall(MatDiagonalSet(user->Diag, user->Swork, INSERT_VALUES));
965     PetscCall(MatPtAP(user->Diag, user->Grad, MAT_INITIAL_MATRIX, 1.0, &user->DSG));
966   } else {
967     PetscCall(MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN));
968     PetscCall(MatDiagonalScale(user->Divwork, NULL, user->Swork));
969 
970     PetscCall(MatMatMult(user->Divwork, user->Grad, MAT_INITIAL_MATRIX, 1.0, &user->DSG));
971   }
972 
973   PetscCall(MatSetOption(user->DSG, MAT_SYMMETRIC, PETSC_TRUE));
974   PetscCall(MatSetOption(user->DSG, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
975 
976   if (user->use_lrc == PETSC_TRUE) {
977     v = PetscSqrtReal(1.0 / user->ndesign);
978     PetscCall(PetscMalloc1(user->ndesign, &user->ones));
979 
980     for (i = 0; i < user->ndesign; i++) user->ones[i] = v;
981     PetscCall(MatCreateDense(PETSC_COMM_WORLD, ysubnlocal, PETSC_DECIDE, user->ndesign, 1, user->ones, &user->Ones));
982     PetscCall(MatCreateLRC(user->DSG, user->Ones, NULL, user->Ones, &user->JsBlock));
983     PetscCall(MatSetUp(user->JsBlock));
984   } else {
985     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
986     PetscCall(MatCreateShell(PETSC_COMM_WORLD, ysubnlocal, ysubnlocal, user->ndesign, user->ndesign, user, &user->JsBlock));
987     PetscCall(MatShellSetOperation(user->JsBlock, MATOP_MULT, (void (*)(void))StateBlockMatMult));
988     PetscCall(MatShellSetOperation(user->JsBlock, MATOP_MULT_TRANSPOSE, (void (*)(void))StateBlockMatMult));
989   }
990   PetscCall(MatSetOption(user->JsBlock, MAT_SYMMETRIC, PETSC_TRUE));
991   PetscCall(MatSetOption(user->JsBlock, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
992   PetscCall(MatCreateShell(PETSC_COMM_WORLD, ysubnlocal * user->ns, ysubnlocal * user->ns, user->nstate, user->nstate, user, &user->Js));
993   PetscCall(MatShellSetOperation(user->Js, MATOP_MULT, (void (*)(void))StateMatMult));
994   PetscCall(MatShellSetOperation(user->Js, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatMult));
995   PetscCall(MatSetOption(user->Js, MAT_SYMMETRIC, PETSC_TRUE));
996   PetscCall(MatSetOption(user->Js, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
997 
998   PetscCall(MatCreateShell(PETSC_COMM_WORLD, ysubnlocal * user->ns, ysubnlocal * user->ns, user->nstate, user->nstate, user, &user->JsInv));
999   PetscCall(MatShellSetOperation(user->JsInv, MATOP_MULT, (void (*)(void))StateInvMatMult));
1000   PetscCall(MatShellSetOperation(user->JsInv, MATOP_MULT_TRANSPOSE, (void (*)(void))StateInvMatMult));
1001   PetscCall(MatSetOption(user->JsInv, MAT_SYMMETRIC, PETSC_TRUE));
1002   PetscCall(MatSetOption(user->JsInv, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
1003 
1004   PetscCall(MatSetOption(user->DSG, MAT_SYMMETRIC, PETSC_TRUE));
1005   PetscCall(MatSetOption(user->DSG, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
1006   /* Now solve for ytrue */
1007   PetscCall(KSPCreate(PETSC_COMM_WORLD, &user->solver));
1008   PetscCall(KSPSetFromOptions(user->solver));
1009 
1010   PetscCall(KSPSetOperators(user->solver, user->JsBlock, user->DSG));
1011 
1012   PetscCall(MatMult(user->JsInv, user->q, user->ytrue));
1013   /* First compute Av_u = Av*exp(-u) */
1014   PetscCall(VecSet(user->uwork, 0));
1015   PetscCall(VecAXPY(user->uwork, -1.0, user->u)); /* Note: user->u */
1016   PetscCall(VecExp(user->uwork));
1017   PetscCall(MatMult(user->Av, user->uwork, user->Av_u));
1018 
1019   /* Next update DSG = Div*S*Grad  with user->u */
1020   PetscCall(VecCopy(user->Av_u, user->Swork));
1021   PetscCall(VecReciprocal(user->Swork));
1022   if (user->use_ptap) {
1023     PetscCall(MatDiagonalSet(user->Diag, user->Swork, INSERT_VALUES));
1024     PetscCall(MatPtAP(user->Diag, user->Grad, MAT_REUSE_MATRIX, 1.0, &user->DSG));
1025   } else {
1026     PetscCall(MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN));
1027     PetscCall(MatDiagonalScale(user->Divwork, NULL, user->Av_u));
1028     PetscCall(MatProductNumeric(user->DSG));
1029   }
1030 
1031   /* Now solve for y */
1032 
1033   PetscCall(MatMult(user->JsInv, user->q, user->y));
1034 
1035   user->ksp_its_initial = user->ksp_its;
1036   user->ksp_its         = 0;
1037   /* Construct projection matrix Q (blocks) */
1038   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Q));
1039   PetscCall(MatSetSizes(user->Q, dsubnlocal, ysubnlocal, user->ndata, user->ndesign));
1040   PetscCall(MatSetFromOptions(user->Q));
1041   PetscCall(MatMPIAIJSetPreallocation(user->Q, 8, NULL, 8, NULL));
1042   PetscCall(MatSeqAIJSetPreallocation(user->Q, 8, NULL));
1043 
1044   for (i = 0; i < user->mx; i++) {
1045     x[i] = h * (i + 0.5);
1046     y[i] = h * (i + 0.5);
1047     z[i] = h * (i + 0.5);
1048   }
1049   PetscCall(MatGetOwnershipRange(user->Q, &istart, &iend));
1050 
1051   nx = user->mx;
1052   ny = user->mx;
1053   nz = user->mx;
1054   for (i = istart; i < iend; i++) {
1055     xri = xr[i];
1056     im  = 0;
1057     xim = x[im];
1058     while (xri > xim && im < nx) {
1059       im  = im + 1;
1060       xim = x[im];
1061     }
1062     indx1 = im - 1;
1063     indx2 = im;
1064     dx1   = xri - x[indx1];
1065     dx2   = x[indx2] - xri;
1066 
1067     yri = yr[i];
1068     im  = 0;
1069     yim = y[im];
1070     while (yri > yim && im < ny) {
1071       im  = im + 1;
1072       yim = y[im];
1073     }
1074     indy1 = im - 1;
1075     indy2 = im;
1076     dy1   = yri - y[indy1];
1077     dy2   = y[indy2] - yri;
1078 
1079     zri = zr[i];
1080     im  = 0;
1081     zim = z[im];
1082     while (zri > zim && im < nz) {
1083       im  = im + 1;
1084       zim = z[im];
1085     }
1086     indz1 = im - 1;
1087     indz2 = im;
1088     dz1   = zri - z[indz1];
1089     dz2   = z[indz2] - zri;
1090 
1091     Dx = x[indx2] - x[indx1];
1092     Dy = y[indy2] - y[indy1];
1093     Dz = z[indz2] - z[indz1];
1094 
1095     j = indx1 + indy1 * nx + indz1 * nx * ny;
1096     v = (1 - dx1 / Dx) * (1 - dy1 / Dy) * (1 - dz1 / Dz);
1097     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1098 
1099     j = indx1 + indy1 * nx + indz2 * nx * ny;
1100     v = (1 - dx1 / Dx) * (1 - dy1 / Dy) * (1 - dz2 / Dz);
1101     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1102 
1103     j = indx1 + indy2 * nx + indz1 * nx * ny;
1104     v = (1 - dx1 / Dx) * (1 - dy2 / Dy) * (1 - dz1 / Dz);
1105     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1106 
1107     j = indx1 + indy2 * nx + indz2 * nx * ny;
1108     v = (1 - dx1 / Dx) * (1 - dy2 / Dy) * (1 - dz2 / Dz);
1109     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1110 
1111     j = indx2 + indy1 * nx + indz1 * nx * ny;
1112     v = (1 - dx2 / Dx) * (1 - dy1 / Dy) * (1 - dz1 / Dz);
1113     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1114 
1115     j = indx2 + indy1 * nx + indz2 * nx * ny;
1116     v = (1 - dx2 / Dx) * (1 - dy1 / Dy) * (1 - dz2 / Dz);
1117     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1118 
1119     j = indx2 + indy2 * nx + indz1 * nx * ny;
1120     v = (1 - dx2 / Dx) * (1 - dy2 / Dy) * (1 - dz1 / Dz);
1121     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1122 
1123     j = indx2 + indy2 * nx + indz2 * nx * ny;
1124     v = (1 - dx2 / Dx) * (1 - dy2 / Dy) * (1 - dz2 / Dz);
1125     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1126   }
1127 
1128   PetscCall(MatAssemblyBegin(user->Q, MAT_FINAL_ASSEMBLY));
1129   PetscCall(MatAssemblyEnd(user->Q, MAT_FINAL_ASSEMBLY));
1130   /* Create MQ (composed of blocks of Q */
1131   PetscCall(MatCreateShell(PETSC_COMM_WORLD, dsubnlocal * user->ns, PETSC_DECIDE, user->ndata * user->ns, user->nstate, user, &user->MQ));
1132   PetscCall(MatShellSetOperation(user->MQ, MATOP_MULT, (void (*)(void))QMatMult));
1133   PetscCall(MatShellSetOperation(user->MQ, MATOP_MULT_TRANSPOSE, (void (*)(void))QMatMultTranspose));
1134 
1135   /* Add noise to the measurement data */
1136   PetscCall(VecSet(user->ywork, 1.0));
1137   PetscCall(VecAYPX(user->ywork, user->noise, user->ytrue));
1138   PetscCall(MatMult(user->MQ, user->ywork, user->d));
1139 
1140   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1141   PetscCall(PetscFree(x));
1142   PetscCall(PetscFree(y));
1143   PetscCall(PetscFree(z));
1144   PetscCall(PetscLogStagePop());
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 PetscErrorCode EllipticDestroy(AppCtx *user) {
1149   PetscInt i;
1150 
1151   PetscFunctionBegin;
1152   PetscCall(MatDestroy(&user->DSG));
1153   PetscCall(KSPDestroy(&user->solver));
1154   PetscCall(MatDestroy(&user->Q));
1155   PetscCall(MatDestroy(&user->MQ));
1156   if (!user->use_ptap) {
1157     PetscCall(MatDestroy(&user->Div));
1158     PetscCall(MatDestroy(&user->Divwork));
1159   } else {
1160     PetscCall(MatDestroy(&user->Diag));
1161   }
1162   if (user->use_lrc) PetscCall(MatDestroy(&user->Ones));
1163 
1164   PetscCall(MatDestroy(&user->Grad));
1165   PetscCall(MatDestroy(&user->Av));
1166   PetscCall(MatDestroy(&user->Avwork));
1167   PetscCall(MatDestroy(&user->L));
1168   PetscCall(MatDestroy(&user->Js));
1169   PetscCall(MatDestroy(&user->Jd));
1170   PetscCall(MatDestroy(&user->JsBlock));
1171   PetscCall(MatDestroy(&user->JsInv));
1172 
1173   PetscCall(VecDestroy(&user->x));
1174   PetscCall(VecDestroy(&user->u));
1175   PetscCall(VecDestroy(&user->uwork));
1176   PetscCall(VecDestroy(&user->utrue));
1177   PetscCall(VecDestroy(&user->y));
1178   PetscCall(VecDestroy(&user->ywork));
1179   PetscCall(VecDestroy(&user->ytrue));
1180   PetscCall(VecDestroy(&user->c));
1181   PetscCall(VecDestroy(&user->cwork));
1182   PetscCall(VecDestroy(&user->ur));
1183   PetscCall(VecDestroy(&user->q));
1184   PetscCall(VecDestroy(&user->d));
1185   PetscCall(VecDestroy(&user->dwork));
1186   PetscCall(VecDestroy(&user->lwork));
1187   PetscCall(VecDestroy(&user->S));
1188   PetscCall(VecDestroy(&user->Swork));
1189   PetscCall(VecDestroy(&user->Sdiag));
1190   PetscCall(VecDestroy(&user->Ywork));
1191   PetscCall(VecDestroy(&user->Twork));
1192   PetscCall(VecDestroy(&user->Av_u));
1193   PetscCall(VecDestroy(&user->js_diag));
1194   PetscCall(ISDestroy(&user->s_is));
1195   PetscCall(ISDestroy(&user->d_is));
1196   PetscCall(VecDestroy(&user->suby));
1197   PetscCall(VecDestroy(&user->subd));
1198   PetscCall(VecDestroy(&user->subq));
1199   PetscCall(VecScatterDestroy(&user->state_scatter));
1200   PetscCall(VecScatterDestroy(&user->design_scatter));
1201   for (i = 0; i < user->ns; i++) {
1202     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1203     PetscCall(VecScatterDestroy(&user->di_scatter[i]));
1204   }
1205   PetscCall(PetscFree(user->yi_scatter));
1206   PetscCall(PetscFree(user->di_scatter));
1207   if (user->use_lrc) {
1208     PetscCall(PetscFree(user->ones));
1209     PetscCall(MatDestroy(&user->Ones));
1210   }
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode EllipticMonitor(Tao tao, void *ptr) {
1215   Vec       X;
1216   PetscReal unorm, ynorm;
1217   AppCtx   *user = (AppCtx *)ptr;
1218 
1219   PetscFunctionBegin;
1220   PetscCall(TaoGetSolution(tao, &X));
1221   PetscCall(Scatter(X, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
1222   PetscCall(VecAXPY(user->ywork, -1.0, user->ytrue));
1223   PetscCall(VecAXPY(user->uwork, -1.0, user->utrue));
1224   PetscCall(VecNorm(user->uwork, NORM_2, &unorm));
1225   PetscCall(VecNorm(user->ywork, NORM_2, &ynorm));
1226   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n", (double)unorm, (double)ynorm));
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 /*TEST
1231 
1232    build:
1233       requires: !complex
1234 
1235    test:
1236       args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11
1237       requires: !single
1238 
1239    test:
1240       suffix: 2
1241       args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3
1242       requires: !single
1243 
1244 TEST*/
1245