xref: /petsc/src/tao/pde_constrained/tutorials/elliptic.c (revision df4cd43f92eaa320656440c40edb1046daee8f75)
1 #include <petsc/private/taoimpl.h>
2 
3 typedef struct {
4   PetscInt n; /* Number of total variables */
5   PetscInt m; /* Number of constraints */
6   PetscInt nstate;
7   PetscInt ndesign;
8   PetscInt mx;    /* grid points in each direction */
9   PetscInt ns;    /* Number of data samples (1<=ns<=8)
10                   Currently only ns=1 is supported */
11   PetscInt ndata; /* Number of data points per sample */
12   IS       s_is;
13   IS       d_is;
14 
15   VecScatter  state_scatter;
16   VecScatter  design_scatter;
17   VecScatter *yi_scatter, *di_scatter;
18   Vec         suby, subq, subd;
19   Mat         Js, Jd, JsPrec, JsInv, JsBlock;
20 
21   PetscReal  alpha; /* Regularization parameter */
22   PetscReal  beta;  /* Weight attributed to ||u||^2 in regularization functional */
23   PetscReal  noise; /* Amount of noise to add to data */
24   PetscReal *ones;
25   Mat        Q;
26   Mat        MQ;
27   Mat        L;
28 
29   Mat Grad;
30   Mat Av, Avwork;
31   Mat Div, Divwork;
32   Mat DSG;
33   Mat Diag, Ones;
34 
35   Vec q;
36   Vec ur; /* reference */
37 
38   Vec d;
39   Vec dwork;
40 
41   Vec x; /* super vec of y,u */
42 
43   Vec y; /* state variables */
44   Vec ywork;
45 
46   Vec ytrue;
47 
48   Vec u; /* design variables */
49   Vec uwork;
50 
51   Vec utrue;
52 
53   Vec js_diag;
54 
55   Vec c; /* constraint vector */
56   Vec cwork;
57 
58   Vec lwork;
59   Vec S;
60   Vec Swork, Twork, Sdiag, Ywork;
61   Vec Av_u;
62 
63   KSP solver;
64   PC  prec;
65 
66   PetscReal     tola, tolb, tolc, told;
67   PetscInt      ksp_its;
68   PetscInt      ksp_its_initial;
69   PetscLogStage stages[10];
70   PetscBool     use_ptap;
71   PetscBool     use_lrc;
72 } AppCtx;
73 
74 PetscErrorCode FormFunction(Tao, Vec, PetscReal *, void *);
75 PetscErrorCode FormGradient(Tao, Vec, Vec, void *);
76 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal *, Vec, void *);
77 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void *);
78 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat, void *);
79 PetscErrorCode FormConstraints(Tao, Vec, Vec, void *);
80 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void *);
81 PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter);
82 PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter);
83 PetscErrorCode EllipticInitialize(AppCtx *);
84 PetscErrorCode EllipticDestroy(AppCtx *);
85 PetscErrorCode EllipticMonitor(Tao, void *);
86 
87 PetscErrorCode StateBlockMatMult(Mat, Vec, Vec);
88 PetscErrorCode StateMatMult(Mat, Vec, Vec);
89 
90 PetscErrorCode StateInvMatMult(Mat, Vec, Vec);
91 PetscErrorCode DesignMatMult(Mat, Vec, Vec);
92 PetscErrorCode DesignMatMultTranspose(Mat, Vec, Vec);
93 
94 PetscErrorCode QMatMult(Mat, Vec, Vec);
95 PetscErrorCode QMatMultTranspose(Mat, Vec, Vec);
96 
97 static char help[] = "";
98 
99 int main(int argc, char **argv)
100 {
101   Vec      x0;
102   Tao      tao;
103   AppCtx   user;
104   PetscInt ntests = 1;
105   PetscInt i;
106 
107   PetscFunctionBeginUser;
108   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
109   user.mx = 8;
110   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "elliptic example", NULL);
111   PetscCall(PetscOptionsInt("-mx", "Number of grid points in each direction", "", user.mx, &user.mx, NULL));
112   user.ns = 6;
113   PetscCall(PetscOptionsInt("-ns", "Number of data samples (1<=ns<=8)", "", user.ns, &user.ns, NULL));
114   user.ndata = 64;
115   PetscCall(PetscOptionsInt("-ndata", "Numbers of data points per sample", "", user.ndata, &user.ndata, NULL));
116   user.alpha = 0.1;
117   PetscCall(PetscOptionsReal("-alpha", "Regularization parameter", "", user.alpha, &user.alpha, NULL));
118   user.beta = 0.00001;
119   PetscCall(PetscOptionsReal("-beta", "Weight attributed to ||u||^2 in regularization functional", "", user.beta, &user.beta, NULL));
120   user.noise = 0.01;
121   PetscCall(PetscOptionsReal("-noise", "Amount of noise to add to data", "", user.noise, &user.noise, NULL));
122 
123   user.use_ptap = PETSC_FALSE;
124   PetscCall(PetscOptionsBool("-use_ptap", "Use ptap matrix for DSG", "", user.use_ptap, &user.use_ptap, NULL));
125   user.use_lrc = PETSC_FALSE;
126   PetscCall(PetscOptionsBool("-use_lrc", "Use lrc matrix for Js", "", user.use_lrc, &user.use_lrc, NULL));
127   PetscCall(PetscOptionsInt("-ntests", "Number of times to repeat TaoSolve", "", ntests, &ntests, NULL));
128   PetscOptionsEnd();
129 
130   user.m       = user.ns * user.mx * user.mx * user.mx; /* number of constraints */
131   user.nstate  = user.m;
132   user.ndesign = user.mx * user.mx * user.mx;
133   user.n       = user.nstate + user.ndesign; /* number of variables */
134 
135   /* Create TAO solver and set desired solution method */
136   PetscCall(TaoCreate(PETSC_COMM_WORLD, &tao));
137   PetscCall(TaoSetType(tao, TAOLCL));
138 
139   /* Set up initial vectors and matrices */
140   PetscCall(EllipticInitialize(&user));
141 
142   PetscCall(Gather(user.x, user.y, user.state_scatter, user.u, user.design_scatter));
143   PetscCall(VecDuplicate(user.x, &x0));
144   PetscCall(VecCopy(user.x, x0));
145 
146   /* Set solution vector with an initial guess */
147   PetscCall(TaoSetSolution(tao, user.x));
148   PetscCall(TaoSetObjective(tao, FormFunction, &user));
149   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
150   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
151 
152   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, &user));
153   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
154 
155   PetscCall(TaoSetStateDesignIS(tao, user.s_is, user.d_is));
156   PetscCall(TaoSetFromOptions(tao));
157 
158   /* SOLVE THE APPLICATION */
159   PetscCall(PetscLogStageRegister("Trials", &user.stages[1]));
160   PetscCall(PetscLogStagePush(user.stages[1]));
161   for (i = 0; i < ntests; i++) {
162     PetscCall(TaoSolve(tao));
163     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "KSP Iterations = %" PetscInt_FMT "\n", user.ksp_its));
164     PetscCall(VecCopy(x0, user.x));
165   }
166   PetscCall(PetscLogStagePop());
167   PetscCall(PetscBarrier((PetscObject)user.x));
168   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "KSP iterations within initialization: "));
169   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", user.ksp_its_initial));
170 
171   PetscCall(TaoDestroy(&tao));
172   PetscCall(VecDestroy(&x0));
173   PetscCall(EllipticDestroy(&user));
174   PetscCall(PetscFinalize());
175   return 0;
176 }
177 /* ------------------------------------------------------------------- */
178 /*
179    dwork = Qy - d
180    lwork = L*(u-ur)
181    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
182 */
183 PetscErrorCode FormFunction(Tao tao, Vec X, PetscReal *f, void *ptr)
184 {
185   PetscReal d1 = 0, d2 = 0;
186   AppCtx   *user = (AppCtx *)ptr;
187 
188   PetscFunctionBegin;
189   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
190   PetscCall(MatMult(user->MQ, user->y, user->dwork));
191   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
192   PetscCall(VecDot(user->dwork, user->dwork, &d1));
193   PetscCall(VecWAXPY(user->uwork, -1.0, user->ur, user->u));
194   PetscCall(MatMult(user->L, user->uwork, user->lwork));
195   PetscCall(VecDot(user->lwork, user->lwork, &d2));
196   *f = 0.5 * (d1 + user->alpha * d2);
197   PetscFunctionReturn(PETSC_SUCCESS);
198 }
199 
200 /* ------------------------------------------------------------------- */
201 /*
202     state: g_s = Q' *(Qy - d)
203     design: g_d = alpha*L'*L*(u-ur)
204 */
205 PetscErrorCode FormGradient(Tao tao, Vec X, Vec G, void *ptr)
206 {
207   AppCtx *user = (AppCtx *)ptr;
208 
209   PetscFunctionBegin;
210   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
211   PetscCall(MatMult(user->MQ, user->y, user->dwork));
212   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
213   PetscCall(MatMultTranspose(user->MQ, user->dwork, user->ywork));
214   PetscCall(VecWAXPY(user->uwork, -1.0, user->ur, user->u));
215   PetscCall(MatMult(user->L, user->uwork, user->lwork));
216   PetscCall(MatMultTranspose(user->L, user->lwork, user->uwork));
217   PetscCall(VecScale(user->uwork, user->alpha));
218   PetscCall(Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
219   PetscFunctionReturn(PETSC_SUCCESS);
220 }
221 
222 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
223 {
224   PetscReal d1, d2;
225   AppCtx   *user = (AppCtx *)ptr;
226 
227   PetscFunctionBegin;
228   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
229   PetscCall(MatMult(user->MQ, user->y, user->dwork));
230   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
231   PetscCall(VecDot(user->dwork, user->dwork, &d1));
232   PetscCall(MatMultTranspose(user->MQ, user->dwork, user->ywork));
233 
234   PetscCall(VecWAXPY(user->uwork, -1.0, user->ur, user->u));
235   PetscCall(MatMult(user->L, user->uwork, user->lwork));
236   PetscCall(VecDot(user->lwork, user->lwork, &d2));
237   PetscCall(MatMultTranspose(user->L, user->lwork, user->uwork));
238   PetscCall(VecScale(user->uwork, user->alpha));
239   *f = 0.5 * (d1 + user->alpha * d2);
240   PetscCall(Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
241   PetscFunctionReturn(PETSC_SUCCESS);
242 }
243 
244 /* ------------------------------------------------------------------- */
245 /* A
246 MatShell object
247 */
248 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
249 {
250   AppCtx *user = (AppCtx *)ptr;
251 
252   PetscFunctionBegin;
253   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
254   /* DSG = Div * (1/Av_u) * Grad */
255   PetscCall(VecSet(user->uwork, 0));
256   PetscCall(VecAXPY(user->uwork, -1.0, user->u));
257   PetscCall(VecExp(user->uwork));
258   PetscCall(MatMult(user->Av, user->uwork, user->Av_u));
259   PetscCall(VecCopy(user->Av_u, user->Swork));
260   PetscCall(VecReciprocal(user->Swork));
261   if (user->use_ptap) {
262     PetscCall(MatDiagonalSet(user->Diag, user->Swork, INSERT_VALUES));
263     PetscCall(MatPtAP(user->Diag, user->Grad, MAT_REUSE_MATRIX, 1.0, &user->DSG));
264   } else {
265     PetscCall(MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN));
266     PetscCall(MatDiagonalScale(user->Divwork, NULL, user->Swork));
267     PetscCall(MatProductNumeric(user->DSG));
268   }
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 /* ------------------------------------------------------------------- */
272 /* B */
273 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
274 {
275   AppCtx *user = (AppCtx *)ptr;
276 
277   PetscFunctionBegin;
278   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
279   PetscFunctionReturn(PETSC_SUCCESS);
280 }
281 
282 PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y)
283 {
284   PetscReal sum;
285   AppCtx   *user;
286 
287   PetscFunctionBegin;
288   PetscCall(MatShellGetContext(J_shell, &user));
289   PetscCall(MatMult(user->DSG, X, Y));
290   PetscCall(VecSum(X, &sum));
291   sum /= user->ndesign;
292   PetscCall(VecShift(Y, sum));
293   PetscFunctionReturn(PETSC_SUCCESS);
294 }
295 
296 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
297 {
298   PetscInt i;
299   AppCtx  *user;
300 
301   PetscFunctionBegin;
302   PetscCall(MatShellGetContext(J_shell, &user));
303   if (user->ns == 1) {
304     PetscCall(MatMult(user->JsBlock, X, Y));
305   } else {
306     for (i = 0; i < user->ns; i++) {
307       PetscCall(Scatter(X, user->subq, user->yi_scatter[i], 0, 0));
308       PetscCall(Scatter(Y, user->suby, user->yi_scatter[i], 0, 0));
309       PetscCall(MatMult(user->JsBlock, user->subq, user->suby));
310       PetscCall(Gather(Y, user->suby, user->yi_scatter[i], 0, 0));
311     }
312   }
313   PetscFunctionReturn(PETSC_SUCCESS);
314 }
315 
316 PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y)
317 {
318   PetscInt its, i;
319   AppCtx  *user;
320 
321   PetscFunctionBegin;
322   PetscCall(MatShellGetContext(J_shell, &user));
323   PetscCall(KSPSetOperators(user->solver, user->JsBlock, user->DSG));
324   if (Y == user->ytrue) {
325     /* First solve is done using true solution to set up problem */
326     PetscCall(KSPSetTolerances(user->solver, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
327   } else {
328     PetscCall(KSPSetTolerances(user->solver, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
329   }
330   if (user->ns == 1) {
331     PetscCall(KSPSolve(user->solver, X, Y));
332     PetscCall(KSPGetIterationNumber(user->solver, &its));
333     user->ksp_its += its;
334   } else {
335     for (i = 0; i < user->ns; i++) {
336       PetscCall(Scatter(X, user->subq, user->yi_scatter[i], 0, 0));
337       PetscCall(Scatter(Y, user->suby, user->yi_scatter[i], 0, 0));
338       PetscCall(KSPSolve(user->solver, user->subq, user->suby));
339       PetscCall(KSPGetIterationNumber(user->solver, &its));
340       user->ksp_its += its;
341       PetscCall(Gather(Y, user->suby, user->yi_scatter[i], 0, 0));
342     }
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
347 {
348   AppCtx  *user;
349   PetscInt i;
350 
351   PetscFunctionBegin;
352   PetscCall(MatShellGetContext(J_shell, &user));
353   if (user->ns == 1) {
354     PetscCall(MatMult(user->Q, X, Y));
355   } else {
356     for (i = 0; i < user->ns; i++) {
357       PetscCall(Scatter(X, user->subq, user->yi_scatter[i], 0, 0));
358       PetscCall(Scatter(Y, user->subd, user->di_scatter[i], 0, 0));
359       PetscCall(MatMult(user->Q, user->subq, user->subd));
360       PetscCall(Gather(Y, user->subd, user->di_scatter[i], 0, 0));
361     }
362   }
363   PetscFunctionReturn(PETSC_SUCCESS);
364 }
365 
366 PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
367 {
368   AppCtx  *user;
369   PetscInt i;
370 
371   PetscFunctionBegin;
372   PetscCall(MatShellGetContext(J_shell, &user));
373   if (user->ns == 1) {
374     PetscCall(MatMultTranspose(user->Q, X, Y));
375   } else {
376     for (i = 0; i < user->ns; i++) {
377       PetscCall(Scatter(X, user->subd, user->di_scatter[i], 0, 0));
378       PetscCall(Scatter(Y, user->suby, user->yi_scatter[i], 0, 0));
379       PetscCall(MatMultTranspose(user->Q, user->subd, user->suby));
380       PetscCall(Gather(Y, user->suby, user->yi_scatter[i], 0, 0));
381     }
382   }
383   PetscFunctionReturn(PETSC_SUCCESS);
384 }
385 
386 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
387 {
388   PetscInt i;
389   AppCtx  *user;
390 
391   PetscFunctionBegin;
392   PetscCall(MatShellGetContext(J_shell, &user));
393 
394   /* sdiag(1./v) */
395   PetscCall(VecSet(user->uwork, 0));
396   PetscCall(VecAXPY(user->uwork, -1.0, user->u));
397   PetscCall(VecExp(user->uwork));
398 
399   /* sdiag(1./((Av*(1./v)).^2)) */
400   PetscCall(MatMult(user->Av, user->uwork, user->Swork));
401   PetscCall(VecPointwiseMult(user->Swork, user->Swork, user->Swork));
402   PetscCall(VecReciprocal(user->Swork));
403 
404   /* (Av * (sdiag(1./v) * b)) */
405   PetscCall(VecPointwiseMult(user->uwork, user->uwork, X));
406   PetscCall(MatMult(user->Av, user->uwork, user->Twork));
407 
408   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
409   PetscCall(VecPointwiseMult(user->Swork, user->Twork, user->Swork));
410 
411   if (user->ns == 1) {
412     /* (sdiag(Grad*y(:,i)) */
413     PetscCall(MatMult(user->Grad, user->y, user->Twork));
414 
415     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
416     PetscCall(VecPointwiseMult(user->Swork, user->Twork, user->Swork));
417     PetscCall(MatMultTranspose(user->Grad, user->Swork, Y));
418   } else {
419     for (i = 0; i < user->ns; i++) {
420       PetscCall(Scatter(user->y, user->suby, user->yi_scatter[i], 0, 0));
421       PetscCall(Scatter(Y, user->subq, user->yi_scatter[i], 0, 0));
422 
423       PetscCall(MatMult(user->Grad, user->suby, user->Twork));
424       PetscCall(VecPointwiseMult(user->Twork, user->Twork, user->Swork));
425       PetscCall(MatMultTranspose(user->Grad, user->Twork, user->subq));
426       PetscCall(Gather(user->y, user->suby, user->yi_scatter[i], 0, 0));
427       PetscCall(Gather(Y, user->subq, user->yi_scatter[i], 0, 0));
428     }
429   }
430   PetscFunctionReturn(PETSC_SUCCESS);
431 }
432 
433 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
434 {
435   PetscInt i;
436   AppCtx  *user;
437 
438   PetscFunctionBegin;
439   PetscCall(MatShellGetContext(J_shell, &user));
440   PetscCall(VecZeroEntries(Y));
441 
442   /* Sdiag = 1./((Av*(1./v)).^2) */
443   PetscCall(VecSet(user->uwork, 0));
444   PetscCall(VecAXPY(user->uwork, -1.0, user->u));
445   PetscCall(VecExp(user->uwork));
446   PetscCall(MatMult(user->Av, user->uwork, user->Swork));
447   PetscCall(VecPointwiseMult(user->Sdiag, user->Swork, user->Swork));
448   PetscCall(VecReciprocal(user->Sdiag));
449 
450   for (i = 0; i < user->ns; i++) {
451     PetscCall(Scatter(X, user->subq, user->yi_scatter[i], 0, 0));
452     PetscCall(Scatter(user->y, user->suby, user->yi_scatter[i], 0, 0));
453 
454     /* Swork = (Div' * b(:,i)) */
455     PetscCall(MatMult(user->Grad, user->subq, user->Swork));
456 
457     /* Twork = Grad*y(:,i) */
458     PetscCall(MatMult(user->Grad, user->suby, user->Twork));
459 
460     /* Twork = sdiag(Twork) * Swork */
461     PetscCall(VecPointwiseMult(user->Twork, user->Swork, user->Twork));
462 
463     /* Swork = pointwisemult(Sdiag,Twork) */
464     PetscCall(VecPointwiseMult(user->Swork, user->Twork, user->Sdiag));
465 
466     /* Ywork = Av' * Swork */
467     PetscCall(MatMultTranspose(user->Av, user->Swork, user->Ywork));
468 
469     /* Ywork = pointwisemult(uwork,Ywork) */
470     PetscCall(VecPointwiseMult(user->Ywork, user->uwork, user->Ywork));
471     PetscCall(VecAXPY(Y, 1.0, user->Ywork));
472     PetscCall(Gather(user->y, user->suby, user->yi_scatter[i], 0, 0));
473   }
474   PetscFunctionReturn(PETSC_SUCCESS);
475 }
476 
477 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
478 {
479   /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
480   PetscReal sum;
481   PetscInt  i;
482   AppCtx   *user = (AppCtx *)ptr;
483 
484   PetscFunctionBegin;
485   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
486   if (user->ns == 1) {
487     PetscCall(MatMult(user->Grad, user->y, user->Swork));
488     PetscCall(VecPointwiseDivide(user->Swork, user->Swork, user->Av_u));
489     PetscCall(MatMultTranspose(user->Grad, user->Swork, C));
490     PetscCall(VecSum(user->y, &sum));
491     sum /= user->ndesign;
492     PetscCall(VecShift(C, sum));
493   } else {
494     for (i = 0; i < user->ns; i++) {
495       PetscCall(Scatter(user->y, user->suby, user->yi_scatter[i], 0, 0));
496       PetscCall(Scatter(C, user->subq, user->yi_scatter[i], 0, 0));
497       PetscCall(MatMult(user->Grad, user->suby, user->Swork));
498       PetscCall(VecPointwiseDivide(user->Swork, user->Swork, user->Av_u));
499       PetscCall(MatMultTranspose(user->Grad, user->Swork, user->subq));
500 
501       PetscCall(VecSum(user->suby, &sum));
502       sum /= user->ndesign;
503       PetscCall(VecShift(user->subq, sum));
504 
505       PetscCall(Gather(user->y, user->suby, user->yi_scatter[i], 0, 0));
506       PetscCall(Gather(C, user->subq, user->yi_scatter[i], 0, 0));
507     }
508   }
509   PetscCall(VecAXPY(C, -1.0, user->q));
510   PetscFunctionReturn(PETSC_SUCCESS);
511 }
512 
513 PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
514 {
515   PetscFunctionBegin;
516   PetscCall(VecScatterBegin(scat1, x, sub1, INSERT_VALUES, SCATTER_FORWARD));
517   PetscCall(VecScatterEnd(scat1, x, sub1, INSERT_VALUES, SCATTER_FORWARD));
518   if (sub2) {
519     PetscCall(VecScatterBegin(scat2, x, sub2, INSERT_VALUES, SCATTER_FORWARD));
520     PetscCall(VecScatterEnd(scat2, x, sub2, INSERT_VALUES, SCATTER_FORWARD));
521   }
522   PetscFunctionReturn(PETSC_SUCCESS);
523 }
524 
525 PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
526 {
527   PetscFunctionBegin;
528   PetscCall(VecScatterBegin(scat1, sub1, x, INSERT_VALUES, SCATTER_REVERSE));
529   PetscCall(VecScatterEnd(scat1, sub1, x, INSERT_VALUES, SCATTER_REVERSE));
530   if (sub2) {
531     PetscCall(VecScatterBegin(scat2, sub2, x, INSERT_VALUES, SCATTER_REVERSE));
532     PetscCall(VecScatterEnd(scat2, sub2, x, INSERT_VALUES, SCATTER_REVERSE));
533   }
534   PetscFunctionReturn(PETSC_SUCCESS);
535 }
536 
537 PetscErrorCode EllipticInitialize(AppCtx *user)
538 {
539   PetscInt        m, n, i, j, k, l, linear_index, is, js, ks, ls, istart, iend, iblock;
540   Vec             XX, YY, ZZ, XXwork, YYwork, ZZwork, UTwork;
541   PetscReal      *x, *y, *z;
542   PetscReal       h, meanut;
543   PetscScalar     hinv, neg_hinv, half = 0.5, sqrt_beta;
544   PetscInt        im, indx1, indx2, indy1, indy2, indz1, indz2, nx, ny, nz;
545   IS              is_alldesign, is_allstate;
546   IS              is_from_d;
547   IS              is_from_y;
548   PetscInt        lo, hi, hi2, lo2, ysubnlocal, dsubnlocal;
549   const PetscInt *ranges, *subranges;
550   PetscMPIInt     size;
551   PetscReal       xri, yri, zri, xim, yim, zim, dx1, dx2, dy1, dy2, dz1, dz2, Dx, Dy, Dz;
552   PetscScalar     v, vx, vy, vz;
553   PetscInt        offset, subindex, subvec, nrank, kk;
554 
555   PetscScalar xr[64] = {0.4970, 0.8498, 0.7814, 0.6268, 0.7782, 0.6402, 0.3617, 0.3160, 0.3610, 0.5298, 0.6987, 0.3331, 0.7962, 0.5596, 0.3866, 0.6774, 0.5407, 0.4518, 0.6702, 0.6061, 0.7580, 0.8997,
556                         0.5198, 0.8326, 0.2138, 0.9198, 0.3000, 0.2833, 0.8288, 0.7076, 0.1820, 0.0728, 0.8447, 0.2367, 0.3239, 0.6413, 0.3114, 0.4731, 0.1192, 0.9273, 0.5724, 0.4331, 0.5136, 0.3547,
557                         0.4413, 0.2602, 0.5698, 0.7278, 0.5261, 0.6230, 0.2454, 0.3948, 0.7479, 0.6582, 0.4660, 0.5594, 0.7574, 0.1143, 0.5900, 0.1065, 0.4260, 0.3294, 0.8276, 0.0756};
558 
559   PetscScalar yr[64] = {0.7345, 0.9120, 0.9288, 0.7528, 0.4463, 0.4985, 0.2497, 0.6256, 0.3425, 0.9026, 0.6983, 0.4230, 0.7140, 0.2970, 0.4474, 0.8792, 0.6604, 0.2485, 0.7968, 0.6127, 0.1796, 0.2437,
560                         0.5938, 0.6137, 0.3867, 0.5658, 0.4575, 0.1009, 0.0863, 0.3361, 0.0738, 0.3985, 0.6602, 0.1437, 0.0934, 0.5983, 0.5950, 0.0763, 0.0768, 0.2288, 0.5761, 0.1129, 0.3841, 0.6150,
561                         0.6904, 0.6686, 0.1361, 0.4601, 0.4491, 0.3716, 0.1969, 0.6537, 0.6743, 0.6991, 0.4811, 0.5480, 0.1684, 0.4569, 0.6889, 0.8437, 0.3015, 0.2854, 0.8199, 0.2658};
562 
563   PetscScalar zr[64] = {0.7668, 0.8573, 0.2654, 0.2719, 0.1060, 0.1311, 0.6232, 0.2295, 0.8009, 0.2147, 0.2119, 0.9325, 0.4473, 0.3600, 0.3374, 0.3819, 0.4066, 0.5801, 0.1673, 0.0959, 0.4638, 0.8236,
564                         0.8800, 0.2939, 0.2028, 0.8262, 0.2706, 0.6276, 0.9085, 0.6443, 0.8241, 0.0712, 0.1824, 0.7789, 0.4389, 0.8415, 0.7055, 0.6639, 0.3653, 0.2078, 0.1987, 0.2297, 0.4321, 0.8115,
565                         0.4915, 0.7764, 0.4657, 0.4627, 0.4569, 0.4232, 0.8514, 0.0674, 0.3227, 0.1055, 0.6690, 0.6313, 0.9226, 0.5461, 0.4126, 0.2364, 0.6096, 0.7042, 0.3914, 0.0711};
566 
567   PetscFunctionBegin;
568   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
569   PetscCall(PetscLogStageRegister("Elliptic Setup", &user->stages[0]));
570   PetscCall(PetscLogStagePush(user->stages[0]));
571 
572   /* Create u,y,c,x */
573   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->u));
574   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->y));
575   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->c));
576   PetscCall(VecSetSizes(user->u, PETSC_DECIDE, user->ndesign));
577   PetscCall(VecSetFromOptions(user->u));
578   PetscCall(VecGetLocalSize(user->u, &ysubnlocal));
579   PetscCall(VecSetSizes(user->y, ysubnlocal * user->ns, user->nstate));
580   PetscCall(VecSetSizes(user->c, ysubnlocal * user->ns, user->m));
581   PetscCall(VecSetFromOptions(user->y));
582   PetscCall(VecSetFromOptions(user->c));
583 
584   /*
585      *******************************
586      Create scatters for x <-> y,u
587      *******************************
588 
589      If the state vector y and design vector u are partitioned as
590      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
591      then the solution vector x is organized as
592      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
593      The index sets user->s_is and user->d_is correspond to the indices of the
594      state and design variables owned by the current processor.
595   */
596   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->x));
597 
598   PetscCall(VecGetOwnershipRange(user->y, &lo, &hi));
599   PetscCall(VecGetOwnershipRange(user->u, &lo2, &hi2));
600 
601   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_allstate));
602   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + lo2, 1, &user->s_is));
603   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, lo2, 1, &is_alldesign));
604   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, hi + lo2, 1, &user->d_is));
605 
606   PetscCall(VecSetSizes(user->x, hi - lo + hi2 - lo2, user->n));
607   PetscCall(VecSetFromOptions(user->x));
608 
609   PetscCall(VecScatterCreate(user->x, user->s_is, user->y, is_allstate, &user->state_scatter));
610   PetscCall(VecScatterCreate(user->x, user->d_is, user->u, is_alldesign, &user->design_scatter));
611   PetscCall(ISDestroy(&is_alldesign));
612   PetscCall(ISDestroy(&is_allstate));
613   /*
614      *******************************
615      Create scatter from y to y_1,y_2,...,y_ns
616      *******************************
617   */
618   PetscCall(PetscMalloc1(user->ns, &user->yi_scatter));
619   PetscCall(VecDuplicate(user->u, &user->suby));
620   PetscCall(VecDuplicate(user->u, &user->subq));
621 
622   PetscCall(VecGetOwnershipRange(user->y, &lo2, &hi2));
623   istart = 0;
624   for (i = 0; i < user->ns; i++) {
625     PetscCall(VecGetOwnershipRange(user->suby, &lo, &hi));
626     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo2 + istart, 1, &is_from_y));
627     PetscCall(VecScatterCreate(user->y, is_from_y, user->suby, NULL, &user->yi_scatter[i]));
628     istart = istart + hi - lo;
629     PetscCall(ISDestroy(&is_from_y));
630   }
631   /*
632      *******************************
633      Create scatter from d to d_1,d_2,...,d_ns
634      *******************************
635   */
636   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->subd));
637   PetscCall(VecSetSizes(user->subd, PETSC_DECIDE, user->ndata));
638   PetscCall(VecSetFromOptions(user->subd));
639   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->d));
640   PetscCall(VecGetLocalSize(user->subd, &dsubnlocal));
641   PetscCall(VecSetSizes(user->d, dsubnlocal * user->ns, user->ndata * user->ns));
642   PetscCall(VecSetFromOptions(user->d));
643   PetscCall(PetscMalloc1(user->ns, &user->di_scatter));
644 
645   PetscCall(VecGetOwnershipRange(user->d, &lo2, &hi2));
646   istart = 0;
647   for (i = 0; i < user->ns; i++) {
648     PetscCall(VecGetOwnershipRange(user->subd, &lo, &hi));
649     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo2 + istart, 1, &is_from_d));
650     PetscCall(VecScatterCreate(user->d, is_from_d, user->subd, NULL, &user->di_scatter[i]));
651     istart = istart + hi - lo;
652     PetscCall(ISDestroy(&is_from_d));
653   }
654 
655   PetscCall(PetscMalloc1(user->mx, &x));
656   PetscCall(PetscMalloc1(user->mx, &y));
657   PetscCall(PetscMalloc1(user->mx, &z));
658 
659   user->ksp_its         = 0;
660   user->ksp_its_initial = 0;
661 
662   n         = user->mx * user->mx * user->mx;
663   m         = 3 * user->mx * user->mx * (user->mx - 1);
664   sqrt_beta = PetscSqrtScalar(user->beta);
665 
666   PetscCall(VecCreate(PETSC_COMM_WORLD, &XX));
667   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->q));
668   PetscCall(VecSetSizes(XX, ysubnlocal, n));
669   PetscCall(VecSetSizes(user->q, ysubnlocal * user->ns, user->m));
670   PetscCall(VecSetFromOptions(XX));
671   PetscCall(VecSetFromOptions(user->q));
672 
673   PetscCall(VecDuplicate(XX, &YY));
674   PetscCall(VecDuplicate(XX, &ZZ));
675   PetscCall(VecDuplicate(XX, &XXwork));
676   PetscCall(VecDuplicate(XX, &YYwork));
677   PetscCall(VecDuplicate(XX, &ZZwork));
678   PetscCall(VecDuplicate(XX, &UTwork));
679   PetscCall(VecDuplicate(XX, &user->utrue));
680 
681   /* map for striding q */
682   PetscCall(VecGetOwnershipRanges(user->q, &ranges));
683   PetscCall(VecGetOwnershipRanges(user->u, &subranges));
684 
685   PetscCall(VecGetOwnershipRange(user->q, &lo2, &hi2));
686   PetscCall(VecGetOwnershipRange(user->u, &lo, &hi));
687   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
688   h        = 1.0 / user->mx;
689   hinv     = user->mx;
690   neg_hinv = -hinv;
691 
692   PetscCall(VecGetOwnershipRange(XX, &istart, &iend));
693   for (linear_index = istart; linear_index < iend; linear_index++) {
694     i  = linear_index % user->mx;
695     j  = ((linear_index - i) / user->mx) % user->mx;
696     k  = ((linear_index - i) / user->mx - j) / user->mx;
697     vx = h * (i + 0.5);
698     vy = h * (j + 0.5);
699     vz = h * (k + 0.5);
700     PetscCall(VecSetValues(XX, 1, &linear_index, &vx, INSERT_VALUES));
701     PetscCall(VecSetValues(YY, 1, &linear_index, &vy, INSERT_VALUES));
702     PetscCall(VecSetValues(ZZ, 1, &linear_index, &vz, INSERT_VALUES));
703     for (is = 0; is < 2; is++) {
704       for (js = 0; js < 2; js++) {
705         for (ks = 0; ks < 2; ks++) {
706           ls = is * 4 + js * 2 + ks;
707           if (ls < user->ns) {
708             l = ls * n + linear_index;
709             /* remap */
710             subindex = l % n;
711             subvec   = l / n;
712             nrank    = 0;
713             while (subindex >= subranges[nrank + 1]) nrank++;
714             offset = subindex - subranges[nrank];
715             istart = 0;
716             for (kk = 0; kk < nrank; kk++) istart += user->ns * (subranges[kk + 1] - subranges[kk]);
717             istart += (subranges[nrank + 1] - subranges[nrank]) * subvec;
718             l = istart + offset;
719             v = 100 * PetscSinScalar(2 * PETSC_PI * (vx + 0.25 * is)) * PetscSinScalar(2 * PETSC_PI * (vy + 0.25 * js)) * PetscSinScalar(2 * PETSC_PI * (vz + 0.25 * ks));
720             PetscCall(VecSetValues(user->q, 1, &l, &v, INSERT_VALUES));
721           }
722         }
723       }
724     }
725   }
726 
727   PetscCall(VecAssemblyBegin(XX));
728   PetscCall(VecAssemblyEnd(XX));
729   PetscCall(VecAssemblyBegin(YY));
730   PetscCall(VecAssemblyEnd(YY));
731   PetscCall(VecAssemblyBegin(ZZ));
732   PetscCall(VecAssemblyEnd(ZZ));
733   PetscCall(VecAssemblyBegin(user->q));
734   PetscCall(VecAssemblyEnd(user->q));
735 
736   /* Compute true parameter function
737      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
738   PetscCall(VecCopy(XX, XXwork));
739   PetscCall(VecCopy(YY, YYwork));
740   PetscCall(VecCopy(ZZ, ZZwork));
741 
742   PetscCall(VecShift(XXwork, -0.25));
743   PetscCall(VecShift(YYwork, -0.25));
744   PetscCall(VecShift(ZZwork, -0.25));
745 
746   PetscCall(VecPointwiseMult(XXwork, XXwork, XXwork));
747   PetscCall(VecPointwiseMult(YYwork, YYwork, YYwork));
748   PetscCall(VecPointwiseMult(ZZwork, ZZwork, ZZwork));
749 
750   PetscCall(VecCopy(XXwork, UTwork));
751   PetscCall(VecAXPY(UTwork, 1.0, YYwork));
752   PetscCall(VecAXPY(UTwork, 1.0, ZZwork));
753   PetscCall(VecScale(UTwork, -20.0));
754   PetscCall(VecExp(UTwork));
755   PetscCall(VecCopy(UTwork, user->utrue));
756 
757   PetscCall(VecCopy(XX, XXwork));
758   PetscCall(VecCopy(YY, YYwork));
759   PetscCall(VecCopy(ZZ, ZZwork));
760 
761   PetscCall(VecShift(XXwork, -0.75));
762   PetscCall(VecShift(YYwork, -0.75));
763   PetscCall(VecShift(ZZwork, -0.75));
764 
765   PetscCall(VecPointwiseMult(XXwork, XXwork, XXwork));
766   PetscCall(VecPointwiseMult(YYwork, YYwork, YYwork));
767   PetscCall(VecPointwiseMult(ZZwork, ZZwork, ZZwork));
768 
769   PetscCall(VecCopy(XXwork, UTwork));
770   PetscCall(VecAXPY(UTwork, 1.0, YYwork));
771   PetscCall(VecAXPY(UTwork, 1.0, ZZwork));
772   PetscCall(VecScale(UTwork, -20.0));
773   PetscCall(VecExp(UTwork));
774 
775   PetscCall(VecAXPY(user->utrue, -1.0, UTwork));
776 
777   PetscCall(VecDestroy(&XX));
778   PetscCall(VecDestroy(&YY));
779   PetscCall(VecDestroy(&ZZ));
780   PetscCall(VecDestroy(&XXwork));
781   PetscCall(VecDestroy(&YYwork));
782   PetscCall(VecDestroy(&ZZwork));
783   PetscCall(VecDestroy(&UTwork));
784 
785   /* Initial guess and reference model */
786   PetscCall(VecDuplicate(user->utrue, &user->ur));
787   PetscCall(VecSum(user->utrue, &meanut));
788   meanut = meanut / n;
789   PetscCall(VecSet(user->ur, meanut));
790   PetscCall(VecCopy(user->ur, user->u));
791 
792   /* Generate Grad matrix */
793   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Grad));
794   PetscCall(MatSetSizes(user->Grad, PETSC_DECIDE, ysubnlocal, m, n));
795   PetscCall(MatSetFromOptions(user->Grad));
796   PetscCall(MatMPIAIJSetPreallocation(user->Grad, 2, NULL, 2, NULL));
797   PetscCall(MatSeqAIJSetPreallocation(user->Grad, 2, NULL));
798   PetscCall(MatGetOwnershipRange(user->Grad, &istart, &iend));
799 
800   for (i = istart; i < iend; i++) {
801     if (i < m / 3) {
802       iblock = i / (user->mx - 1);
803       j      = iblock * user->mx + (i % (user->mx - 1));
804       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
805       j = j + 1;
806       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES));
807     }
808     if (i >= m / 3 && i < 2 * m / 3) {
809       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
810       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
811       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
812       j = j + user->mx;
813       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES));
814     }
815     if (i >= 2 * m / 3) {
816       j = i - 2 * m / 3;
817       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
818       j = j + user->mx * user->mx;
819       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES));
820     }
821   }
822 
823   PetscCall(MatAssemblyBegin(user->Grad, MAT_FINAL_ASSEMBLY));
824   PetscCall(MatAssemblyEnd(user->Grad, MAT_FINAL_ASSEMBLY));
825 
826   /* Generate arithmetic averaging matrix Av */
827   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Av));
828   PetscCall(MatSetSizes(user->Av, PETSC_DECIDE, ysubnlocal, m, n));
829   PetscCall(MatSetFromOptions(user->Av));
830   PetscCall(MatMPIAIJSetPreallocation(user->Av, 2, NULL, 2, NULL));
831   PetscCall(MatSeqAIJSetPreallocation(user->Av, 2, NULL));
832   PetscCall(MatGetOwnershipRange(user->Av, &istart, &iend));
833 
834   for (i = istart; i < iend; i++) {
835     if (i < m / 3) {
836       iblock = i / (user->mx - 1);
837       j      = iblock * user->mx + (i % (user->mx - 1));
838       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
839       j = j + 1;
840       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
841     }
842     if (i >= m / 3 && i < 2 * m / 3) {
843       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
844       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
845       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
846       j = j + user->mx;
847       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
848     }
849     if (i >= 2 * m / 3) {
850       j = i - 2 * m / 3;
851       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
852       j = j + user->mx * user->mx;
853       PetscCall(MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES));
854     }
855   }
856 
857   PetscCall(MatAssemblyBegin(user->Av, MAT_FINAL_ASSEMBLY));
858   PetscCall(MatAssemblyEnd(user->Av, MAT_FINAL_ASSEMBLY));
859 
860   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->L));
861   PetscCall(MatSetSizes(user->L, PETSC_DECIDE, ysubnlocal, m + n, n));
862   PetscCall(MatSetFromOptions(user->L));
863   PetscCall(MatMPIAIJSetPreallocation(user->L, 2, NULL, 2, NULL));
864   PetscCall(MatSeqAIJSetPreallocation(user->L, 2, NULL));
865   PetscCall(MatGetOwnershipRange(user->L, &istart, &iend));
866 
867   for (i = istart; i < iend; i++) {
868     if (i < m / 3) {
869       iblock = i / (user->mx - 1);
870       j      = iblock * user->mx + (i % (user->mx - 1));
871       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
872       j = j + 1;
873       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES));
874     }
875     if (i >= m / 3 && i < 2 * m / 3) {
876       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
877       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
878       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
879       j = j + user->mx;
880       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES));
881     }
882     if (i >= 2 * m / 3 && i < m) {
883       j = i - 2 * m / 3;
884       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES));
885       j = j + user->mx * user->mx;
886       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES));
887     }
888     if (i >= m) {
889       j = i - m;
890       PetscCall(MatSetValues(user->L, 1, &i, 1, &j, &sqrt_beta, INSERT_VALUES));
891     }
892   }
893   PetscCall(MatAssemblyBegin(user->L, MAT_FINAL_ASSEMBLY));
894   PetscCall(MatAssemblyEnd(user->L, MAT_FINAL_ASSEMBLY));
895   PetscCall(MatScale(user->L, PetscPowScalar(h, 1.5)));
896 
897   /* Generate Div matrix */
898   if (!user->use_ptap) {
899     /* Generate Div matrix */
900     PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Div));
901     PetscCall(MatSetSizes(user->Div, ysubnlocal, PETSC_DECIDE, n, m));
902     PetscCall(MatSetFromOptions(user->Div));
903     PetscCall(MatMPIAIJSetPreallocation(user->Div, 4, NULL, 4, NULL));
904     PetscCall(MatSeqAIJSetPreallocation(user->Div, 6, NULL));
905     PetscCall(MatGetOwnershipRange(user->Grad, &istart, &iend));
906 
907     for (i = istart; i < iend; i++) {
908       if (i < m / 3) {
909         iblock = i / (user->mx - 1);
910         j      = iblock * user->mx + (i % (user->mx - 1));
911         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &neg_hinv, INSERT_VALUES));
912         j = j + 1;
913         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &hinv, INSERT_VALUES));
914       }
915       if (i >= m / 3 && i < 2 * m / 3) {
916         iblock = (i - m / 3) / (user->mx * (user->mx - 1));
917         j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
918         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &neg_hinv, INSERT_VALUES));
919         j = j + user->mx;
920         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &hinv, INSERT_VALUES));
921       }
922       if (i >= 2 * m / 3) {
923         j = i - 2 * m / 3;
924         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &neg_hinv, INSERT_VALUES));
925         j = j + user->mx * user->mx;
926         PetscCall(MatSetValues(user->Div, 1, &j, 1, &i, &hinv, INSERT_VALUES));
927       }
928     }
929 
930     PetscCall(MatAssemblyBegin(user->Div, MAT_FINAL_ASSEMBLY));
931     PetscCall(MatAssemblyEnd(user->Div, MAT_FINAL_ASSEMBLY));
932     PetscCall(MatDuplicate(user->Div, MAT_SHARE_NONZERO_PATTERN, &user->Divwork));
933   } else {
934     PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Diag));
935     PetscCall(MatSetSizes(user->Diag, PETSC_DECIDE, PETSC_DECIDE, m, m));
936     PetscCall(MatSetFromOptions(user->Diag));
937     PetscCall(MatMPIAIJSetPreallocation(user->Diag, 1, NULL, 0, NULL));
938     PetscCall(MatSeqAIJSetPreallocation(user->Diag, 1, NULL));
939   }
940 
941   /* Build work vectors and matrices */
942   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->S));
943   PetscCall(VecSetSizes(user->S, PETSC_DECIDE, m));
944   PetscCall(VecSetFromOptions(user->S));
945 
946   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->lwork));
947   PetscCall(VecSetSizes(user->lwork, PETSC_DECIDE, m + user->mx * user->mx * user->mx));
948   PetscCall(VecSetFromOptions(user->lwork));
949 
950   PetscCall(MatDuplicate(user->Av, MAT_SHARE_NONZERO_PATTERN, &user->Avwork));
951 
952   PetscCall(VecDuplicate(user->S, &user->Swork));
953   PetscCall(VecDuplicate(user->S, &user->Sdiag));
954   PetscCall(VecDuplicate(user->S, &user->Av_u));
955   PetscCall(VecDuplicate(user->S, &user->Twork));
956   PetscCall(VecDuplicate(user->y, &user->ywork));
957   PetscCall(VecDuplicate(user->u, &user->Ywork));
958   PetscCall(VecDuplicate(user->u, &user->uwork));
959   PetscCall(VecDuplicate(user->u, &user->js_diag));
960   PetscCall(VecDuplicate(user->c, &user->cwork));
961   PetscCall(VecDuplicate(user->d, &user->dwork));
962 
963   /* Create a matrix-free shell user->Jd for computing B*x */
964   PetscCall(MatCreateShell(PETSC_COMM_WORLD, ysubnlocal * user->ns, ysubnlocal, user->nstate, user->ndesign, user, &user->Jd));
965   PetscCall(MatShellSetOperation(user->Jd, MATOP_MULT, (void (*)(void))DesignMatMult));
966   PetscCall(MatShellSetOperation(user->Jd, MATOP_MULT_TRANSPOSE, (void (*)(void))DesignMatMultTranspose));
967 
968   /* Compute true state function ytrue given utrue */
969   PetscCall(VecDuplicate(user->y, &user->ytrue));
970 
971   /* First compute Av_u = Av*exp(-u) */
972   PetscCall(VecSet(user->uwork, 0));
973   PetscCall(VecAXPY(user->uwork, -1.0, user->utrue)); /* Note: user->utrue */
974   PetscCall(VecExp(user->uwork));
975   PetscCall(MatMult(user->Av, user->uwork, user->Av_u));
976 
977   /* Next form DSG = Div*S*Grad */
978   PetscCall(VecCopy(user->Av_u, user->Swork));
979   PetscCall(VecReciprocal(user->Swork));
980   if (user->use_ptap) {
981     PetscCall(MatDiagonalSet(user->Diag, user->Swork, INSERT_VALUES));
982     PetscCall(MatPtAP(user->Diag, user->Grad, MAT_INITIAL_MATRIX, 1.0, &user->DSG));
983   } else {
984     PetscCall(MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN));
985     PetscCall(MatDiagonalScale(user->Divwork, NULL, user->Swork));
986 
987     PetscCall(MatMatMult(user->Divwork, user->Grad, MAT_INITIAL_MATRIX, 1.0, &user->DSG));
988   }
989 
990   PetscCall(MatSetOption(user->DSG, MAT_SYMMETRIC, PETSC_TRUE));
991   PetscCall(MatSetOption(user->DSG, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
992 
993   if (user->use_lrc == PETSC_TRUE) {
994     v = PetscSqrtReal(1.0 / user->ndesign);
995     PetscCall(PetscMalloc1(user->ndesign, &user->ones));
996 
997     for (i = 0; i < user->ndesign; i++) user->ones[i] = v;
998     PetscCall(MatCreateDense(PETSC_COMM_WORLD, ysubnlocal, PETSC_DECIDE, user->ndesign, 1, user->ones, &user->Ones));
999     PetscCall(MatCreateLRC(user->DSG, user->Ones, NULL, user->Ones, &user->JsBlock));
1000     PetscCall(MatSetUp(user->JsBlock));
1001   } else {
1002     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
1003     PetscCall(MatCreateShell(PETSC_COMM_WORLD, ysubnlocal, ysubnlocal, user->ndesign, user->ndesign, user, &user->JsBlock));
1004     PetscCall(MatShellSetOperation(user->JsBlock, MATOP_MULT, (void (*)(void))StateBlockMatMult));
1005     PetscCall(MatShellSetOperation(user->JsBlock, MATOP_MULT_TRANSPOSE, (void (*)(void))StateBlockMatMult));
1006   }
1007   PetscCall(MatSetOption(user->JsBlock, MAT_SYMMETRIC, PETSC_TRUE));
1008   PetscCall(MatSetOption(user->JsBlock, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
1009   PetscCall(MatCreateShell(PETSC_COMM_WORLD, ysubnlocal * user->ns, ysubnlocal * user->ns, user->nstate, user->nstate, user, &user->Js));
1010   PetscCall(MatShellSetOperation(user->Js, MATOP_MULT, (void (*)(void))StateMatMult));
1011   PetscCall(MatShellSetOperation(user->Js, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatMult));
1012   PetscCall(MatSetOption(user->Js, MAT_SYMMETRIC, PETSC_TRUE));
1013   PetscCall(MatSetOption(user->Js, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
1014 
1015   PetscCall(MatCreateShell(PETSC_COMM_WORLD, ysubnlocal * user->ns, ysubnlocal * user->ns, user->nstate, user->nstate, user, &user->JsInv));
1016   PetscCall(MatShellSetOperation(user->JsInv, MATOP_MULT, (void (*)(void))StateInvMatMult));
1017   PetscCall(MatShellSetOperation(user->JsInv, MATOP_MULT_TRANSPOSE, (void (*)(void))StateInvMatMult));
1018   PetscCall(MatSetOption(user->JsInv, MAT_SYMMETRIC, PETSC_TRUE));
1019   PetscCall(MatSetOption(user->JsInv, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
1020 
1021   PetscCall(MatSetOption(user->DSG, MAT_SYMMETRIC, PETSC_TRUE));
1022   PetscCall(MatSetOption(user->DSG, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
1023   /* Now solve for ytrue */
1024   PetscCall(KSPCreate(PETSC_COMM_WORLD, &user->solver));
1025   PetscCall(KSPSetFromOptions(user->solver));
1026 
1027   PetscCall(KSPSetOperators(user->solver, user->JsBlock, user->DSG));
1028 
1029   PetscCall(MatMult(user->JsInv, user->q, user->ytrue));
1030   /* First compute Av_u = Av*exp(-u) */
1031   PetscCall(VecSet(user->uwork, 0));
1032   PetscCall(VecAXPY(user->uwork, -1.0, user->u)); /* Note: user->u */
1033   PetscCall(VecExp(user->uwork));
1034   PetscCall(MatMult(user->Av, user->uwork, user->Av_u));
1035 
1036   /* Next update DSG = Div*S*Grad  with user->u */
1037   PetscCall(VecCopy(user->Av_u, user->Swork));
1038   PetscCall(VecReciprocal(user->Swork));
1039   if (user->use_ptap) {
1040     PetscCall(MatDiagonalSet(user->Diag, user->Swork, INSERT_VALUES));
1041     PetscCall(MatPtAP(user->Diag, user->Grad, MAT_REUSE_MATRIX, 1.0, &user->DSG));
1042   } else {
1043     PetscCall(MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN));
1044     PetscCall(MatDiagonalScale(user->Divwork, NULL, user->Av_u));
1045     PetscCall(MatProductNumeric(user->DSG));
1046   }
1047 
1048   /* Now solve for y */
1049 
1050   PetscCall(MatMult(user->JsInv, user->q, user->y));
1051 
1052   user->ksp_its_initial = user->ksp_its;
1053   user->ksp_its         = 0;
1054   /* Construct projection matrix Q (blocks) */
1055   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Q));
1056   PetscCall(MatSetSizes(user->Q, dsubnlocal, ysubnlocal, user->ndata, user->ndesign));
1057   PetscCall(MatSetFromOptions(user->Q));
1058   PetscCall(MatMPIAIJSetPreallocation(user->Q, 8, NULL, 8, NULL));
1059   PetscCall(MatSeqAIJSetPreallocation(user->Q, 8, NULL));
1060 
1061   for (i = 0; i < user->mx; i++) {
1062     x[i] = h * (i + 0.5);
1063     y[i] = h * (i + 0.5);
1064     z[i] = h * (i + 0.5);
1065   }
1066   PetscCall(MatGetOwnershipRange(user->Q, &istart, &iend));
1067 
1068   nx = user->mx;
1069   ny = user->mx;
1070   nz = user->mx;
1071   for (i = istart; i < iend; i++) {
1072     xri = xr[i];
1073     im  = 0;
1074     xim = x[im];
1075     while (xri > xim && im < nx) {
1076       im  = im + 1;
1077       xim = x[im];
1078     }
1079     indx1 = im - 1;
1080     indx2 = im;
1081     dx1   = xri - x[indx1];
1082     dx2   = x[indx2] - xri;
1083 
1084     yri = yr[i];
1085     im  = 0;
1086     yim = y[im];
1087     while (yri > yim && im < ny) {
1088       im  = im + 1;
1089       yim = y[im];
1090     }
1091     indy1 = im - 1;
1092     indy2 = im;
1093     dy1   = yri - y[indy1];
1094     dy2   = y[indy2] - yri;
1095 
1096     zri = zr[i];
1097     im  = 0;
1098     zim = z[im];
1099     while (zri > zim && im < nz) {
1100       im  = im + 1;
1101       zim = z[im];
1102     }
1103     indz1 = im - 1;
1104     indz2 = im;
1105     dz1   = zri - z[indz1];
1106     dz2   = z[indz2] - zri;
1107 
1108     Dx = x[indx2] - x[indx1];
1109     Dy = y[indy2] - y[indy1];
1110     Dz = z[indz2] - z[indz1];
1111 
1112     j = indx1 + indy1 * nx + indz1 * nx * ny;
1113     v = (1 - dx1 / Dx) * (1 - dy1 / Dy) * (1 - dz1 / Dz);
1114     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1115 
1116     j = indx1 + indy1 * nx + indz2 * nx * ny;
1117     v = (1 - dx1 / Dx) * (1 - dy1 / Dy) * (1 - dz2 / Dz);
1118     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1119 
1120     j = indx1 + indy2 * nx + indz1 * nx * ny;
1121     v = (1 - dx1 / Dx) * (1 - dy2 / Dy) * (1 - dz1 / Dz);
1122     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1123 
1124     j = indx1 + indy2 * nx + indz2 * nx * ny;
1125     v = (1 - dx1 / Dx) * (1 - dy2 / Dy) * (1 - dz2 / Dz);
1126     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1127 
1128     j = indx2 + indy1 * nx + indz1 * nx * ny;
1129     v = (1 - dx2 / Dx) * (1 - dy1 / Dy) * (1 - dz1 / Dz);
1130     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1131 
1132     j = indx2 + indy1 * nx + indz2 * nx * ny;
1133     v = (1 - dx2 / Dx) * (1 - dy1 / Dy) * (1 - dz2 / Dz);
1134     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1135 
1136     j = indx2 + indy2 * nx + indz1 * nx * ny;
1137     v = (1 - dx2 / Dx) * (1 - dy2 / Dy) * (1 - dz1 / Dz);
1138     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1139 
1140     j = indx2 + indy2 * nx + indz2 * nx * ny;
1141     v = (1 - dx2 / Dx) * (1 - dy2 / Dy) * (1 - dz2 / Dz);
1142     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES));
1143   }
1144 
1145   PetscCall(MatAssemblyBegin(user->Q, MAT_FINAL_ASSEMBLY));
1146   PetscCall(MatAssemblyEnd(user->Q, MAT_FINAL_ASSEMBLY));
1147   /* Create MQ (composed of blocks of Q */
1148   PetscCall(MatCreateShell(PETSC_COMM_WORLD, dsubnlocal * user->ns, PETSC_DECIDE, user->ndata * user->ns, user->nstate, user, &user->MQ));
1149   PetscCall(MatShellSetOperation(user->MQ, MATOP_MULT, (void (*)(void))QMatMult));
1150   PetscCall(MatShellSetOperation(user->MQ, MATOP_MULT_TRANSPOSE, (void (*)(void))QMatMultTranspose));
1151 
1152   /* Add noise to the measurement data */
1153   PetscCall(VecSet(user->ywork, 1.0));
1154   PetscCall(VecAYPX(user->ywork, user->noise, user->ytrue));
1155   PetscCall(MatMult(user->MQ, user->ywork, user->d));
1156 
1157   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1158   PetscCall(PetscFree(x));
1159   PetscCall(PetscFree(y));
1160   PetscCall(PetscFree(z));
1161   PetscCall(PetscLogStagePop());
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 PetscErrorCode EllipticDestroy(AppCtx *user)
1166 {
1167   PetscInt i;
1168 
1169   PetscFunctionBegin;
1170   PetscCall(MatDestroy(&user->DSG));
1171   PetscCall(KSPDestroy(&user->solver));
1172   PetscCall(MatDestroy(&user->Q));
1173   PetscCall(MatDestroy(&user->MQ));
1174   if (!user->use_ptap) {
1175     PetscCall(MatDestroy(&user->Div));
1176     PetscCall(MatDestroy(&user->Divwork));
1177   } else {
1178     PetscCall(MatDestroy(&user->Diag));
1179   }
1180   if (user->use_lrc) PetscCall(MatDestroy(&user->Ones));
1181 
1182   PetscCall(MatDestroy(&user->Grad));
1183   PetscCall(MatDestroy(&user->Av));
1184   PetscCall(MatDestroy(&user->Avwork));
1185   PetscCall(MatDestroy(&user->L));
1186   PetscCall(MatDestroy(&user->Js));
1187   PetscCall(MatDestroy(&user->Jd));
1188   PetscCall(MatDestroy(&user->JsBlock));
1189   PetscCall(MatDestroy(&user->JsInv));
1190 
1191   PetscCall(VecDestroy(&user->x));
1192   PetscCall(VecDestroy(&user->u));
1193   PetscCall(VecDestroy(&user->uwork));
1194   PetscCall(VecDestroy(&user->utrue));
1195   PetscCall(VecDestroy(&user->y));
1196   PetscCall(VecDestroy(&user->ywork));
1197   PetscCall(VecDestroy(&user->ytrue));
1198   PetscCall(VecDestroy(&user->c));
1199   PetscCall(VecDestroy(&user->cwork));
1200   PetscCall(VecDestroy(&user->ur));
1201   PetscCall(VecDestroy(&user->q));
1202   PetscCall(VecDestroy(&user->d));
1203   PetscCall(VecDestroy(&user->dwork));
1204   PetscCall(VecDestroy(&user->lwork));
1205   PetscCall(VecDestroy(&user->S));
1206   PetscCall(VecDestroy(&user->Swork));
1207   PetscCall(VecDestroy(&user->Sdiag));
1208   PetscCall(VecDestroy(&user->Ywork));
1209   PetscCall(VecDestroy(&user->Twork));
1210   PetscCall(VecDestroy(&user->Av_u));
1211   PetscCall(VecDestroy(&user->js_diag));
1212   PetscCall(ISDestroy(&user->s_is));
1213   PetscCall(ISDestroy(&user->d_is));
1214   PetscCall(VecDestroy(&user->suby));
1215   PetscCall(VecDestroy(&user->subd));
1216   PetscCall(VecDestroy(&user->subq));
1217   PetscCall(VecScatterDestroy(&user->state_scatter));
1218   PetscCall(VecScatterDestroy(&user->design_scatter));
1219   for (i = 0; i < user->ns; i++) {
1220     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1221     PetscCall(VecScatterDestroy(&user->di_scatter[i]));
1222   }
1223   PetscCall(PetscFree(user->yi_scatter));
1224   PetscCall(PetscFree(user->di_scatter));
1225   if (user->use_lrc) {
1226     PetscCall(PetscFree(user->ones));
1227     PetscCall(MatDestroy(&user->Ones));
1228   }
1229   PetscFunctionReturn(PETSC_SUCCESS);
1230 }
1231 
1232 PetscErrorCode EllipticMonitor(Tao tao, void *ptr)
1233 {
1234   Vec       X;
1235   PetscReal unorm, ynorm;
1236   AppCtx   *user = (AppCtx *)ptr;
1237 
1238   PetscFunctionBegin;
1239   PetscCall(TaoGetSolution(tao, &X));
1240   PetscCall(Scatter(X, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
1241   PetscCall(VecAXPY(user->ywork, -1.0, user->ytrue));
1242   PetscCall(VecAXPY(user->uwork, -1.0, user->utrue));
1243   PetscCall(VecNorm(user->uwork, NORM_2, &unorm));
1244   PetscCall(VecNorm(user->ywork, NORM_2, &ynorm));
1245   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n", (double)unorm, (double)ynorm));
1246   PetscFunctionReturn(PETSC_SUCCESS);
1247 }
1248 
1249 /*TEST
1250 
1251    build:
1252       requires: !complex
1253 
1254    test:
1255       args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11
1256       requires: !single
1257 
1258    test:
1259       suffix: 2
1260       args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3
1261       requires: !single
1262 
1263 TEST*/
1264