xref: /petsc/src/tao/pde_constrained/tutorials/hyperbolic.c (revision a16fd2c93c02146fccd68469496ac02ca99b9ebe)
1 #include <petsctao.h>
2 
3 typedef struct {
4   PetscInt    n;     /*  Number of variables */
5   PetscInt    m;     /*  Number of constraints */
6   PetscInt    mx;    /*  grid points in each direction */
7   PetscInt    nt;    /*  Number of time steps */
8   PetscInt    ndata; /*  Number of data points per sample */
9   IS          s_is;
10   IS          d_is;
11   VecScatter  state_scatter;
12   VecScatter  design_scatter;
13   VecScatter *uxi_scatter, *uyi_scatter, *ux_scatter, *uy_scatter, *ui_scatter;
14   VecScatter *yi_scatter;
15 
16   Mat       Js, Jd, JsBlockPrec, JsInv, JsBlock;
17   PetscBool jformed, c_formed;
18 
19   PetscReal alpha; /*  Regularization parameter */
20   PetscReal gamma;
21   PetscReal ht; /*  Time step */
22   PetscReal T;  /*  Final time */
23   Mat       Q, QT;
24   Mat       L, LT;
25   Mat       Div, Divwork, Divxy[2];
26   Mat       Grad, Gradxy[2];
27   Mat       M;
28   Mat      *C, *Cwork;
29   /* Mat Hs,Hd,Hsd; */
30   Vec       q;
31   Vec       ur; /*  reference */
32 
33   Vec d;
34   Vec dwork;
35 
36   Vec  y; /*  state variables */
37   Vec  ywork;
38   Vec  ytrue;
39   Vec *yi, *yiwork, *ziwork;
40   Vec *uxi, *uyi, *uxiwork, *uyiwork, *ui, *uiwork;
41 
42   Vec u; /*  design variables */
43   Vec uwork, vwork;
44   Vec utrue;
45 
46   Vec js_diag;
47 
48   Vec c; /*  constraint vector */
49   Vec cwork;
50 
51   Vec lwork;
52 
53   KSP      solver;
54   PC       prec;
55   PetscInt block_index;
56 
57   PetscInt ksp_its;
58   PetscInt ksp_its_initial;
59 } AppCtx;
60 
61 PetscErrorCode FormFunction(Tao, Vec, PetscReal *, void *);
62 PetscErrorCode FormGradient(Tao, Vec, Vec, void *);
63 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal *, Vec, void *);
64 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void *);
65 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat, void *);
66 PetscErrorCode FormConstraints(Tao, Vec, Vec, void *);
67 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void *);
68 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
69 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
70 PetscErrorCode HyperbolicInitialize(AppCtx *user);
71 PetscErrorCode HyperbolicDestroy(AppCtx *user);
72 PetscErrorCode HyperbolicMonitor(Tao, void *);
73 
74 PetscErrorCode StateMatMult(Mat, Vec, Vec);
75 PetscErrorCode StateMatBlockMult(Mat, Vec, Vec);
76 PetscErrorCode StateMatBlockMultTranspose(Mat, Vec, Vec);
77 PetscErrorCode StateMatMultTranspose(Mat, Vec, Vec);
78 PetscErrorCode StateMatGetDiagonal(Mat, Vec);
79 PetscErrorCode StateMatDuplicate(Mat, MatDuplicateOption, Mat *);
80 PetscErrorCode StateMatInvMult(Mat, Vec, Vec);
81 PetscErrorCode StateMatInvTransposeMult(Mat, Vec, Vec);
82 PetscErrorCode StateMatBlockPrecMult(PC, Vec, Vec);
83 
84 PetscErrorCode DesignMatMult(Mat, Vec, Vec);
85 PetscErrorCode DesignMatMultTranspose(Mat, Vec, Vec);
86 
87 PetscErrorCode Scatter_yi(Vec, Vec *, VecScatter *, PetscInt); /*  y to y1,y2,...,y_nt */
88 PetscErrorCode Gather_yi(Vec, Vec *, VecScatter *, PetscInt);
89 PetscErrorCode Scatter_uxi_uyi(Vec, Vec *, VecScatter *, Vec *, VecScatter *, PetscInt); /*  u to ux_1,uy_1,ux_2,uy_2,...,u */
90 PetscErrorCode Gather_uxi_uyi(Vec, Vec *, VecScatter *, Vec *, VecScatter *, PetscInt);
91 
92 static char help[] = "";
93 
94 int main(int argc, char **argv) {
95   Vec      x, x0;
96   Tao      tao;
97   AppCtx   user;
98   IS       is_allstate, is_alldesign;
99   PetscInt lo, hi, hi2, lo2, ksp_old;
100   PetscInt ntests = 1;
101   PetscInt i;
102 #if defined(PETSC_USE_LOG)
103   PetscLogStage stages[1];
104 #endif
105 
106   PetscFunctionBeginUser;
107   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
108   user.mx = 32;
109   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "hyperbolic example", NULL);
110   PetscCall(PetscOptionsInt("-mx", "Number of grid points in each direction", "", user.mx, &user.mx, NULL));
111   user.nt = 16;
112   PetscCall(PetscOptionsInt("-nt", "Number of time steps", "", user.nt, &user.nt, NULL));
113   user.ndata = 64;
114   PetscCall(PetscOptionsInt("-ndata", "Numbers of data points per sample", "", user.ndata, &user.ndata, NULL));
115   user.alpha = 10.0;
116   PetscCall(PetscOptionsReal("-alpha", "Regularization parameter", "", user.alpha, &user.alpha, NULL));
117   user.T = 1.0 / 32.0;
118   PetscCall(PetscOptionsReal("-Tfinal", "Final time", "", user.T, &user.T, NULL));
119   PetscCall(PetscOptionsInt("-ntests", "Number of times to repeat TaoSolve", "", ntests, &ntests, NULL));
120   PetscOptionsEnd();
121 
122   user.m     = user.mx * user.mx * user.nt;     /*  number of constraints */
123   user.n     = user.mx * user.mx * 3 * user.nt; /*  number of variables */
124   user.ht    = user.T / user.nt;                /*  Time step */
125   user.gamma = user.T * user.ht / (user.mx * user.mx);
126 
127   PetscCall(VecCreate(PETSC_COMM_WORLD, &user.u));
128   PetscCall(VecCreate(PETSC_COMM_WORLD, &user.y));
129   PetscCall(VecCreate(PETSC_COMM_WORLD, &user.c));
130   PetscCall(VecSetSizes(user.u, PETSC_DECIDE, user.n - user.m));
131   PetscCall(VecSetSizes(user.y, PETSC_DECIDE, user.m));
132   PetscCall(VecSetSizes(user.c, PETSC_DECIDE, user.m));
133   PetscCall(VecSetFromOptions(user.u));
134   PetscCall(VecSetFromOptions(user.y));
135   PetscCall(VecSetFromOptions(user.c));
136 
137   /* Create scatters for reduced spaces.
138      If the state vector y and design vector u are partitioned as
139      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
140      then the solution vector x is organized as
141      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
142      The index sets user.s_is and user.d_is correspond to the indices of the
143      state and design variables owned by the current processor.
144   */
145   PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
146 
147   PetscCall(VecGetOwnershipRange(user.y, &lo, &hi));
148   PetscCall(VecGetOwnershipRange(user.u, &lo2, &hi2));
149 
150   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_allstate));
151   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + lo2, 1, &user.s_is));
152 
153   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, lo2, 1, &is_alldesign));
154   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, hi + lo2, 1, &user.d_is));
155 
156   PetscCall(VecSetSizes(x, hi - lo + hi2 - lo2, user.n));
157   PetscCall(VecSetFromOptions(x));
158 
159   PetscCall(VecScatterCreate(x, user.s_is, user.y, is_allstate, &user.state_scatter));
160   PetscCall(VecScatterCreate(x, user.d_is, user.u, is_alldesign, &user.design_scatter));
161   PetscCall(ISDestroy(&is_alldesign));
162   PetscCall(ISDestroy(&is_allstate));
163 
164   /* Create TAO solver and set desired solution method */
165   PetscCall(TaoCreate(PETSC_COMM_WORLD, &tao));
166   PetscCall(TaoSetType(tao, TAOLCL));
167 
168   /* Set up initial vectors and matrices */
169   PetscCall(HyperbolicInitialize(&user));
170 
171   PetscCall(Gather(x, user.y, user.state_scatter, user.u, user.design_scatter));
172   PetscCall(VecDuplicate(x, &x0));
173   PetscCall(VecCopy(x, x0));
174 
175   /* Set solution vector with an initial guess */
176   PetscCall(TaoSetSolution(tao, x));
177   PetscCall(TaoSetObjective(tao, FormFunction, &user));
178   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
179   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
180   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, user.Js, user.JsInv, FormJacobianState, &user));
181   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
182   PetscCall(TaoSetFromOptions(tao));
183   PetscCall(TaoSetStateDesignIS(tao, user.s_is, user.d_is));
184 
185   /* SOLVE THE APPLICATION */
186   PetscCall(PetscLogStageRegister("Trials", &stages[0]));
187   PetscCall(PetscLogStagePush(stages[0]));
188   user.ksp_its_initial = user.ksp_its;
189   ksp_old              = user.ksp_its;
190   for (i = 0; i < ntests; i++) {
191     PetscCall(TaoSolve(tao));
192     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "KSP Iterations = %" PetscInt_FMT "\n", user.ksp_its - ksp_old));
193     PetscCall(VecCopy(x0, x));
194     PetscCall(TaoSetSolution(tao, x));
195   }
196   PetscCall(PetscLogStagePop());
197   PetscCall(PetscBarrier((PetscObject)x));
198   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "KSP iterations within initialization: "));
199   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", user.ksp_its_initial));
200   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Total KSP iterations over %" PetscInt_FMT " trial(s): ", ntests));
201   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", user.ksp_its));
202   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "KSP iterations per trial: "));
203   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", (user.ksp_its - user.ksp_its_initial) / ntests));
204 
205   PetscCall(TaoDestroy(&tao));
206   PetscCall(VecDestroy(&x));
207   PetscCall(VecDestroy(&x0));
208   PetscCall(HyperbolicDestroy(&user));
209   PetscCall(PetscFinalize());
210   return 0;
211 }
212 /* ------------------------------------------------------------------- */
213 /*
214    dwork = Qy - d
215    lwork = L*(u-ur).^2
216    f = 1/2 * (dwork.dork + alpha*y.lwork)
217 */
218 PetscErrorCode FormFunction(Tao tao, Vec X, PetscReal *f, void *ptr) {
219   PetscReal d1 = 0, d2 = 0;
220   AppCtx   *user = (AppCtx *)ptr;
221 
222   PetscFunctionBegin;
223   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
224   PetscCall(MatMult(user->Q, user->y, user->dwork));
225   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
226   PetscCall(VecDot(user->dwork, user->dwork, &d1));
227 
228   PetscCall(VecWAXPY(user->uwork, -1.0, user->ur, user->u));
229   PetscCall(VecPointwiseMult(user->uwork, user->uwork, user->uwork));
230   PetscCall(MatMult(user->L, user->uwork, user->lwork));
231   PetscCall(VecDot(user->y, user->lwork, &d2));
232   *f = 0.5 * (d1 + user->alpha * d2);
233   PetscFunctionReturn(0);
234 }
235 
236 /* ------------------------------------------------------------------- */
237 /*
238     state: g_s = Q' *(Qy - d) + 0.5*alpha*L*(u-ur).^2
239     design: g_d = alpha*(L'y).*(u-ur)
240 */
241 PetscErrorCode FormGradient(Tao tao, Vec X, Vec G, void *ptr) {
242   AppCtx *user = (AppCtx *)ptr;
243 
244   PetscFunctionBegin;
245   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
246   PetscCall(MatMult(user->Q, user->y, user->dwork));
247   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
248 
249   PetscCall(MatMult(user->QT, user->dwork, user->ywork));
250 
251   PetscCall(MatMult(user->LT, user->y, user->uwork));
252   PetscCall(VecWAXPY(user->vwork, -1.0, user->ur, user->u));
253   PetscCall(VecPointwiseMult(user->uwork, user->vwork, user->uwork));
254   PetscCall(VecScale(user->uwork, user->alpha));
255 
256   PetscCall(VecPointwiseMult(user->vwork, user->vwork, user->vwork));
257   PetscCall(MatMult(user->L, user->vwork, user->lwork));
258   PetscCall(VecAXPY(user->ywork, 0.5 * user->alpha, user->lwork));
259 
260   PetscCall(Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
261   PetscFunctionReturn(0);
262 }
263 
264 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr) {
265   PetscReal d1, d2;
266   AppCtx   *user = (AppCtx *)ptr;
267 
268   PetscFunctionBegin;
269   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
270   PetscCall(MatMult(user->Q, user->y, user->dwork));
271   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
272 
273   PetscCall(MatMult(user->QT, user->dwork, user->ywork));
274 
275   PetscCall(VecDot(user->dwork, user->dwork, &d1));
276 
277   PetscCall(MatMult(user->LT, user->y, user->uwork));
278   PetscCall(VecWAXPY(user->vwork, -1.0, user->ur, user->u));
279   PetscCall(VecPointwiseMult(user->uwork, user->vwork, user->uwork));
280   PetscCall(VecScale(user->uwork, user->alpha));
281 
282   PetscCall(VecPointwiseMult(user->vwork, user->vwork, user->vwork));
283   PetscCall(MatMult(user->L, user->vwork, user->lwork));
284   PetscCall(VecAXPY(user->ywork, 0.5 * user->alpha, user->lwork));
285 
286   PetscCall(VecDot(user->y, user->lwork, &d2));
287 
288   *f = 0.5 * (d1 + user->alpha * d2);
289   PetscCall(Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
290   PetscFunctionReturn(0);
291 }
292 
293 /* ------------------------------------------------------------------- */
294 /* A
295 MatShell object
296 */
297 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr) {
298   PetscInt i;
299   AppCtx  *user = (AppCtx *)ptr;
300 
301   PetscFunctionBegin;
302   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
303   PetscCall(Scatter_yi(user->u, user->ui, user->ui_scatter, user->nt));
304   PetscCall(Scatter_uxi_uyi(user->u, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
305   for (i = 0; i < user->nt; i++) {
306     PetscCall(MatCopy(user->Divxy[0], user->C[i], SUBSET_NONZERO_PATTERN));
307     PetscCall(MatCopy(user->Divxy[1], user->Cwork[i], SAME_NONZERO_PATTERN));
308 
309     PetscCall(MatDiagonalScale(user->C[i], NULL, user->uxi[i]));
310     PetscCall(MatDiagonalScale(user->Cwork[i], NULL, user->uyi[i]));
311     PetscCall(MatAXPY(user->C[i], 1.0, user->Cwork[i], SUBSET_NONZERO_PATTERN));
312     PetscCall(MatScale(user->C[i], user->ht));
313     PetscCall(MatShift(user->C[i], 1.0));
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 /* ------------------------------------------------------------------- */
319 /* B */
320 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr) {
321   AppCtx *user = (AppCtx *)ptr;
322 
323   PetscFunctionBegin;
324   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
325   PetscFunctionReturn(0);
326 }
327 
328 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y) {
329   PetscInt i;
330   AppCtx  *user;
331 
332   PetscFunctionBegin;
333   PetscCall(MatShellGetContext(J_shell, &user));
334   PetscCall(Scatter_yi(X, user->yi, user->yi_scatter, user->nt));
335   user->block_index = 0;
336   PetscCall(MatMult(user->JsBlock, user->yi[0], user->yiwork[0]));
337 
338   for (i = 1; i < user->nt; i++) {
339     user->block_index = i;
340     PetscCall(MatMult(user->JsBlock, user->yi[i], user->yiwork[i]));
341     PetscCall(MatMult(user->M, user->yi[i - 1], user->ziwork[i - 1]));
342     PetscCall(VecAXPY(user->yiwork[i], -1.0, user->ziwork[i - 1]));
343   }
344   PetscCall(Gather_yi(Y, user->yiwork, user->yi_scatter, user->nt));
345   PetscFunctionReturn(0);
346 }
347 
348 PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y) {
349   PetscInt i;
350   AppCtx  *user;
351 
352   PetscFunctionBegin;
353   PetscCall(MatShellGetContext(J_shell, &user));
354   PetscCall(Scatter_yi(X, user->yi, user->yi_scatter, user->nt));
355 
356   for (i = 0; i < user->nt - 1; i++) {
357     user->block_index = i;
358     PetscCall(MatMultTranspose(user->JsBlock, user->yi[i], user->yiwork[i]));
359     PetscCall(MatMult(user->M, user->yi[i + 1], user->ziwork[i + 1]));
360     PetscCall(VecAXPY(user->yiwork[i], -1.0, user->ziwork[i + 1]));
361   }
362 
363   i                 = user->nt - 1;
364   user->block_index = i;
365   PetscCall(MatMultTranspose(user->JsBlock, user->yi[i], user->yiwork[i]));
366   PetscCall(Gather_yi(Y, user->yiwork, user->yi_scatter, user->nt));
367   PetscFunctionReturn(0);
368 }
369 
370 PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y) {
371   PetscInt i;
372   AppCtx  *user;
373 
374   PetscFunctionBegin;
375   PetscCall(MatShellGetContext(J_shell, &user));
376   i = user->block_index;
377   PetscCall(VecPointwiseMult(user->uxiwork[i], X, user->uxi[i]));
378   PetscCall(VecPointwiseMult(user->uyiwork[i], X, user->uyi[i]));
379   PetscCall(Gather(user->uiwork[i], user->uxiwork[i], user->ux_scatter[i], user->uyiwork[i], user->uy_scatter[i]));
380   PetscCall(MatMult(user->Div, user->uiwork[i], Y));
381   PetscCall(VecAYPX(Y, user->ht, X));
382   PetscFunctionReturn(0);
383 }
384 
385 PetscErrorCode StateMatBlockMultTranspose(Mat J_shell, Vec X, Vec Y) {
386   PetscInt i;
387   AppCtx  *user;
388 
389   PetscFunctionBegin;
390   PetscCall(MatShellGetContext(J_shell, &user));
391   i = user->block_index;
392   PetscCall(MatMult(user->Grad, X, user->uiwork[i]));
393   PetscCall(Scatter(user->uiwork[i], user->uxiwork[i], user->ux_scatter[i], user->uyiwork[i], user->uy_scatter[i]));
394   PetscCall(VecPointwiseMult(user->uxiwork[i], user->uxi[i], user->uxiwork[i]));
395   PetscCall(VecPointwiseMult(user->uyiwork[i], user->uyi[i], user->uyiwork[i]));
396   PetscCall(VecWAXPY(Y, 1.0, user->uxiwork[i], user->uyiwork[i]));
397   PetscCall(VecAYPX(Y, user->ht, X));
398   PetscFunctionReturn(0);
399 }
400 
401 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y) {
402   PetscInt i;
403   AppCtx  *user;
404 
405   PetscFunctionBegin;
406   PetscCall(MatShellGetContext(J_shell, &user));
407   PetscCall(Scatter_yi(user->y, user->yi, user->yi_scatter, user->nt));
408   PetscCall(Scatter_uxi_uyi(X, user->uxiwork, user->uxi_scatter, user->uyiwork, user->uyi_scatter, user->nt));
409   for (i = 0; i < user->nt; i++) {
410     PetscCall(VecPointwiseMult(user->uxiwork[i], user->yi[i], user->uxiwork[i]));
411     PetscCall(VecPointwiseMult(user->uyiwork[i], user->yi[i], user->uyiwork[i]));
412     PetscCall(Gather(user->uiwork[i], user->uxiwork[i], user->ux_scatter[i], user->uyiwork[i], user->uy_scatter[i]));
413     PetscCall(MatMult(user->Div, user->uiwork[i], user->ziwork[i]));
414     PetscCall(VecScale(user->ziwork[i], user->ht));
415   }
416   PetscCall(Gather_yi(Y, user->ziwork, user->yi_scatter, user->nt));
417   PetscFunctionReturn(0);
418 }
419 
420 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y) {
421   PetscInt i;
422   AppCtx  *user;
423 
424   PetscFunctionBegin;
425   PetscCall(MatShellGetContext(J_shell, &user));
426   PetscCall(Scatter_yi(user->y, user->yi, user->yi_scatter, user->nt));
427   PetscCall(Scatter_yi(X, user->yiwork, user->yi_scatter, user->nt));
428   for (i = 0; i < user->nt; i++) {
429     PetscCall(MatMult(user->Grad, user->yiwork[i], user->uiwork[i]));
430     PetscCall(Scatter(user->uiwork[i], user->uxiwork[i], user->ux_scatter[i], user->uyiwork[i], user->uy_scatter[i]));
431     PetscCall(VecPointwiseMult(user->uxiwork[i], user->yi[i], user->uxiwork[i]));
432     PetscCall(VecPointwiseMult(user->uyiwork[i], user->yi[i], user->uyiwork[i]));
433     PetscCall(Gather(user->uiwork[i], user->uxiwork[i], user->ux_scatter[i], user->uyiwork[i], user->uy_scatter[i]));
434     PetscCall(VecScale(user->uiwork[i], user->ht));
435   }
436   PetscCall(Gather_yi(Y, user->uiwork, user->ui_scatter, user->nt));
437   PetscFunctionReturn(0);
438 }
439 
440 PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y) {
441   PetscInt i;
442   AppCtx  *user;
443 
444   PetscFunctionBegin;
445   PetscCall(PCShellGetContext(PC_shell, &user));
446   i = user->block_index;
447   if (user->c_formed) {
448     PetscCall(MatSOR(user->C[i], X, 1.0, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP), 0.0, 1, 1, Y));
449   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not formed");
450   PetscFunctionReturn(0);
451 }
452 
453 PetscErrorCode StateMatBlockPrecMultTranspose(PC PC_shell, Vec X, Vec Y) {
454   PetscInt i;
455   AppCtx  *user;
456 
457   PetscFunctionBegin;
458   PetscCall(PCShellGetContext(PC_shell, &user));
459 
460   i = user->block_index;
461   if (user->c_formed) {
462     PetscCall(MatSOR(user->C[i], X, 1.0, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP), 0.0, 1, 1, Y));
463   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not formed");
464   PetscFunctionReturn(0);
465 }
466 
467 PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y) {
468   AppCtx  *user;
469   PetscInt its, i;
470 
471   PetscFunctionBegin;
472   PetscCall(MatShellGetContext(J_shell, &user));
473 
474   if (Y == user->ytrue) {
475     /* First solve is done using true solution to set up problem */
476     PetscCall(KSPSetTolerances(user->solver, 1e-4, 1e-20, PETSC_DEFAULT, PETSC_DEFAULT));
477   } else {
478     PetscCall(KSPSetTolerances(user->solver, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
479   }
480   PetscCall(Scatter_yi(X, user->yi, user->yi_scatter, user->nt));
481   PetscCall(Scatter_yi(Y, user->yiwork, user->yi_scatter, user->nt));
482   PetscCall(Scatter_uxi_uyi(user->u, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
483 
484   user->block_index = 0;
485   PetscCall(KSPSolve(user->solver, user->yi[0], user->yiwork[0]));
486 
487   PetscCall(KSPGetIterationNumber(user->solver, &its));
488   user->ksp_its = user->ksp_its + its;
489   for (i = 1; i < user->nt; i++) {
490     PetscCall(MatMult(user->M, user->yiwork[i - 1], user->ziwork[i - 1]));
491     PetscCall(VecAXPY(user->yi[i], 1.0, user->ziwork[i - 1]));
492     user->block_index = i;
493     PetscCall(KSPSolve(user->solver, user->yi[i], user->yiwork[i]));
494 
495     PetscCall(KSPGetIterationNumber(user->solver, &its));
496     user->ksp_its = user->ksp_its + its;
497   }
498   PetscCall(Gather_yi(Y, user->yiwork, user->yi_scatter, user->nt));
499   PetscFunctionReturn(0);
500 }
501 
502 PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y) {
503   AppCtx  *user;
504   PetscInt its, i;
505 
506   PetscFunctionBegin;
507   PetscCall(MatShellGetContext(J_shell, &user));
508 
509   PetscCall(Scatter_yi(X, user->yi, user->yi_scatter, user->nt));
510   PetscCall(Scatter_yi(Y, user->yiwork, user->yi_scatter, user->nt));
511   PetscCall(Scatter_uxi_uyi(user->u, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
512 
513   i                 = user->nt - 1;
514   user->block_index = i;
515   PetscCall(KSPSolveTranspose(user->solver, user->yi[i], user->yiwork[i]));
516 
517   PetscCall(KSPGetIterationNumber(user->solver, &its));
518   user->ksp_its = user->ksp_its + its;
519 
520   for (i = user->nt - 2; i >= 0; i--) {
521     PetscCall(MatMult(user->M, user->yiwork[i + 1], user->ziwork[i + 1]));
522     PetscCall(VecAXPY(user->yi[i], 1.0, user->ziwork[i + 1]));
523     user->block_index = i;
524     PetscCall(KSPSolveTranspose(user->solver, user->yi[i], user->yiwork[i]));
525 
526     PetscCall(KSPGetIterationNumber(user->solver, &its));
527     user->ksp_its = user->ksp_its + its;
528   }
529   PetscCall(Gather_yi(Y, user->yiwork, user->yi_scatter, user->nt));
530   PetscFunctionReturn(0);
531 }
532 
533 PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell) {
534   AppCtx *user;
535 
536   PetscFunctionBegin;
537   PetscCall(MatShellGetContext(J_shell, &user));
538 
539   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->m, user, new_shell));
540   PetscCall(MatShellSetOperation(*new_shell, MATOP_MULT, (void (*)(void))StateMatMult));
541   PetscCall(MatShellSetOperation(*new_shell, MATOP_DUPLICATE, (void (*)(void))StateMatDuplicate));
542   PetscCall(MatShellSetOperation(*new_shell, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatMultTranspose));
543   PetscCall(MatShellSetOperation(*new_shell, MATOP_GET_DIAGONAL, (void (*)(void))StateMatGetDiagonal));
544   PetscFunctionReturn(0);
545 }
546 
547 PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X) {
548   AppCtx *user;
549 
550   PetscFunctionBegin;
551   PetscCall(MatShellGetContext(J_shell, &user));
552   PetscCall(VecCopy(user->js_diag, X));
553   PetscFunctionReturn(0);
554 }
555 
556 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr) {
557   /* con = Ay - q, A = [C(u1)  0     0     ...   0;
558                          -M  C(u2)   0     ...   0;
559                           0   -M   C(u3)   ...   0;
560                                       ...         ;
561                           0    ...      -M C(u_nt)]
562      C(u) = eye + ht*Div*[diag(u1); diag(u2)]       */
563   PetscInt i;
564   AppCtx  *user = (AppCtx *)ptr;
565 
566   PetscFunctionBegin;
567   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
568   PetscCall(Scatter_yi(user->y, user->yi, user->yi_scatter, user->nt));
569   PetscCall(Scatter_uxi_uyi(user->u, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
570 
571   user->block_index = 0;
572   PetscCall(MatMult(user->JsBlock, user->yi[0], user->yiwork[0]));
573 
574   for (i = 1; i < user->nt; i++) {
575     user->block_index = i;
576     PetscCall(MatMult(user->JsBlock, user->yi[i], user->yiwork[i]));
577     PetscCall(MatMult(user->M, user->yi[i - 1], user->ziwork[i - 1]));
578     PetscCall(VecAXPY(user->yiwork[i], -1.0, user->ziwork[i - 1]));
579   }
580 
581   PetscCall(Gather_yi(C, user->yiwork, user->yi_scatter, user->nt));
582   PetscCall(VecAXPY(C, -1.0, user->q));
583 
584   PetscFunctionReturn(0);
585 }
586 
587 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat) {
588   PetscFunctionBegin;
589   PetscCall(VecScatterBegin(s_scat, x, state, INSERT_VALUES, SCATTER_FORWARD));
590   PetscCall(VecScatterEnd(s_scat, x, state, INSERT_VALUES, SCATTER_FORWARD));
591   PetscCall(VecScatterBegin(d_scat, x, design, INSERT_VALUES, SCATTER_FORWARD));
592   PetscCall(VecScatterEnd(d_scat, x, design, INSERT_VALUES, SCATTER_FORWARD));
593   PetscFunctionReturn(0);
594 }
595 
596 PetscErrorCode Scatter_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt) {
597   PetscInt i;
598 
599   PetscFunctionBegin;
600   for (i = 0; i < nt; i++) {
601     PetscCall(VecScatterBegin(scatx[i], u, uxi[i], INSERT_VALUES, SCATTER_FORWARD));
602     PetscCall(VecScatterEnd(scatx[i], u, uxi[i], INSERT_VALUES, SCATTER_FORWARD));
603     PetscCall(VecScatterBegin(scaty[i], u, uyi[i], INSERT_VALUES, SCATTER_FORWARD));
604     PetscCall(VecScatterEnd(scaty[i], u, uyi[i], INSERT_VALUES, SCATTER_FORWARD));
605   }
606   PetscFunctionReturn(0);
607 }
608 
609 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat) {
610   PetscFunctionBegin;
611   PetscCall(VecScatterBegin(s_scat, state, x, INSERT_VALUES, SCATTER_REVERSE));
612   PetscCall(VecScatterEnd(s_scat, state, x, INSERT_VALUES, SCATTER_REVERSE));
613   PetscCall(VecScatterBegin(d_scat, design, x, INSERT_VALUES, SCATTER_REVERSE));
614   PetscCall(VecScatterEnd(d_scat, design, x, INSERT_VALUES, SCATTER_REVERSE));
615   PetscFunctionReturn(0);
616 }
617 
618 PetscErrorCode Gather_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt) {
619   PetscInt i;
620 
621   PetscFunctionBegin;
622   for (i = 0; i < nt; i++) {
623     PetscCall(VecScatterBegin(scatx[i], uxi[i], u, INSERT_VALUES, SCATTER_REVERSE));
624     PetscCall(VecScatterEnd(scatx[i], uxi[i], u, INSERT_VALUES, SCATTER_REVERSE));
625     PetscCall(VecScatterBegin(scaty[i], uyi[i], u, INSERT_VALUES, SCATTER_REVERSE));
626     PetscCall(VecScatterEnd(scaty[i], uyi[i], u, INSERT_VALUES, SCATTER_REVERSE));
627   }
628   PetscFunctionReturn(0);
629 }
630 
631 PetscErrorCode Scatter_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt) {
632   PetscInt i;
633 
634   PetscFunctionBegin;
635   for (i = 0; i < nt; i++) {
636     PetscCall(VecScatterBegin(scat[i], y, yi[i], INSERT_VALUES, SCATTER_FORWARD));
637     PetscCall(VecScatterEnd(scat[i], y, yi[i], INSERT_VALUES, SCATTER_FORWARD));
638   }
639   PetscFunctionReturn(0);
640 }
641 
642 PetscErrorCode Gather_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt) {
643   PetscInt i;
644 
645   PetscFunctionBegin;
646   for (i = 0; i < nt; i++) {
647     PetscCall(VecScatterBegin(scat[i], yi[i], y, INSERT_VALUES, SCATTER_REVERSE));
648     PetscCall(VecScatterEnd(scat[i], yi[i], y, INSERT_VALUES, SCATTER_REVERSE));
649   }
650   PetscFunctionReturn(0);
651 }
652 
653 PetscErrorCode HyperbolicInitialize(AppCtx *user) {
654   PetscInt    n, i, j, linear_index, istart, iend, iblock, lo, hi;
655   Vec         XX, YY, XXwork, YYwork, yi, uxi, ui, bc;
656   PetscReal   h, sum;
657   PetscScalar hinv, neg_hinv, quarter = 0.25, one = 1.0, half_hinv, neg_half_hinv;
658   PetscScalar vx, vy, zero = 0.0;
659   IS          is_from_y, is_to_yi, is_from_u, is_to_uxi, is_to_uyi;
660 
661   PetscFunctionBegin;
662   user->jformed  = PETSC_FALSE;
663   user->c_formed = PETSC_FALSE;
664 
665   user->ksp_its         = 0;
666   user->ksp_its_initial = 0;
667 
668   n = user->mx * user->mx;
669 
670   h             = 1.0 / user->mx;
671   hinv          = user->mx;
672   neg_hinv      = -hinv;
673   half_hinv     = hinv / 2.0;
674   neg_half_hinv = neg_hinv / 2.0;
675 
676   /* Generate Grad matrix */
677   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Grad));
678   PetscCall(MatSetSizes(user->Grad, PETSC_DECIDE, PETSC_DECIDE, 2 * n, n));
679   PetscCall(MatSetFromOptions(user->Grad));
680   PetscCall(MatMPIAIJSetPreallocation(user->Grad, 3, NULL, 3, NULL));
681   PetscCall(MatSeqAIJSetPreallocation(user->Grad, 3, NULL));
682   PetscCall(MatGetOwnershipRange(user->Grad, &istart, &iend));
683 
684   for (i = istart; i < iend; i++) {
685     if (i < n) {
686       iblock = i / user->mx;
687       j      = iblock * user->mx + ((i + user->mx - 1) % user->mx);
688       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &half_hinv, INSERT_VALUES));
689       j = iblock * user->mx + ((i + 1) % user->mx);
690       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &neg_half_hinv, INSERT_VALUES));
691     }
692     if (i >= n) {
693       j = (i - user->mx) % n;
694       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &half_hinv, INSERT_VALUES));
695       j = (j + 2 * user->mx) % n;
696       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &neg_half_hinv, INSERT_VALUES));
697     }
698   }
699 
700   PetscCall(MatAssemblyBegin(user->Grad, MAT_FINAL_ASSEMBLY));
701   PetscCall(MatAssemblyEnd(user->Grad, MAT_FINAL_ASSEMBLY));
702 
703   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Gradxy[0]));
704   PetscCall(MatSetSizes(user->Gradxy[0], PETSC_DECIDE, PETSC_DECIDE, n, n));
705   PetscCall(MatSetFromOptions(user->Gradxy[0]));
706   PetscCall(MatMPIAIJSetPreallocation(user->Gradxy[0], 3, NULL, 3, NULL));
707   PetscCall(MatSeqAIJSetPreallocation(user->Gradxy[0], 3, NULL));
708   PetscCall(MatGetOwnershipRange(user->Gradxy[0], &istart, &iend));
709 
710   for (i = istart; i < iend; i++) {
711     iblock = i / user->mx;
712     j      = iblock * user->mx + ((i + user->mx - 1) % user->mx);
713     PetscCall(MatSetValues(user->Gradxy[0], 1, &i, 1, &j, &half_hinv, INSERT_VALUES));
714     j = iblock * user->mx + ((i + 1) % user->mx);
715     PetscCall(MatSetValues(user->Gradxy[0], 1, &i, 1, &j, &neg_half_hinv, INSERT_VALUES));
716     PetscCall(MatSetValues(user->Gradxy[0], 1, &i, 1, &i, &zero, INSERT_VALUES));
717   }
718   PetscCall(MatAssemblyBegin(user->Gradxy[0], MAT_FINAL_ASSEMBLY));
719   PetscCall(MatAssemblyEnd(user->Gradxy[0], MAT_FINAL_ASSEMBLY));
720 
721   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Gradxy[1]));
722   PetscCall(MatSetSizes(user->Gradxy[1], PETSC_DECIDE, PETSC_DECIDE, n, n));
723   PetscCall(MatSetFromOptions(user->Gradxy[1]));
724   PetscCall(MatMPIAIJSetPreallocation(user->Gradxy[1], 3, NULL, 3, NULL));
725   PetscCall(MatSeqAIJSetPreallocation(user->Gradxy[1], 3, NULL));
726   PetscCall(MatGetOwnershipRange(user->Gradxy[1], &istart, &iend));
727 
728   for (i = istart; i < iend; i++) {
729     j = (i + n - user->mx) % n;
730     PetscCall(MatSetValues(user->Gradxy[1], 1, &i, 1, &j, &half_hinv, INSERT_VALUES));
731     j = (j + 2 * user->mx) % n;
732     PetscCall(MatSetValues(user->Gradxy[1], 1, &i, 1, &j, &neg_half_hinv, INSERT_VALUES));
733     PetscCall(MatSetValues(user->Gradxy[1], 1, &i, 1, &i, &zero, INSERT_VALUES));
734   }
735   PetscCall(MatAssemblyBegin(user->Gradxy[1], MAT_FINAL_ASSEMBLY));
736   PetscCall(MatAssemblyEnd(user->Gradxy[1], MAT_FINAL_ASSEMBLY));
737 
738   /* Generate Div matrix */
739   PetscCall(MatTranspose(user->Grad, MAT_INITIAL_MATRIX, &user->Div));
740   PetscCall(MatTranspose(user->Gradxy[0], MAT_INITIAL_MATRIX, &user->Divxy[0]));
741   PetscCall(MatTranspose(user->Gradxy[1], MAT_INITIAL_MATRIX, &user->Divxy[1]));
742 
743   /* Off-diagonal averaging matrix */
744   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->M));
745   PetscCall(MatSetSizes(user->M, PETSC_DECIDE, PETSC_DECIDE, n, n));
746   PetscCall(MatSetFromOptions(user->M));
747   PetscCall(MatMPIAIJSetPreallocation(user->M, 4, NULL, 4, NULL));
748   PetscCall(MatSeqAIJSetPreallocation(user->M, 4, NULL));
749   PetscCall(MatGetOwnershipRange(user->M, &istart, &iend));
750 
751   for (i = istart; i < iend; i++) {
752     /* kron(Id,Av) */
753     iblock = i / user->mx;
754     j      = iblock * user->mx + ((i + user->mx - 1) % user->mx);
755     PetscCall(MatSetValues(user->M, 1, &i, 1, &j, &quarter, INSERT_VALUES));
756     j = iblock * user->mx + ((i + 1) % user->mx);
757     PetscCall(MatSetValues(user->M, 1, &i, 1, &j, &quarter, INSERT_VALUES));
758 
759     /* kron(Av,Id) */
760     j = (i + user->mx) % n;
761     PetscCall(MatSetValues(user->M, 1, &i, 1, &j, &quarter, INSERT_VALUES));
762     j = (i + n - user->mx) % n;
763     PetscCall(MatSetValues(user->M, 1, &i, 1, &j, &quarter, INSERT_VALUES));
764   }
765   PetscCall(MatAssemblyBegin(user->M, MAT_FINAL_ASSEMBLY));
766   PetscCall(MatAssemblyEnd(user->M, MAT_FINAL_ASSEMBLY));
767 
768   /* Generate 2D grid */
769   PetscCall(VecCreate(PETSC_COMM_WORLD, &XX));
770   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->q));
771   PetscCall(VecSetSizes(XX, PETSC_DECIDE, n));
772   PetscCall(VecSetSizes(user->q, PETSC_DECIDE, n * user->nt));
773   PetscCall(VecSetFromOptions(XX));
774   PetscCall(VecSetFromOptions(user->q));
775 
776   PetscCall(VecDuplicate(XX, &YY));
777   PetscCall(VecDuplicate(XX, &XXwork));
778   PetscCall(VecDuplicate(XX, &YYwork));
779   PetscCall(VecDuplicate(XX, &user->d));
780   PetscCall(VecDuplicate(XX, &user->dwork));
781 
782   PetscCall(VecGetOwnershipRange(XX, &istart, &iend));
783   for (linear_index = istart; linear_index < iend; linear_index++) {
784     i  = linear_index % user->mx;
785     j  = (linear_index - i) / user->mx;
786     vx = h * (i + 0.5);
787     vy = h * (j + 0.5);
788     PetscCall(VecSetValues(XX, 1, &linear_index, &vx, INSERT_VALUES));
789     PetscCall(VecSetValues(YY, 1, &linear_index, &vy, INSERT_VALUES));
790   }
791 
792   PetscCall(VecAssemblyBegin(XX));
793   PetscCall(VecAssemblyEnd(XX));
794   PetscCall(VecAssemblyBegin(YY));
795   PetscCall(VecAssemblyEnd(YY));
796 
797   /* Compute final density function yT
798      yT = 1.0 + exp(-30*((x-0.25)^2+(y-0.25)^2)) + exp(-30*((x-0.75)^2+(y-0.75)^2))
799      yT = yT / (h^2*sum(yT)) */
800   PetscCall(VecCopy(XX, XXwork));
801   PetscCall(VecCopy(YY, YYwork));
802 
803   PetscCall(VecShift(XXwork, -0.25));
804   PetscCall(VecShift(YYwork, -0.25));
805 
806   PetscCall(VecPointwiseMult(XXwork, XXwork, XXwork));
807   PetscCall(VecPointwiseMult(YYwork, YYwork, YYwork));
808 
809   PetscCall(VecCopy(XXwork, user->dwork));
810   PetscCall(VecAXPY(user->dwork, 1.0, YYwork));
811   PetscCall(VecScale(user->dwork, -30.0));
812   PetscCall(VecExp(user->dwork));
813   PetscCall(VecCopy(user->dwork, user->d));
814 
815   PetscCall(VecCopy(XX, XXwork));
816   PetscCall(VecCopy(YY, YYwork));
817 
818   PetscCall(VecShift(XXwork, -0.75));
819   PetscCall(VecShift(YYwork, -0.75));
820 
821   PetscCall(VecPointwiseMult(XXwork, XXwork, XXwork));
822   PetscCall(VecPointwiseMult(YYwork, YYwork, YYwork));
823 
824   PetscCall(VecCopy(XXwork, user->dwork));
825   PetscCall(VecAXPY(user->dwork, 1.0, YYwork));
826   PetscCall(VecScale(user->dwork, -30.0));
827   PetscCall(VecExp(user->dwork));
828 
829   PetscCall(VecAXPY(user->d, 1.0, user->dwork));
830   PetscCall(VecShift(user->d, 1.0));
831   PetscCall(VecSum(user->d, &sum));
832   PetscCall(VecScale(user->d, 1.0 / (h * h * sum)));
833 
834   /* Initial conditions of forward problem */
835   PetscCall(VecDuplicate(XX, &bc));
836   PetscCall(VecCopy(XX, XXwork));
837   PetscCall(VecCopy(YY, YYwork));
838 
839   PetscCall(VecShift(XXwork, -0.5));
840   PetscCall(VecShift(YYwork, -0.5));
841 
842   PetscCall(VecPointwiseMult(XXwork, XXwork, XXwork));
843   PetscCall(VecPointwiseMult(YYwork, YYwork, YYwork));
844 
845   PetscCall(VecWAXPY(bc, 1.0, XXwork, YYwork));
846   PetscCall(VecScale(bc, -50.0));
847   PetscCall(VecExp(bc));
848   PetscCall(VecShift(bc, 1.0));
849   PetscCall(VecSum(bc, &sum));
850   PetscCall(VecScale(bc, 1.0 / (h * h * sum)));
851 
852   /* Create scatter from y to y_1,y_2,...,y_nt */
853   /*  TODO: Reorder for better parallelism. (This will require reordering Q and L as well.) */
854   PetscCall(PetscMalloc1(user->nt * user->mx * user->mx, &user->yi_scatter));
855   PetscCall(VecCreate(PETSC_COMM_WORLD, &yi));
856   PetscCall(VecSetSizes(yi, PETSC_DECIDE, user->mx * user->mx));
857   PetscCall(VecSetFromOptions(yi));
858   PetscCall(VecDuplicateVecs(yi, user->nt, &user->yi));
859   PetscCall(VecDuplicateVecs(yi, user->nt, &user->yiwork));
860   PetscCall(VecDuplicateVecs(yi, user->nt, &user->ziwork));
861   for (i = 0; i < user->nt; i++) {
862     PetscCall(VecGetOwnershipRange(user->yi[i], &lo, &hi));
863     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_yi));
864     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + i * user->mx * user->mx, 1, &is_from_y));
865     PetscCall(VecScatterCreate(user->y, is_from_y, user->yi[i], is_to_yi, &user->yi_scatter[i]));
866     PetscCall(ISDestroy(&is_to_yi));
867     PetscCall(ISDestroy(&is_from_y));
868   }
869 
870   /* Create scatter from u to ux_1,uy_1,ux_2,uy_2,...,ux_nt,uy_nt */
871   /*  TODO: reorder for better parallelism */
872   PetscCall(PetscMalloc1(user->nt * user->mx * user->mx, &user->uxi_scatter));
873   PetscCall(PetscMalloc1(user->nt * user->mx * user->mx, &user->uyi_scatter));
874   PetscCall(PetscMalloc1(user->nt * user->mx * user->mx, &user->ux_scatter));
875   PetscCall(PetscMalloc1(user->nt * user->mx * user->mx, &user->uy_scatter));
876   PetscCall(PetscMalloc1(2 * user->nt * user->mx * user->mx, &user->ui_scatter));
877   PetscCall(VecCreate(PETSC_COMM_WORLD, &uxi));
878   PetscCall(VecCreate(PETSC_COMM_WORLD, &ui));
879   PetscCall(VecSetSizes(uxi, PETSC_DECIDE, user->mx * user->mx));
880   PetscCall(VecSetSizes(ui, PETSC_DECIDE, 2 * user->mx * user->mx));
881   PetscCall(VecSetFromOptions(uxi));
882   PetscCall(VecSetFromOptions(ui));
883   PetscCall(VecDuplicateVecs(uxi, user->nt, &user->uxi));
884   PetscCall(VecDuplicateVecs(uxi, user->nt, &user->uyi));
885   PetscCall(VecDuplicateVecs(uxi, user->nt, &user->uxiwork));
886   PetscCall(VecDuplicateVecs(uxi, user->nt, &user->uyiwork));
887   PetscCall(VecDuplicateVecs(ui, user->nt, &user->ui));
888   PetscCall(VecDuplicateVecs(ui, user->nt, &user->uiwork));
889   for (i = 0; i < user->nt; i++) {
890     PetscCall(VecGetOwnershipRange(user->uxi[i], &lo, &hi));
891     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_uxi));
892     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + 2 * i * user->mx * user->mx, 1, &is_from_u));
893     PetscCall(VecScatterCreate(user->u, is_from_u, user->uxi[i], is_to_uxi, &user->uxi_scatter[i]));
894 
895     PetscCall(ISDestroy(&is_to_uxi));
896     PetscCall(ISDestroy(&is_from_u));
897 
898     PetscCall(VecGetOwnershipRange(user->uyi[i], &lo, &hi));
899     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_uyi));
900     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + (2 * i + 1) * user->mx * user->mx, 1, &is_from_u));
901     PetscCall(VecScatterCreate(user->u, is_from_u, user->uyi[i], is_to_uyi, &user->uyi_scatter[i]));
902 
903     PetscCall(ISDestroy(&is_to_uyi));
904     PetscCall(ISDestroy(&is_from_u));
905 
906     PetscCall(VecGetOwnershipRange(user->uxi[i], &lo, &hi));
907     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_uxi));
908     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_from_u));
909     PetscCall(VecScatterCreate(user->ui[i], is_from_u, user->uxi[i], is_to_uxi, &user->ux_scatter[i]));
910 
911     PetscCall(ISDestroy(&is_to_uxi));
912     PetscCall(ISDestroy(&is_from_u));
913 
914     PetscCall(VecGetOwnershipRange(user->uyi[i], &lo, &hi));
915     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_uyi));
916     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + user->mx * user->mx, 1, &is_from_u));
917     PetscCall(VecScatterCreate(user->ui[i], is_from_u, user->uyi[i], is_to_uyi, &user->uy_scatter[i]));
918 
919     PetscCall(ISDestroy(&is_to_uyi));
920     PetscCall(ISDestroy(&is_from_u));
921 
922     PetscCall(VecGetOwnershipRange(user->ui[i], &lo, &hi));
923     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_uxi));
924     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + 2 * i * user->mx * user->mx, 1, &is_from_u));
925     PetscCall(VecScatterCreate(user->u, is_from_u, user->ui[i], is_to_uxi, &user->ui_scatter[i]));
926 
927     PetscCall(ISDestroy(&is_to_uxi));
928     PetscCall(ISDestroy(&is_from_u));
929   }
930 
931   /* RHS of forward problem */
932   PetscCall(MatMult(user->M, bc, user->yiwork[0]));
933   for (i = 1; i < user->nt; i++) PetscCall(VecSet(user->yiwork[i], 0.0));
934   PetscCall(Gather_yi(user->q, user->yiwork, user->yi_scatter, user->nt));
935 
936   /* Compute true velocity field utrue */
937   PetscCall(VecDuplicate(user->u, &user->utrue));
938   for (i = 0; i < user->nt; i++) {
939     PetscCall(VecCopy(YY, user->uxi[i]));
940     PetscCall(VecScale(user->uxi[i], 150.0 * i * user->ht));
941     PetscCall(VecCopy(XX, user->uyi[i]));
942     PetscCall(VecShift(user->uyi[i], -10.0));
943     PetscCall(VecScale(user->uyi[i], 15.0 * i * user->ht));
944   }
945   PetscCall(Gather_uxi_uyi(user->utrue, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
946 
947   /* Initial guess and reference model */
948   PetscCall(VecDuplicate(user->utrue, &user->ur));
949   for (i = 0; i < user->nt; i++) {
950     PetscCall(VecCopy(XX, user->uxi[i]));
951     PetscCall(VecShift(user->uxi[i], i * user->ht));
952     PetscCall(VecCopy(YY, user->uyi[i]));
953     PetscCall(VecShift(user->uyi[i], -i * user->ht));
954   }
955   PetscCall(Gather_uxi_uyi(user->ur, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
956 
957   /* Generate regularization matrix L */
958   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->LT));
959   PetscCall(MatSetSizes(user->LT, PETSC_DECIDE, PETSC_DECIDE, 2 * n * user->nt, n * user->nt));
960   PetscCall(MatSetFromOptions(user->LT));
961   PetscCall(MatMPIAIJSetPreallocation(user->LT, 1, NULL, 1, NULL));
962   PetscCall(MatSeqAIJSetPreallocation(user->LT, 1, NULL));
963   PetscCall(MatGetOwnershipRange(user->LT, &istart, &iend));
964 
965   for (i = istart; i < iend; i++) {
966     iblock = (i + n) / (2 * n);
967     j      = i - iblock * n;
968     PetscCall(MatSetValues(user->LT, 1, &i, 1, &j, &user->gamma, INSERT_VALUES));
969   }
970 
971   PetscCall(MatAssemblyBegin(user->LT, MAT_FINAL_ASSEMBLY));
972   PetscCall(MatAssemblyEnd(user->LT, MAT_FINAL_ASSEMBLY));
973 
974   PetscCall(MatTranspose(user->LT, MAT_INITIAL_MATRIX, &user->L));
975 
976   /* Build work vectors and matrices */
977   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->lwork));
978   PetscCall(VecSetType(user->lwork, VECMPI));
979   PetscCall(VecSetSizes(user->lwork, PETSC_DECIDE, user->m));
980   PetscCall(VecSetFromOptions(user->lwork));
981 
982   PetscCall(MatDuplicate(user->Div, MAT_SHARE_NONZERO_PATTERN, &user->Divwork));
983 
984   PetscCall(VecDuplicate(user->y, &user->ywork));
985   PetscCall(VecDuplicate(user->u, &user->uwork));
986   PetscCall(VecDuplicate(user->u, &user->vwork));
987   PetscCall(VecDuplicate(user->u, &user->js_diag));
988   PetscCall(VecDuplicate(user->c, &user->cwork));
989 
990   /* Create matrix-free shell user->Js for computing A*x */
991   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->m, user, &user->Js));
992   PetscCall(MatShellSetOperation(user->Js, MATOP_MULT, (void (*)(void))StateMatMult));
993   PetscCall(MatShellSetOperation(user->Js, MATOP_DUPLICATE, (void (*)(void))StateMatDuplicate));
994   PetscCall(MatShellSetOperation(user->Js, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatMultTranspose));
995   PetscCall(MatShellSetOperation(user->Js, MATOP_GET_DIAGONAL, (void (*)(void))StateMatGetDiagonal));
996 
997   /* Diagonal blocks of user->Js */
998   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, n, n, user, &user->JsBlock));
999   PetscCall(MatShellSetOperation(user->JsBlock, MATOP_MULT, (void (*)(void))StateMatBlockMult));
1000   PetscCall(MatShellSetOperation(user->JsBlock, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatBlockMultTranspose));
1001 
1002   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
1003      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
1004      This is an SOR preconditioner for user->JsBlock. */
1005   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, n, n, user, &user->JsBlockPrec));
1006   PetscCall(MatShellSetOperation(user->JsBlockPrec, MATOP_MULT, (void (*)(void))StateMatBlockPrecMult));
1007   PetscCall(MatShellSetOperation(user->JsBlockPrec, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatBlockPrecMultTranspose));
1008 
1009   /* Create a matrix-free shell user->Jd for computing B*x */
1010   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->n - user->m, user, &user->Jd));
1011   PetscCall(MatShellSetOperation(user->Jd, MATOP_MULT, (void (*)(void))DesignMatMult));
1012   PetscCall(MatShellSetOperation(user->Jd, MATOP_MULT_TRANSPOSE, (void (*)(void))DesignMatMultTranspose));
1013 
1014   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
1015   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->m, user, &user->JsInv));
1016   PetscCall(MatShellSetOperation(user->JsInv, MATOP_MULT, (void (*)(void))StateMatInvMult));
1017   PetscCall(MatShellSetOperation(user->JsInv, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatInvTransposeMult));
1018 
1019   /* Build matrices for SOR preconditioner */
1020   PetscCall(Scatter_uxi_uyi(user->u, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
1021   PetscCall(PetscMalloc1(5 * n, &user->C));
1022   PetscCall(PetscMalloc1(2 * n, &user->Cwork));
1023   for (i = 0; i < user->nt; i++) {
1024     PetscCall(MatDuplicate(user->Divxy[0], MAT_COPY_VALUES, &user->C[i]));
1025     PetscCall(MatDuplicate(user->Divxy[1], MAT_COPY_VALUES, &user->Cwork[i]));
1026 
1027     PetscCall(MatDiagonalScale(user->C[i], NULL, user->uxi[i]));
1028     PetscCall(MatDiagonalScale(user->Cwork[i], NULL, user->uyi[i]));
1029     PetscCall(MatAXPY(user->C[i], 1.0, user->Cwork[i], DIFFERENT_NONZERO_PATTERN));
1030     PetscCall(MatScale(user->C[i], user->ht));
1031     PetscCall(MatShift(user->C[i], 1.0));
1032   }
1033 
1034   /* Solver options and tolerances */
1035   PetscCall(KSPCreate(PETSC_COMM_WORLD, &user->solver));
1036   PetscCall(KSPSetType(user->solver, KSPGMRES));
1037   PetscCall(KSPSetOperators(user->solver, user->JsBlock, user->JsBlockPrec));
1038   PetscCall(KSPSetTolerances(user->solver, 1e-4, 1e-20, 1e3, 500));
1039   /* PetscCall(KSPSetTolerances(user->solver,1e-8,1e-16,1e3,500)); */
1040   PetscCall(KSPGetPC(user->solver, &user->prec));
1041   PetscCall(PCSetType(user->prec, PCSHELL));
1042 
1043   PetscCall(PCShellSetApply(user->prec, StateMatBlockPrecMult));
1044   PetscCall(PCShellSetApplyTranspose(user->prec, StateMatBlockPrecMultTranspose));
1045   PetscCall(PCShellSetContext(user->prec, user));
1046 
1047   /* Compute true state function yt given ut */
1048   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->ytrue));
1049   PetscCall(VecSetSizes(user->ytrue, PETSC_DECIDE, n * user->nt));
1050   PetscCall(VecSetFromOptions(user->ytrue));
1051   user->c_formed = PETSC_TRUE;
1052   PetscCall(VecCopy(user->utrue, user->u)); /*  Set u=utrue temporarily for StateMatInv */
1053   PetscCall(VecSet(user->ytrue, 0.0));      /*  Initial guess */
1054   PetscCall(StateMatInvMult(user->Js, user->q, user->ytrue));
1055   PetscCall(VecCopy(user->ur, user->u)); /*  Reset u=ur */
1056 
1057   /* Initial guess y0 for state given u0 */
1058   PetscCall(StateMatInvMult(user->Js, user->q, user->y));
1059 
1060   /* Data discretization */
1061   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Q));
1062   PetscCall(MatSetSizes(user->Q, PETSC_DECIDE, PETSC_DECIDE, user->mx * user->mx, user->m));
1063   PetscCall(MatSetFromOptions(user->Q));
1064   PetscCall(MatMPIAIJSetPreallocation(user->Q, 0, NULL, 1, NULL));
1065   PetscCall(MatSeqAIJSetPreallocation(user->Q, 1, NULL));
1066 
1067   PetscCall(MatGetOwnershipRange(user->Q, &istart, &iend));
1068 
1069   for (i = istart; i < iend; i++) {
1070     j = i + user->m - user->mx * user->mx;
1071     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &one, INSERT_VALUES));
1072   }
1073 
1074   PetscCall(MatAssemblyBegin(user->Q, MAT_FINAL_ASSEMBLY));
1075   PetscCall(MatAssemblyEnd(user->Q, MAT_FINAL_ASSEMBLY));
1076 
1077   PetscCall(MatTranspose(user->Q, MAT_INITIAL_MATRIX, &user->QT));
1078 
1079   PetscCall(VecDestroy(&XX));
1080   PetscCall(VecDestroy(&YY));
1081   PetscCall(VecDestroy(&XXwork));
1082   PetscCall(VecDestroy(&YYwork));
1083   PetscCall(VecDestroy(&yi));
1084   PetscCall(VecDestroy(&uxi));
1085   PetscCall(VecDestroy(&ui));
1086   PetscCall(VecDestroy(&bc));
1087 
1088   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1089   PetscCall(KSPSetFromOptions(user->solver));
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 PetscErrorCode HyperbolicDestroy(AppCtx *user) {
1094   PetscInt i;
1095 
1096   PetscFunctionBegin;
1097   PetscCall(MatDestroy(&user->Q));
1098   PetscCall(MatDestroy(&user->QT));
1099   PetscCall(MatDestroy(&user->Div));
1100   PetscCall(MatDestroy(&user->Divwork));
1101   PetscCall(MatDestroy(&user->Grad));
1102   PetscCall(MatDestroy(&user->L));
1103   PetscCall(MatDestroy(&user->LT));
1104   PetscCall(KSPDestroy(&user->solver));
1105   PetscCall(MatDestroy(&user->Js));
1106   PetscCall(MatDestroy(&user->Jd));
1107   PetscCall(MatDestroy(&user->JsBlockPrec));
1108   PetscCall(MatDestroy(&user->JsInv));
1109   PetscCall(MatDestroy(&user->JsBlock));
1110   PetscCall(MatDestroy(&user->Divxy[0]));
1111   PetscCall(MatDestroy(&user->Divxy[1]));
1112   PetscCall(MatDestroy(&user->Gradxy[0]));
1113   PetscCall(MatDestroy(&user->Gradxy[1]));
1114   PetscCall(MatDestroy(&user->M));
1115   for (i = 0; i < user->nt; i++) {
1116     PetscCall(MatDestroy(&user->C[i]));
1117     PetscCall(MatDestroy(&user->Cwork[i]));
1118   }
1119   PetscCall(PetscFree(user->C));
1120   PetscCall(PetscFree(user->Cwork));
1121   PetscCall(VecDestroy(&user->u));
1122   PetscCall(VecDestroy(&user->uwork));
1123   PetscCall(VecDestroy(&user->vwork));
1124   PetscCall(VecDestroy(&user->utrue));
1125   PetscCall(VecDestroy(&user->y));
1126   PetscCall(VecDestroy(&user->ywork));
1127   PetscCall(VecDestroy(&user->ytrue));
1128   PetscCall(VecDestroyVecs(user->nt, &user->yi));
1129   PetscCall(VecDestroyVecs(user->nt, &user->yiwork));
1130   PetscCall(VecDestroyVecs(user->nt, &user->ziwork));
1131   PetscCall(VecDestroyVecs(user->nt, &user->uxi));
1132   PetscCall(VecDestroyVecs(user->nt, &user->uyi));
1133   PetscCall(VecDestroyVecs(user->nt, &user->uxiwork));
1134   PetscCall(VecDestroyVecs(user->nt, &user->uyiwork));
1135   PetscCall(VecDestroyVecs(user->nt, &user->ui));
1136   PetscCall(VecDestroyVecs(user->nt, &user->uiwork));
1137   PetscCall(VecDestroy(&user->c));
1138   PetscCall(VecDestroy(&user->cwork));
1139   PetscCall(VecDestroy(&user->ur));
1140   PetscCall(VecDestroy(&user->q));
1141   PetscCall(VecDestroy(&user->d));
1142   PetscCall(VecDestroy(&user->dwork));
1143   PetscCall(VecDestroy(&user->lwork));
1144   PetscCall(VecDestroy(&user->js_diag));
1145   PetscCall(ISDestroy(&user->s_is));
1146   PetscCall(ISDestroy(&user->d_is));
1147   PetscCall(VecScatterDestroy(&user->state_scatter));
1148   PetscCall(VecScatterDestroy(&user->design_scatter));
1149   for (i = 0; i < user->nt; i++) {
1150     PetscCall(VecScatterDestroy(&user->uxi_scatter[i]));
1151     PetscCall(VecScatterDestroy(&user->uyi_scatter[i]));
1152     PetscCall(VecScatterDestroy(&user->ux_scatter[i]));
1153     PetscCall(VecScatterDestroy(&user->uy_scatter[i]));
1154     PetscCall(VecScatterDestroy(&user->ui_scatter[i]));
1155     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1156   }
1157   PetscCall(PetscFree(user->uxi_scatter));
1158   PetscCall(PetscFree(user->uyi_scatter));
1159   PetscCall(PetscFree(user->ux_scatter));
1160   PetscCall(PetscFree(user->uy_scatter));
1161   PetscCall(PetscFree(user->ui_scatter));
1162   PetscCall(PetscFree(user->yi_scatter));
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 PetscErrorCode HyperbolicMonitor(Tao tao, void *ptr) {
1167   Vec       X;
1168   PetscReal unorm, ynorm;
1169   AppCtx   *user = (AppCtx *)ptr;
1170 
1171   PetscFunctionBegin;
1172   PetscCall(TaoGetSolution(tao, &X));
1173   PetscCall(Scatter(X, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
1174   PetscCall(VecAXPY(user->ywork, -1.0, user->ytrue));
1175   PetscCall(VecAXPY(user->uwork, -1.0, user->utrue));
1176   PetscCall(VecNorm(user->uwork, NORM_2, &unorm));
1177   PetscCall(VecNorm(user->ywork, NORM_2, &ynorm));
1178   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n", (double)unorm, (double)ynorm));
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 /*TEST
1183 
1184    build:
1185       requires: !complex
1186 
1187    test:
1188       requires: !single
1189       args: -tao_cmonitor -tao_max_funcs 10 -tao_type lcl -tao_gatol 1.e-5
1190 
1191    test:
1192       suffix: guess_pod
1193       requires: !single
1194       args: -tao_cmonitor -tao_max_funcs 10 -tao_type lcl -ksp_guess_type pod -tao_gatol 1.e-5
1195 
1196 TEST*/
1197