xref: /petsc/src/tao/pde_constrained/tutorials/hyperbolic.c (revision 1b37a2a7cc4a4fb30c3e967db1c694c0a1013f51)
1 #include <petsctao.h>
2 
3 typedef struct {
4   PetscInt    n;     /*  Number of variables */
5   PetscInt    m;     /*  Number of constraints */
6   PetscInt    mx;    /*  grid points in each direction */
7   PetscInt    nt;    /*  Number of time steps */
8   PetscInt    ndata; /*  Number of data points per sample */
9   IS          s_is;
10   IS          d_is;
11   VecScatter  state_scatter;
12   VecScatter  design_scatter;
13   VecScatter *uxi_scatter, *uyi_scatter, *ux_scatter, *uy_scatter, *ui_scatter;
14   VecScatter *yi_scatter;
15 
16   Mat       Js, Jd, JsBlockPrec, JsInv, JsBlock;
17   PetscBool jformed, c_formed;
18 
19   PetscReal alpha; /*  Regularization parameter */
20   PetscReal gamma;
21   PetscReal ht; /*  Time step */
22   PetscReal T;  /*  Final time */
23   Mat       Q, QT;
24   Mat       L, LT;
25   Mat       Div, Divwork, Divxy[2];
26   Mat       Grad, Gradxy[2];
27   Mat       M;
28   Mat      *C, *Cwork;
29   /* Mat Hs,Hd,Hsd; */
30   Vec q;
31   Vec ur; /*  reference */
32 
33   Vec d;
34   Vec dwork;
35 
36   Vec  y; /*  state variables */
37   Vec  ywork;
38   Vec  ytrue;
39   Vec *yi, *yiwork, *ziwork;
40   Vec *uxi, *uyi, *uxiwork, *uyiwork, *ui, *uiwork;
41 
42   Vec u; /*  design variables */
43   Vec uwork, vwork;
44   Vec utrue;
45 
46   Vec js_diag;
47 
48   Vec c; /*  constraint vector */
49   Vec cwork;
50 
51   Vec lwork;
52 
53   KSP      solver;
54   PC       prec;
55   PetscInt block_index;
56 
57   PetscInt ksp_its;
58   PetscInt ksp_its_initial;
59 } AppCtx;
60 
61 PetscErrorCode FormFunction(Tao, Vec, PetscReal *, void *);
62 PetscErrorCode FormGradient(Tao, Vec, Vec, void *);
63 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal *, Vec, void *);
64 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void *);
65 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat, void *);
66 PetscErrorCode FormConstraints(Tao, Vec, Vec, void *);
67 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void *);
68 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
69 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
70 PetscErrorCode HyperbolicInitialize(AppCtx *user);
71 PetscErrorCode HyperbolicDestroy(AppCtx *user);
72 PetscErrorCode HyperbolicMonitor(Tao, void *);
73 
74 PetscErrorCode StateMatMult(Mat, Vec, Vec);
75 PetscErrorCode StateMatBlockMult(Mat, Vec, Vec);
76 PetscErrorCode StateMatBlockMultTranspose(Mat, Vec, Vec);
77 PetscErrorCode StateMatMultTranspose(Mat, Vec, Vec);
78 PetscErrorCode StateMatGetDiagonal(Mat, Vec);
79 PetscErrorCode StateMatDuplicate(Mat, MatDuplicateOption, Mat *);
80 PetscErrorCode StateMatInvMult(Mat, Vec, Vec);
81 PetscErrorCode StateMatInvTransposeMult(Mat, Vec, Vec);
82 PetscErrorCode StateMatBlockPrecMult(PC, Vec, Vec);
83 
84 PetscErrorCode DesignMatMult(Mat, Vec, Vec);
85 PetscErrorCode DesignMatMultTranspose(Mat, Vec, Vec);
86 
87 PetscErrorCode Scatter_yi(Vec, Vec *, VecScatter *, PetscInt); /*  y to y1,y2,...,y_nt */
88 PetscErrorCode Gather_yi(Vec, Vec *, VecScatter *, PetscInt);
89 PetscErrorCode Scatter_uxi_uyi(Vec, Vec *, VecScatter *, Vec *, VecScatter *, PetscInt); /*  u to ux_1,uy_1,ux_2,uy_2,...,u */
90 PetscErrorCode Gather_uxi_uyi(Vec, Vec *, VecScatter *, Vec *, VecScatter *, PetscInt);
91 
92 static char help[] = "";
93 
94 int main(int argc, char **argv)
95 {
96   Vec           x, x0;
97   Tao           tao;
98   AppCtx        user;
99   IS            is_allstate, is_alldesign;
100   PetscInt      lo, hi, hi2, lo2, ksp_old;
101   PetscInt      ntests = 1;
102   PetscInt      i;
103   PetscLogStage stages[1];
104 
105   PetscFunctionBeginUser;
106   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
107   user.mx = 32;
108   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "hyperbolic example", NULL);
109   PetscCall(PetscOptionsInt("-mx", "Number of grid points in each direction", "", user.mx, &user.mx, NULL));
110   user.nt = 16;
111   PetscCall(PetscOptionsInt("-nt", "Number of time steps", "", user.nt, &user.nt, NULL));
112   user.ndata = 64;
113   PetscCall(PetscOptionsInt("-ndata", "Numbers of data points per sample", "", user.ndata, &user.ndata, NULL));
114   user.alpha = 10.0;
115   PetscCall(PetscOptionsReal("-alpha", "Regularization parameter", "", user.alpha, &user.alpha, NULL));
116   user.T = 1.0 / 32.0;
117   PetscCall(PetscOptionsReal("-Tfinal", "Final time", "", user.T, &user.T, NULL));
118   PetscCall(PetscOptionsInt("-ntests", "Number of times to repeat TaoSolve", "", ntests, &ntests, NULL));
119   PetscOptionsEnd();
120 
121   user.m     = user.mx * user.mx * user.nt;     /*  number of constraints */
122   user.n     = user.mx * user.mx * 3 * user.nt; /*  number of variables */
123   user.ht    = user.T / user.nt;                /*  Time step */
124   user.gamma = user.T * user.ht / (user.mx * user.mx);
125 
126   PetscCall(VecCreate(PETSC_COMM_WORLD, &user.u));
127   PetscCall(VecCreate(PETSC_COMM_WORLD, &user.y));
128   PetscCall(VecCreate(PETSC_COMM_WORLD, &user.c));
129   PetscCall(VecSetSizes(user.u, PETSC_DECIDE, user.n - user.m));
130   PetscCall(VecSetSizes(user.y, PETSC_DECIDE, user.m));
131   PetscCall(VecSetSizes(user.c, PETSC_DECIDE, user.m));
132   PetscCall(VecSetFromOptions(user.u));
133   PetscCall(VecSetFromOptions(user.y));
134   PetscCall(VecSetFromOptions(user.c));
135 
136   /* Create scatters for reduced spaces.
137      If the state vector y and design vector u are partitioned as
138      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
139      then the solution vector x is organized as
140      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
141      The index sets user.s_is and user.d_is correspond to the indices of the
142      state and design variables owned by the current processor.
143   */
144   PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
145 
146   PetscCall(VecGetOwnershipRange(user.y, &lo, &hi));
147   PetscCall(VecGetOwnershipRange(user.u, &lo2, &hi2));
148 
149   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_allstate));
150   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + lo2, 1, &user.s_is));
151 
152   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, lo2, 1, &is_alldesign));
153   PetscCall(ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, hi + lo2, 1, &user.d_is));
154 
155   PetscCall(VecSetSizes(x, hi - lo + hi2 - lo2, user.n));
156   PetscCall(VecSetFromOptions(x));
157 
158   PetscCall(VecScatterCreate(x, user.s_is, user.y, is_allstate, &user.state_scatter));
159   PetscCall(VecScatterCreate(x, user.d_is, user.u, is_alldesign, &user.design_scatter));
160   PetscCall(ISDestroy(&is_alldesign));
161   PetscCall(ISDestroy(&is_allstate));
162 
163   /* Create TAO solver and set desired solution method */
164   PetscCall(TaoCreate(PETSC_COMM_WORLD, &tao));
165   PetscCall(TaoSetType(tao, TAOLCL));
166 
167   /* Set up initial vectors and matrices */
168   PetscCall(HyperbolicInitialize(&user));
169 
170   PetscCall(Gather(x, user.y, user.state_scatter, user.u, user.design_scatter));
171   PetscCall(VecDuplicate(x, &x0));
172   PetscCall(VecCopy(x, x0));
173 
174   /* Set solution vector with an initial guess */
175   PetscCall(TaoSetSolution(tao, x));
176   PetscCall(TaoSetObjective(tao, FormFunction, &user));
177   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
178   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
179   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, user.Js, user.JsInv, FormJacobianState, &user));
180   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
181   PetscCall(TaoSetFromOptions(tao));
182   PetscCall(TaoSetStateDesignIS(tao, user.s_is, user.d_is));
183 
184   /* SOLVE THE APPLICATION */
185   PetscCall(PetscLogStageRegister("Trials", &stages[0]));
186   PetscCall(PetscLogStagePush(stages[0]));
187   user.ksp_its_initial = user.ksp_its;
188   ksp_old              = user.ksp_its;
189   for (i = 0; i < ntests; i++) {
190     PetscCall(TaoSolve(tao));
191     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "KSP Iterations = %" PetscInt_FMT "\n", user.ksp_its - ksp_old));
192     PetscCall(VecCopy(x0, x));
193     PetscCall(TaoSetSolution(tao, x));
194   }
195   PetscCall(PetscLogStagePop());
196   PetscCall(PetscBarrier((PetscObject)x));
197   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "KSP iterations within initialization: "));
198   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", user.ksp_its_initial));
199   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Total KSP iterations over %" PetscInt_FMT " trial(s): ", ntests));
200   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", user.ksp_its));
201   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "KSP iterations per trial: "));
202   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", (user.ksp_its - user.ksp_its_initial) / ntests));
203 
204   PetscCall(TaoDestroy(&tao));
205   PetscCall(VecDestroy(&x));
206   PetscCall(VecDestroy(&x0));
207   PetscCall(HyperbolicDestroy(&user));
208   PetscCall(PetscFinalize());
209   return 0;
210 }
211 /* ------------------------------------------------------------------- */
212 /*
213    dwork = Qy - d
214    lwork = L*(u-ur).^2
215    f = 1/2 * (dwork.dork + alpha*y.lwork)
216 */
217 PetscErrorCode FormFunction(Tao tao, Vec X, PetscReal *f, void *ptr)
218 {
219   PetscReal d1 = 0, d2 = 0;
220   AppCtx   *user = (AppCtx *)ptr;
221 
222   PetscFunctionBegin;
223   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
224   PetscCall(MatMult(user->Q, user->y, user->dwork));
225   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
226   PetscCall(VecDot(user->dwork, user->dwork, &d1));
227 
228   PetscCall(VecWAXPY(user->uwork, -1.0, user->ur, user->u));
229   PetscCall(VecPointwiseMult(user->uwork, user->uwork, user->uwork));
230   PetscCall(MatMult(user->L, user->uwork, user->lwork));
231   PetscCall(VecDot(user->y, user->lwork, &d2));
232   *f = 0.5 * (d1 + user->alpha * d2);
233   PetscFunctionReturn(PETSC_SUCCESS);
234 }
235 
236 /* ------------------------------------------------------------------- */
237 /*
238     state: g_s = Q' *(Qy - d) + 0.5*alpha*L*(u-ur).^2
239     design: g_d = alpha*(L'y).*(u-ur)
240 */
241 PetscErrorCode FormGradient(Tao tao, Vec X, Vec G, void *ptr)
242 {
243   AppCtx *user = (AppCtx *)ptr;
244 
245   PetscFunctionBegin;
246   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
247   PetscCall(MatMult(user->Q, user->y, user->dwork));
248   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
249 
250   PetscCall(MatMult(user->QT, user->dwork, user->ywork));
251 
252   PetscCall(MatMult(user->LT, user->y, user->uwork));
253   PetscCall(VecWAXPY(user->vwork, -1.0, user->ur, user->u));
254   PetscCall(VecPointwiseMult(user->uwork, user->vwork, user->uwork));
255   PetscCall(VecScale(user->uwork, user->alpha));
256 
257   PetscCall(VecPointwiseMult(user->vwork, user->vwork, user->vwork));
258   PetscCall(MatMult(user->L, user->vwork, user->lwork));
259   PetscCall(VecAXPY(user->ywork, 0.5 * user->alpha, user->lwork));
260 
261   PetscCall(Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
266 {
267   PetscReal d1, d2;
268   AppCtx   *user = (AppCtx *)ptr;
269 
270   PetscFunctionBegin;
271   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
272   PetscCall(MatMult(user->Q, user->y, user->dwork));
273   PetscCall(VecAXPY(user->dwork, -1.0, user->d));
274 
275   PetscCall(MatMult(user->QT, user->dwork, user->ywork));
276 
277   PetscCall(VecDot(user->dwork, user->dwork, &d1));
278 
279   PetscCall(MatMult(user->LT, user->y, user->uwork));
280   PetscCall(VecWAXPY(user->vwork, -1.0, user->ur, user->u));
281   PetscCall(VecPointwiseMult(user->uwork, user->vwork, user->uwork));
282   PetscCall(VecScale(user->uwork, user->alpha));
283 
284   PetscCall(VecPointwiseMult(user->vwork, user->vwork, user->vwork));
285   PetscCall(MatMult(user->L, user->vwork, user->lwork));
286   PetscCall(VecAXPY(user->ywork, 0.5 * user->alpha, user->lwork));
287 
288   PetscCall(VecDot(user->y, user->lwork, &d2));
289 
290   *f = 0.5 * (d1 + user->alpha * d2);
291   PetscCall(Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
292   PetscFunctionReturn(PETSC_SUCCESS);
293 }
294 
295 /* ------------------------------------------------------------------- */
296 /* A
297 MatShell object
298 */
299 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
300 {
301   PetscInt i;
302   AppCtx  *user = (AppCtx *)ptr;
303 
304   PetscFunctionBegin;
305   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
306   PetscCall(Scatter_yi(user->u, user->ui, user->ui_scatter, user->nt));
307   PetscCall(Scatter_uxi_uyi(user->u, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
308   for (i = 0; i < user->nt; i++) {
309     PetscCall(MatCopy(user->Divxy[0], user->C[i], SUBSET_NONZERO_PATTERN));
310     PetscCall(MatCopy(user->Divxy[1], user->Cwork[i], SAME_NONZERO_PATTERN));
311 
312     PetscCall(MatDiagonalScale(user->C[i], NULL, user->uxi[i]));
313     PetscCall(MatDiagonalScale(user->Cwork[i], NULL, user->uyi[i]));
314     PetscCall(MatAXPY(user->C[i], 1.0, user->Cwork[i], SUBSET_NONZERO_PATTERN));
315     PetscCall(MatScale(user->C[i], user->ht));
316     PetscCall(MatShift(user->C[i], 1.0));
317   }
318   PetscFunctionReturn(PETSC_SUCCESS);
319 }
320 
321 /* ------------------------------------------------------------------- */
322 /* B */
323 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
324 {
325   AppCtx *user = (AppCtx *)ptr;
326 
327   PetscFunctionBegin;
328   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
329   PetscFunctionReturn(PETSC_SUCCESS);
330 }
331 
332 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
333 {
334   PetscInt i;
335   AppCtx  *user;
336 
337   PetscFunctionBegin;
338   PetscCall(MatShellGetContext(J_shell, &user));
339   PetscCall(Scatter_yi(X, user->yi, user->yi_scatter, user->nt));
340   user->block_index = 0;
341   PetscCall(MatMult(user->JsBlock, user->yi[0], user->yiwork[0]));
342 
343   for (i = 1; i < user->nt; i++) {
344     user->block_index = i;
345     PetscCall(MatMult(user->JsBlock, user->yi[i], user->yiwork[i]));
346     PetscCall(MatMult(user->M, user->yi[i - 1], user->ziwork[i - 1]));
347     PetscCall(VecAXPY(user->yiwork[i], -1.0, user->ziwork[i - 1]));
348   }
349   PetscCall(Gather_yi(Y, user->yiwork, user->yi_scatter, user->nt));
350   PetscFunctionReturn(PETSC_SUCCESS);
351 }
352 
353 PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y)
354 {
355   PetscInt i;
356   AppCtx  *user;
357 
358   PetscFunctionBegin;
359   PetscCall(MatShellGetContext(J_shell, &user));
360   PetscCall(Scatter_yi(X, user->yi, user->yi_scatter, user->nt));
361 
362   for (i = 0; i < user->nt - 1; i++) {
363     user->block_index = i;
364     PetscCall(MatMultTranspose(user->JsBlock, user->yi[i], user->yiwork[i]));
365     PetscCall(MatMult(user->M, user->yi[i + 1], user->ziwork[i + 1]));
366     PetscCall(VecAXPY(user->yiwork[i], -1.0, user->ziwork[i + 1]));
367   }
368 
369   i                 = user->nt - 1;
370   user->block_index = i;
371   PetscCall(MatMultTranspose(user->JsBlock, user->yi[i], user->yiwork[i]));
372   PetscCall(Gather_yi(Y, user->yiwork, user->yi_scatter, user->nt));
373   PetscFunctionReturn(PETSC_SUCCESS);
374 }
375 
376 PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y)
377 {
378   PetscInt i;
379   AppCtx  *user;
380 
381   PetscFunctionBegin;
382   PetscCall(MatShellGetContext(J_shell, &user));
383   i = user->block_index;
384   PetscCall(VecPointwiseMult(user->uxiwork[i], X, user->uxi[i]));
385   PetscCall(VecPointwiseMult(user->uyiwork[i], X, user->uyi[i]));
386   PetscCall(Gather(user->uiwork[i], user->uxiwork[i], user->ux_scatter[i], user->uyiwork[i], user->uy_scatter[i]));
387   PetscCall(MatMult(user->Div, user->uiwork[i], Y));
388   PetscCall(VecAYPX(Y, user->ht, X));
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
392 PetscErrorCode StateMatBlockMultTranspose(Mat J_shell, Vec X, Vec Y)
393 {
394   PetscInt i;
395   AppCtx  *user;
396 
397   PetscFunctionBegin;
398   PetscCall(MatShellGetContext(J_shell, &user));
399   i = user->block_index;
400   PetscCall(MatMult(user->Grad, X, user->uiwork[i]));
401   PetscCall(Scatter(user->uiwork[i], user->uxiwork[i], user->ux_scatter[i], user->uyiwork[i], user->uy_scatter[i]));
402   PetscCall(VecPointwiseMult(user->uxiwork[i], user->uxi[i], user->uxiwork[i]));
403   PetscCall(VecPointwiseMult(user->uyiwork[i], user->uyi[i], user->uyiwork[i]));
404   PetscCall(VecWAXPY(Y, 1.0, user->uxiwork[i], user->uyiwork[i]));
405   PetscCall(VecAYPX(Y, user->ht, X));
406   PetscFunctionReturn(PETSC_SUCCESS);
407 }
408 
409 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
410 {
411   PetscInt i;
412   AppCtx  *user;
413 
414   PetscFunctionBegin;
415   PetscCall(MatShellGetContext(J_shell, &user));
416   PetscCall(Scatter_yi(user->y, user->yi, user->yi_scatter, user->nt));
417   PetscCall(Scatter_uxi_uyi(X, user->uxiwork, user->uxi_scatter, user->uyiwork, user->uyi_scatter, user->nt));
418   for (i = 0; i < user->nt; i++) {
419     PetscCall(VecPointwiseMult(user->uxiwork[i], user->yi[i], user->uxiwork[i]));
420     PetscCall(VecPointwiseMult(user->uyiwork[i], user->yi[i], user->uyiwork[i]));
421     PetscCall(Gather(user->uiwork[i], user->uxiwork[i], user->ux_scatter[i], user->uyiwork[i], user->uy_scatter[i]));
422     PetscCall(MatMult(user->Div, user->uiwork[i], user->ziwork[i]));
423     PetscCall(VecScale(user->ziwork[i], user->ht));
424   }
425   PetscCall(Gather_yi(Y, user->ziwork, user->yi_scatter, user->nt));
426   PetscFunctionReturn(PETSC_SUCCESS);
427 }
428 
429 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
430 {
431   PetscInt i;
432   AppCtx  *user;
433 
434   PetscFunctionBegin;
435   PetscCall(MatShellGetContext(J_shell, &user));
436   PetscCall(Scatter_yi(user->y, user->yi, user->yi_scatter, user->nt));
437   PetscCall(Scatter_yi(X, user->yiwork, user->yi_scatter, user->nt));
438   for (i = 0; i < user->nt; i++) {
439     PetscCall(MatMult(user->Grad, user->yiwork[i], user->uiwork[i]));
440     PetscCall(Scatter(user->uiwork[i], user->uxiwork[i], user->ux_scatter[i], user->uyiwork[i], user->uy_scatter[i]));
441     PetscCall(VecPointwiseMult(user->uxiwork[i], user->yi[i], user->uxiwork[i]));
442     PetscCall(VecPointwiseMult(user->uyiwork[i], user->yi[i], user->uyiwork[i]));
443     PetscCall(Gather(user->uiwork[i], user->uxiwork[i], user->ux_scatter[i], user->uyiwork[i], user->uy_scatter[i]));
444     PetscCall(VecScale(user->uiwork[i], user->ht));
445   }
446   PetscCall(Gather_yi(Y, user->uiwork, user->ui_scatter, user->nt));
447   PetscFunctionReturn(PETSC_SUCCESS);
448 }
449 
450 PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y)
451 {
452   PetscInt i;
453   AppCtx  *user;
454 
455   PetscFunctionBegin;
456   PetscCall(PCShellGetContext(PC_shell, &user));
457   i = user->block_index;
458   if (user->c_formed) {
459     PetscCall(MatSOR(user->C[i], X, 1.0, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP), 0.0, 1, 1, Y));
460   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not formed");
461   PetscFunctionReturn(PETSC_SUCCESS);
462 }
463 
464 PetscErrorCode StateMatBlockPrecMultTranspose(PC PC_shell, Vec X, Vec Y)
465 {
466   PetscInt i;
467   AppCtx  *user;
468 
469   PetscFunctionBegin;
470   PetscCall(PCShellGetContext(PC_shell, &user));
471 
472   i = user->block_index;
473   if (user->c_formed) {
474     PetscCall(MatSOR(user->C[i], X, 1.0, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP), 0.0, 1, 1, Y));
475   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not formed");
476   PetscFunctionReturn(PETSC_SUCCESS);
477 }
478 
479 PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
480 {
481   AppCtx  *user;
482   PetscInt its, i;
483 
484   PetscFunctionBegin;
485   PetscCall(MatShellGetContext(J_shell, &user));
486 
487   if (Y == user->ytrue) {
488     /* First solve is done using true solution to set up problem */
489     PetscCall(KSPSetTolerances(user->solver, 1e-4, 1e-20, PETSC_DEFAULT, PETSC_DEFAULT));
490   } else {
491     PetscCall(KSPSetTolerances(user->solver, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
492   }
493   PetscCall(Scatter_yi(X, user->yi, user->yi_scatter, user->nt));
494   PetscCall(Scatter_yi(Y, user->yiwork, user->yi_scatter, user->nt));
495   PetscCall(Scatter_uxi_uyi(user->u, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
496 
497   user->block_index = 0;
498   PetscCall(KSPSolve(user->solver, user->yi[0], user->yiwork[0]));
499 
500   PetscCall(KSPGetIterationNumber(user->solver, &its));
501   user->ksp_its = user->ksp_its + its;
502   for (i = 1; i < user->nt; i++) {
503     PetscCall(MatMult(user->M, user->yiwork[i - 1], user->ziwork[i - 1]));
504     PetscCall(VecAXPY(user->yi[i], 1.0, user->ziwork[i - 1]));
505     user->block_index = i;
506     PetscCall(KSPSolve(user->solver, user->yi[i], user->yiwork[i]));
507 
508     PetscCall(KSPGetIterationNumber(user->solver, &its));
509     user->ksp_its = user->ksp_its + its;
510   }
511   PetscCall(Gather_yi(Y, user->yiwork, user->yi_scatter, user->nt));
512   PetscFunctionReturn(PETSC_SUCCESS);
513 }
514 
515 PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
516 {
517   AppCtx  *user;
518   PetscInt its, i;
519 
520   PetscFunctionBegin;
521   PetscCall(MatShellGetContext(J_shell, &user));
522 
523   PetscCall(Scatter_yi(X, user->yi, user->yi_scatter, user->nt));
524   PetscCall(Scatter_yi(Y, user->yiwork, user->yi_scatter, user->nt));
525   PetscCall(Scatter_uxi_uyi(user->u, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
526 
527   i                 = user->nt - 1;
528   user->block_index = i;
529   PetscCall(KSPSolveTranspose(user->solver, user->yi[i], user->yiwork[i]));
530 
531   PetscCall(KSPGetIterationNumber(user->solver, &its));
532   user->ksp_its = user->ksp_its + its;
533 
534   for (i = user->nt - 2; i >= 0; i--) {
535     PetscCall(MatMult(user->M, user->yiwork[i + 1], user->ziwork[i + 1]));
536     PetscCall(VecAXPY(user->yi[i], 1.0, user->ziwork[i + 1]));
537     user->block_index = i;
538     PetscCall(KSPSolveTranspose(user->solver, user->yi[i], user->yiwork[i]));
539 
540     PetscCall(KSPGetIterationNumber(user->solver, &its));
541     user->ksp_its = user->ksp_its + its;
542   }
543   PetscCall(Gather_yi(Y, user->yiwork, user->yi_scatter, user->nt));
544   PetscFunctionReturn(PETSC_SUCCESS);
545 }
546 
547 PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
548 {
549   AppCtx *user;
550 
551   PetscFunctionBegin;
552   PetscCall(MatShellGetContext(J_shell, &user));
553 
554   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->m, user, new_shell));
555   PetscCall(MatShellSetOperation(*new_shell, MATOP_MULT, (void (*)(void))StateMatMult));
556   PetscCall(MatShellSetOperation(*new_shell, MATOP_DUPLICATE, (void (*)(void))StateMatDuplicate));
557   PetscCall(MatShellSetOperation(*new_shell, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatMultTranspose));
558   PetscCall(MatShellSetOperation(*new_shell, MATOP_GET_DIAGONAL, (void (*)(void))StateMatGetDiagonal));
559   PetscFunctionReturn(PETSC_SUCCESS);
560 }
561 
562 PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
563 {
564   AppCtx *user;
565 
566   PetscFunctionBegin;
567   PetscCall(MatShellGetContext(J_shell, &user));
568   PetscCall(VecCopy(user->js_diag, X));
569   PetscFunctionReturn(PETSC_SUCCESS);
570 }
571 
572 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
573 {
574   /* con = Ay - q, A = [C(u1)  0     0     ...   0;
575                          -M  C(u2)   0     ...   0;
576                           0   -M   C(u3)   ...   0;
577                                       ...         ;
578                           0    ...      -M C(u_nt)]
579      C(u) = eye + ht*Div*[diag(u1); diag(u2)]       */
580   PetscInt i;
581   AppCtx  *user = (AppCtx *)ptr;
582 
583   PetscFunctionBegin;
584   PetscCall(Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter));
585   PetscCall(Scatter_yi(user->y, user->yi, user->yi_scatter, user->nt));
586   PetscCall(Scatter_uxi_uyi(user->u, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
587 
588   user->block_index = 0;
589   PetscCall(MatMult(user->JsBlock, user->yi[0], user->yiwork[0]));
590 
591   for (i = 1; i < user->nt; i++) {
592     user->block_index = i;
593     PetscCall(MatMult(user->JsBlock, user->yi[i], user->yiwork[i]));
594     PetscCall(MatMult(user->M, user->yi[i - 1], user->ziwork[i - 1]));
595     PetscCall(VecAXPY(user->yiwork[i], -1.0, user->ziwork[i - 1]));
596   }
597 
598   PetscCall(Gather_yi(C, user->yiwork, user->yi_scatter, user->nt));
599   PetscCall(VecAXPY(C, -1.0, user->q));
600 
601   PetscFunctionReturn(PETSC_SUCCESS);
602 }
603 
604 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
605 {
606   PetscFunctionBegin;
607   PetscCall(VecScatterBegin(s_scat, x, state, INSERT_VALUES, SCATTER_FORWARD));
608   PetscCall(VecScatterEnd(s_scat, x, state, INSERT_VALUES, SCATTER_FORWARD));
609   PetscCall(VecScatterBegin(d_scat, x, design, INSERT_VALUES, SCATTER_FORWARD));
610   PetscCall(VecScatterEnd(d_scat, x, design, INSERT_VALUES, SCATTER_FORWARD));
611   PetscFunctionReturn(PETSC_SUCCESS);
612 }
613 
614 PetscErrorCode Scatter_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
615 {
616   PetscInt i;
617 
618   PetscFunctionBegin;
619   for (i = 0; i < nt; i++) {
620     PetscCall(VecScatterBegin(scatx[i], u, uxi[i], INSERT_VALUES, SCATTER_FORWARD));
621     PetscCall(VecScatterEnd(scatx[i], u, uxi[i], INSERT_VALUES, SCATTER_FORWARD));
622     PetscCall(VecScatterBegin(scaty[i], u, uyi[i], INSERT_VALUES, SCATTER_FORWARD));
623     PetscCall(VecScatterEnd(scaty[i], u, uyi[i], INSERT_VALUES, SCATTER_FORWARD));
624   }
625   PetscFunctionReturn(PETSC_SUCCESS);
626 }
627 
628 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
629 {
630   PetscFunctionBegin;
631   PetscCall(VecScatterBegin(s_scat, state, x, INSERT_VALUES, SCATTER_REVERSE));
632   PetscCall(VecScatterEnd(s_scat, state, x, INSERT_VALUES, SCATTER_REVERSE));
633   PetscCall(VecScatterBegin(d_scat, design, x, INSERT_VALUES, SCATTER_REVERSE));
634   PetscCall(VecScatterEnd(d_scat, design, x, INSERT_VALUES, SCATTER_REVERSE));
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
638 PetscErrorCode Gather_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
639 {
640   PetscInt i;
641 
642   PetscFunctionBegin;
643   for (i = 0; i < nt; i++) {
644     PetscCall(VecScatterBegin(scatx[i], uxi[i], u, INSERT_VALUES, SCATTER_REVERSE));
645     PetscCall(VecScatterEnd(scatx[i], uxi[i], u, INSERT_VALUES, SCATTER_REVERSE));
646     PetscCall(VecScatterBegin(scaty[i], uyi[i], u, INSERT_VALUES, SCATTER_REVERSE));
647     PetscCall(VecScatterEnd(scaty[i], uyi[i], u, INSERT_VALUES, SCATTER_REVERSE));
648   }
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
652 PetscErrorCode Scatter_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
653 {
654   PetscInt i;
655 
656   PetscFunctionBegin;
657   for (i = 0; i < nt; i++) {
658     PetscCall(VecScatterBegin(scat[i], y, yi[i], INSERT_VALUES, SCATTER_FORWARD));
659     PetscCall(VecScatterEnd(scat[i], y, yi[i], INSERT_VALUES, SCATTER_FORWARD));
660   }
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 PetscErrorCode Gather_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
665 {
666   PetscInt i;
667 
668   PetscFunctionBegin;
669   for (i = 0; i < nt; i++) {
670     PetscCall(VecScatterBegin(scat[i], yi[i], y, INSERT_VALUES, SCATTER_REVERSE));
671     PetscCall(VecScatterEnd(scat[i], yi[i], y, INSERT_VALUES, SCATTER_REVERSE));
672   }
673   PetscFunctionReturn(PETSC_SUCCESS);
674 }
675 
676 PetscErrorCode HyperbolicInitialize(AppCtx *user)
677 {
678   PetscInt    n, i, j, linear_index, istart, iend, iblock, lo, hi;
679   Vec         XX, YY, XXwork, YYwork, yi, uxi, ui, bc;
680   PetscReal   h, sum;
681   PetscScalar hinv, neg_hinv, quarter = 0.25, one = 1.0, half_hinv, neg_half_hinv;
682   PetscScalar vx, vy, zero = 0.0;
683   IS          is_from_y, is_to_yi, is_from_u, is_to_uxi, is_to_uyi;
684 
685   PetscFunctionBegin;
686   user->jformed  = PETSC_FALSE;
687   user->c_formed = PETSC_FALSE;
688 
689   user->ksp_its         = 0;
690   user->ksp_its_initial = 0;
691 
692   n = user->mx * user->mx;
693 
694   h             = 1.0 / user->mx;
695   hinv          = user->mx;
696   neg_hinv      = -hinv;
697   half_hinv     = hinv / 2.0;
698   neg_half_hinv = neg_hinv / 2.0;
699 
700   /* Generate Grad matrix */
701   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Grad));
702   PetscCall(MatSetSizes(user->Grad, PETSC_DECIDE, PETSC_DECIDE, 2 * n, n));
703   PetscCall(MatSetFromOptions(user->Grad));
704   PetscCall(MatMPIAIJSetPreallocation(user->Grad, 3, NULL, 3, NULL));
705   PetscCall(MatSeqAIJSetPreallocation(user->Grad, 3, NULL));
706   PetscCall(MatGetOwnershipRange(user->Grad, &istart, &iend));
707 
708   for (i = istart; i < iend; i++) {
709     if (i < n) {
710       iblock = i / user->mx;
711       j      = iblock * user->mx + ((i + user->mx - 1) % user->mx);
712       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &half_hinv, INSERT_VALUES));
713       j = iblock * user->mx + ((i + 1) % user->mx);
714       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &neg_half_hinv, INSERT_VALUES));
715     }
716     if (i >= n) {
717       j = (i - user->mx) % n;
718       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &half_hinv, INSERT_VALUES));
719       j = (j + 2 * user->mx) % n;
720       PetscCall(MatSetValues(user->Grad, 1, &i, 1, &j, &neg_half_hinv, INSERT_VALUES));
721     }
722   }
723 
724   PetscCall(MatAssemblyBegin(user->Grad, MAT_FINAL_ASSEMBLY));
725   PetscCall(MatAssemblyEnd(user->Grad, MAT_FINAL_ASSEMBLY));
726 
727   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Gradxy[0]));
728   PetscCall(MatSetSizes(user->Gradxy[0], PETSC_DECIDE, PETSC_DECIDE, n, n));
729   PetscCall(MatSetFromOptions(user->Gradxy[0]));
730   PetscCall(MatMPIAIJSetPreallocation(user->Gradxy[0], 3, NULL, 3, NULL));
731   PetscCall(MatSeqAIJSetPreallocation(user->Gradxy[0], 3, NULL));
732   PetscCall(MatGetOwnershipRange(user->Gradxy[0], &istart, &iend));
733 
734   for (i = istart; i < iend; i++) {
735     iblock = i / user->mx;
736     j      = iblock * user->mx + ((i + user->mx - 1) % user->mx);
737     PetscCall(MatSetValues(user->Gradxy[0], 1, &i, 1, &j, &half_hinv, INSERT_VALUES));
738     j = iblock * user->mx + ((i + 1) % user->mx);
739     PetscCall(MatSetValues(user->Gradxy[0], 1, &i, 1, &j, &neg_half_hinv, INSERT_VALUES));
740     PetscCall(MatSetValues(user->Gradxy[0], 1, &i, 1, &i, &zero, INSERT_VALUES));
741   }
742   PetscCall(MatAssemblyBegin(user->Gradxy[0], MAT_FINAL_ASSEMBLY));
743   PetscCall(MatAssemblyEnd(user->Gradxy[0], MAT_FINAL_ASSEMBLY));
744 
745   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Gradxy[1]));
746   PetscCall(MatSetSizes(user->Gradxy[1], PETSC_DECIDE, PETSC_DECIDE, n, n));
747   PetscCall(MatSetFromOptions(user->Gradxy[1]));
748   PetscCall(MatMPIAIJSetPreallocation(user->Gradxy[1], 3, NULL, 3, NULL));
749   PetscCall(MatSeqAIJSetPreallocation(user->Gradxy[1], 3, NULL));
750   PetscCall(MatGetOwnershipRange(user->Gradxy[1], &istart, &iend));
751 
752   for (i = istart; i < iend; i++) {
753     j = (i + n - user->mx) % n;
754     PetscCall(MatSetValues(user->Gradxy[1], 1, &i, 1, &j, &half_hinv, INSERT_VALUES));
755     j = (j + 2 * user->mx) % n;
756     PetscCall(MatSetValues(user->Gradxy[1], 1, &i, 1, &j, &neg_half_hinv, INSERT_VALUES));
757     PetscCall(MatSetValues(user->Gradxy[1], 1, &i, 1, &i, &zero, INSERT_VALUES));
758   }
759   PetscCall(MatAssemblyBegin(user->Gradxy[1], MAT_FINAL_ASSEMBLY));
760   PetscCall(MatAssemblyEnd(user->Gradxy[1], MAT_FINAL_ASSEMBLY));
761 
762   /* Generate Div matrix */
763   PetscCall(MatTranspose(user->Grad, MAT_INITIAL_MATRIX, &user->Div));
764   PetscCall(MatTranspose(user->Gradxy[0], MAT_INITIAL_MATRIX, &user->Divxy[0]));
765   PetscCall(MatTranspose(user->Gradxy[1], MAT_INITIAL_MATRIX, &user->Divxy[1]));
766 
767   /* Off-diagonal averaging matrix */
768   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->M));
769   PetscCall(MatSetSizes(user->M, PETSC_DECIDE, PETSC_DECIDE, n, n));
770   PetscCall(MatSetFromOptions(user->M));
771   PetscCall(MatMPIAIJSetPreallocation(user->M, 4, NULL, 4, NULL));
772   PetscCall(MatSeqAIJSetPreallocation(user->M, 4, NULL));
773   PetscCall(MatGetOwnershipRange(user->M, &istart, &iend));
774 
775   for (i = istart; i < iend; i++) {
776     /* kron(Id,Av) */
777     iblock = i / user->mx;
778     j      = iblock * user->mx + ((i + user->mx - 1) % user->mx);
779     PetscCall(MatSetValues(user->M, 1, &i, 1, &j, &quarter, INSERT_VALUES));
780     j = iblock * user->mx + ((i + 1) % user->mx);
781     PetscCall(MatSetValues(user->M, 1, &i, 1, &j, &quarter, INSERT_VALUES));
782 
783     /* kron(Av,Id) */
784     j = (i + user->mx) % n;
785     PetscCall(MatSetValues(user->M, 1, &i, 1, &j, &quarter, INSERT_VALUES));
786     j = (i + n - user->mx) % n;
787     PetscCall(MatSetValues(user->M, 1, &i, 1, &j, &quarter, INSERT_VALUES));
788   }
789   PetscCall(MatAssemblyBegin(user->M, MAT_FINAL_ASSEMBLY));
790   PetscCall(MatAssemblyEnd(user->M, MAT_FINAL_ASSEMBLY));
791 
792   /* Generate 2D grid */
793   PetscCall(VecCreate(PETSC_COMM_WORLD, &XX));
794   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->q));
795   PetscCall(VecSetSizes(XX, PETSC_DECIDE, n));
796   PetscCall(VecSetSizes(user->q, PETSC_DECIDE, n * user->nt));
797   PetscCall(VecSetFromOptions(XX));
798   PetscCall(VecSetFromOptions(user->q));
799 
800   PetscCall(VecDuplicate(XX, &YY));
801   PetscCall(VecDuplicate(XX, &XXwork));
802   PetscCall(VecDuplicate(XX, &YYwork));
803   PetscCall(VecDuplicate(XX, &user->d));
804   PetscCall(VecDuplicate(XX, &user->dwork));
805 
806   PetscCall(VecGetOwnershipRange(XX, &istart, &iend));
807   for (linear_index = istart; linear_index < iend; linear_index++) {
808     i  = linear_index % user->mx;
809     j  = (linear_index - i) / user->mx;
810     vx = h * (i + 0.5);
811     vy = h * (j + 0.5);
812     PetscCall(VecSetValues(XX, 1, &linear_index, &vx, INSERT_VALUES));
813     PetscCall(VecSetValues(YY, 1, &linear_index, &vy, INSERT_VALUES));
814   }
815 
816   PetscCall(VecAssemblyBegin(XX));
817   PetscCall(VecAssemblyEnd(XX));
818   PetscCall(VecAssemblyBegin(YY));
819   PetscCall(VecAssemblyEnd(YY));
820 
821   /* Compute final density function yT
822      yT = 1.0 + exp(-30*((x-0.25)^2+(y-0.25)^2)) + exp(-30*((x-0.75)^2+(y-0.75)^2))
823      yT = yT / (h^2*sum(yT)) */
824   PetscCall(VecCopy(XX, XXwork));
825   PetscCall(VecCopy(YY, YYwork));
826 
827   PetscCall(VecShift(XXwork, -0.25));
828   PetscCall(VecShift(YYwork, -0.25));
829 
830   PetscCall(VecPointwiseMult(XXwork, XXwork, XXwork));
831   PetscCall(VecPointwiseMult(YYwork, YYwork, YYwork));
832 
833   PetscCall(VecCopy(XXwork, user->dwork));
834   PetscCall(VecAXPY(user->dwork, 1.0, YYwork));
835   PetscCall(VecScale(user->dwork, -30.0));
836   PetscCall(VecExp(user->dwork));
837   PetscCall(VecCopy(user->dwork, user->d));
838 
839   PetscCall(VecCopy(XX, XXwork));
840   PetscCall(VecCopy(YY, YYwork));
841 
842   PetscCall(VecShift(XXwork, -0.75));
843   PetscCall(VecShift(YYwork, -0.75));
844 
845   PetscCall(VecPointwiseMult(XXwork, XXwork, XXwork));
846   PetscCall(VecPointwiseMult(YYwork, YYwork, YYwork));
847 
848   PetscCall(VecCopy(XXwork, user->dwork));
849   PetscCall(VecAXPY(user->dwork, 1.0, YYwork));
850   PetscCall(VecScale(user->dwork, -30.0));
851   PetscCall(VecExp(user->dwork));
852 
853   PetscCall(VecAXPY(user->d, 1.0, user->dwork));
854   PetscCall(VecShift(user->d, 1.0));
855   PetscCall(VecSum(user->d, &sum));
856   PetscCall(VecScale(user->d, 1.0 / (h * h * sum)));
857 
858   /* Initial conditions of forward problem */
859   PetscCall(VecDuplicate(XX, &bc));
860   PetscCall(VecCopy(XX, XXwork));
861   PetscCall(VecCopy(YY, YYwork));
862 
863   PetscCall(VecShift(XXwork, -0.5));
864   PetscCall(VecShift(YYwork, -0.5));
865 
866   PetscCall(VecPointwiseMult(XXwork, XXwork, XXwork));
867   PetscCall(VecPointwiseMult(YYwork, YYwork, YYwork));
868 
869   PetscCall(VecWAXPY(bc, 1.0, XXwork, YYwork));
870   PetscCall(VecScale(bc, -50.0));
871   PetscCall(VecExp(bc));
872   PetscCall(VecShift(bc, 1.0));
873   PetscCall(VecSum(bc, &sum));
874   PetscCall(VecScale(bc, 1.0 / (h * h * sum)));
875 
876   /* Create scatter from y to y_1,y_2,...,y_nt */
877   /*  TODO: Reorder for better parallelism. (This will require reordering Q and L as well.) */
878   PetscCall(PetscMalloc1(user->nt * user->mx * user->mx, &user->yi_scatter));
879   PetscCall(VecCreate(PETSC_COMM_WORLD, &yi));
880   PetscCall(VecSetSizes(yi, PETSC_DECIDE, user->mx * user->mx));
881   PetscCall(VecSetFromOptions(yi));
882   PetscCall(VecDuplicateVecs(yi, user->nt, &user->yi));
883   PetscCall(VecDuplicateVecs(yi, user->nt, &user->yiwork));
884   PetscCall(VecDuplicateVecs(yi, user->nt, &user->ziwork));
885   for (i = 0; i < user->nt; i++) {
886     PetscCall(VecGetOwnershipRange(user->yi[i], &lo, &hi));
887     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_yi));
888     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + i * user->mx * user->mx, 1, &is_from_y));
889     PetscCall(VecScatterCreate(user->y, is_from_y, user->yi[i], is_to_yi, &user->yi_scatter[i]));
890     PetscCall(ISDestroy(&is_to_yi));
891     PetscCall(ISDestroy(&is_from_y));
892   }
893 
894   /* Create scatter from u to ux_1,uy_1,ux_2,uy_2,...,ux_nt,uy_nt */
895   /*  TODO: reorder for better parallelism */
896   PetscCall(PetscMalloc1(user->nt * user->mx * user->mx, &user->uxi_scatter));
897   PetscCall(PetscMalloc1(user->nt * user->mx * user->mx, &user->uyi_scatter));
898   PetscCall(PetscMalloc1(user->nt * user->mx * user->mx, &user->ux_scatter));
899   PetscCall(PetscMalloc1(user->nt * user->mx * user->mx, &user->uy_scatter));
900   PetscCall(PetscMalloc1(2 * user->nt * user->mx * user->mx, &user->ui_scatter));
901   PetscCall(VecCreate(PETSC_COMM_WORLD, &uxi));
902   PetscCall(VecCreate(PETSC_COMM_WORLD, &ui));
903   PetscCall(VecSetSizes(uxi, PETSC_DECIDE, user->mx * user->mx));
904   PetscCall(VecSetSizes(ui, PETSC_DECIDE, 2 * user->mx * user->mx));
905   PetscCall(VecSetFromOptions(uxi));
906   PetscCall(VecSetFromOptions(ui));
907   PetscCall(VecDuplicateVecs(uxi, user->nt, &user->uxi));
908   PetscCall(VecDuplicateVecs(uxi, user->nt, &user->uyi));
909   PetscCall(VecDuplicateVecs(uxi, user->nt, &user->uxiwork));
910   PetscCall(VecDuplicateVecs(uxi, user->nt, &user->uyiwork));
911   PetscCall(VecDuplicateVecs(ui, user->nt, &user->ui));
912   PetscCall(VecDuplicateVecs(ui, user->nt, &user->uiwork));
913   for (i = 0; i < user->nt; i++) {
914     PetscCall(VecGetOwnershipRange(user->uxi[i], &lo, &hi));
915     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_uxi));
916     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + 2 * i * user->mx * user->mx, 1, &is_from_u));
917     PetscCall(VecScatterCreate(user->u, is_from_u, user->uxi[i], is_to_uxi, &user->uxi_scatter[i]));
918 
919     PetscCall(ISDestroy(&is_to_uxi));
920     PetscCall(ISDestroy(&is_from_u));
921 
922     PetscCall(VecGetOwnershipRange(user->uyi[i], &lo, &hi));
923     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_uyi));
924     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + (2 * i + 1) * user->mx * user->mx, 1, &is_from_u));
925     PetscCall(VecScatterCreate(user->u, is_from_u, user->uyi[i], is_to_uyi, &user->uyi_scatter[i]));
926 
927     PetscCall(ISDestroy(&is_to_uyi));
928     PetscCall(ISDestroy(&is_from_u));
929 
930     PetscCall(VecGetOwnershipRange(user->uxi[i], &lo, &hi));
931     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_uxi));
932     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_from_u));
933     PetscCall(VecScatterCreate(user->ui[i], is_from_u, user->uxi[i], is_to_uxi, &user->ux_scatter[i]));
934 
935     PetscCall(ISDestroy(&is_to_uxi));
936     PetscCall(ISDestroy(&is_from_u));
937 
938     PetscCall(VecGetOwnershipRange(user->uyi[i], &lo, &hi));
939     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_uyi));
940     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + user->mx * user->mx, 1, &is_from_u));
941     PetscCall(VecScatterCreate(user->ui[i], is_from_u, user->uyi[i], is_to_uyi, &user->uy_scatter[i]));
942 
943     PetscCall(ISDestroy(&is_to_uyi));
944     PetscCall(ISDestroy(&is_from_u));
945 
946     PetscCall(VecGetOwnershipRange(user->ui[i], &lo, &hi));
947     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_uxi));
948     PetscCall(ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + 2 * i * user->mx * user->mx, 1, &is_from_u));
949     PetscCall(VecScatterCreate(user->u, is_from_u, user->ui[i], is_to_uxi, &user->ui_scatter[i]));
950 
951     PetscCall(ISDestroy(&is_to_uxi));
952     PetscCall(ISDestroy(&is_from_u));
953   }
954 
955   /* RHS of forward problem */
956   PetscCall(MatMult(user->M, bc, user->yiwork[0]));
957   for (i = 1; i < user->nt; i++) PetscCall(VecSet(user->yiwork[i], 0.0));
958   PetscCall(Gather_yi(user->q, user->yiwork, user->yi_scatter, user->nt));
959 
960   /* Compute true velocity field utrue */
961   PetscCall(VecDuplicate(user->u, &user->utrue));
962   for (i = 0; i < user->nt; i++) {
963     PetscCall(VecCopy(YY, user->uxi[i]));
964     PetscCall(VecScale(user->uxi[i], 150.0 * i * user->ht));
965     PetscCall(VecCopy(XX, user->uyi[i]));
966     PetscCall(VecShift(user->uyi[i], -10.0));
967     PetscCall(VecScale(user->uyi[i], 15.0 * i * user->ht));
968   }
969   PetscCall(Gather_uxi_uyi(user->utrue, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
970 
971   /* Initial guess and reference model */
972   PetscCall(VecDuplicate(user->utrue, &user->ur));
973   for (i = 0; i < user->nt; i++) {
974     PetscCall(VecCopy(XX, user->uxi[i]));
975     PetscCall(VecShift(user->uxi[i], i * user->ht));
976     PetscCall(VecCopy(YY, user->uyi[i]));
977     PetscCall(VecShift(user->uyi[i], -i * user->ht));
978   }
979   PetscCall(Gather_uxi_uyi(user->ur, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
980 
981   /* Generate regularization matrix L */
982   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->LT));
983   PetscCall(MatSetSizes(user->LT, PETSC_DECIDE, PETSC_DECIDE, 2 * n * user->nt, n * user->nt));
984   PetscCall(MatSetFromOptions(user->LT));
985   PetscCall(MatMPIAIJSetPreallocation(user->LT, 1, NULL, 1, NULL));
986   PetscCall(MatSeqAIJSetPreallocation(user->LT, 1, NULL));
987   PetscCall(MatGetOwnershipRange(user->LT, &istart, &iend));
988 
989   for (i = istart; i < iend; i++) {
990     iblock = (i + n) / (2 * n);
991     j      = i - iblock * n;
992     PetscCall(MatSetValues(user->LT, 1, &i, 1, &j, &user->gamma, INSERT_VALUES));
993   }
994 
995   PetscCall(MatAssemblyBegin(user->LT, MAT_FINAL_ASSEMBLY));
996   PetscCall(MatAssemblyEnd(user->LT, MAT_FINAL_ASSEMBLY));
997 
998   PetscCall(MatTranspose(user->LT, MAT_INITIAL_MATRIX, &user->L));
999 
1000   /* Build work vectors and matrices */
1001   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->lwork));
1002   PetscCall(VecSetType(user->lwork, VECMPI));
1003   PetscCall(VecSetSizes(user->lwork, PETSC_DECIDE, user->m));
1004   PetscCall(VecSetFromOptions(user->lwork));
1005 
1006   PetscCall(MatDuplicate(user->Div, MAT_SHARE_NONZERO_PATTERN, &user->Divwork));
1007 
1008   PetscCall(VecDuplicate(user->y, &user->ywork));
1009   PetscCall(VecDuplicate(user->u, &user->uwork));
1010   PetscCall(VecDuplicate(user->u, &user->vwork));
1011   PetscCall(VecDuplicate(user->u, &user->js_diag));
1012   PetscCall(VecDuplicate(user->c, &user->cwork));
1013 
1014   /* Create matrix-free shell user->Js for computing A*x */
1015   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->m, user, &user->Js));
1016   PetscCall(MatShellSetOperation(user->Js, MATOP_MULT, (void (*)(void))StateMatMult));
1017   PetscCall(MatShellSetOperation(user->Js, MATOP_DUPLICATE, (void (*)(void))StateMatDuplicate));
1018   PetscCall(MatShellSetOperation(user->Js, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatMultTranspose));
1019   PetscCall(MatShellSetOperation(user->Js, MATOP_GET_DIAGONAL, (void (*)(void))StateMatGetDiagonal));
1020 
1021   /* Diagonal blocks of user->Js */
1022   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, n, n, user, &user->JsBlock));
1023   PetscCall(MatShellSetOperation(user->JsBlock, MATOP_MULT, (void (*)(void))StateMatBlockMult));
1024   PetscCall(MatShellSetOperation(user->JsBlock, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatBlockMultTranspose));
1025 
1026   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
1027      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
1028      This is an SOR preconditioner for user->JsBlock. */
1029   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, n, n, user, &user->JsBlockPrec));
1030   PetscCall(MatShellSetOperation(user->JsBlockPrec, MATOP_MULT, (void (*)(void))StateMatBlockPrecMult));
1031   PetscCall(MatShellSetOperation(user->JsBlockPrec, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatBlockPrecMultTranspose));
1032 
1033   /* Create a matrix-free shell user->Jd for computing B*x */
1034   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->n - user->m, user, &user->Jd));
1035   PetscCall(MatShellSetOperation(user->Jd, MATOP_MULT, (void (*)(void))DesignMatMult));
1036   PetscCall(MatShellSetOperation(user->Jd, MATOP_MULT_TRANSPOSE, (void (*)(void))DesignMatMultTranspose));
1037 
1038   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
1039   PetscCall(MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->m, user, &user->JsInv));
1040   PetscCall(MatShellSetOperation(user->JsInv, MATOP_MULT, (void (*)(void))StateMatInvMult));
1041   PetscCall(MatShellSetOperation(user->JsInv, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatInvTransposeMult));
1042 
1043   /* Build matrices for SOR preconditioner */
1044   PetscCall(Scatter_uxi_uyi(user->u, user->uxi, user->uxi_scatter, user->uyi, user->uyi_scatter, user->nt));
1045   PetscCall(PetscMalloc1(5 * n, &user->C));
1046   PetscCall(PetscMalloc1(2 * n, &user->Cwork));
1047   for (i = 0; i < user->nt; i++) {
1048     PetscCall(MatDuplicate(user->Divxy[0], MAT_COPY_VALUES, &user->C[i]));
1049     PetscCall(MatDuplicate(user->Divxy[1], MAT_COPY_VALUES, &user->Cwork[i]));
1050 
1051     PetscCall(MatDiagonalScale(user->C[i], NULL, user->uxi[i]));
1052     PetscCall(MatDiagonalScale(user->Cwork[i], NULL, user->uyi[i]));
1053     PetscCall(MatAXPY(user->C[i], 1.0, user->Cwork[i], DIFFERENT_NONZERO_PATTERN));
1054     PetscCall(MatScale(user->C[i], user->ht));
1055     PetscCall(MatShift(user->C[i], 1.0));
1056   }
1057 
1058   /* Solver options and tolerances */
1059   PetscCall(KSPCreate(PETSC_COMM_WORLD, &user->solver));
1060   PetscCall(KSPSetType(user->solver, KSPGMRES));
1061   PetscCall(KSPSetOperators(user->solver, user->JsBlock, user->JsBlockPrec));
1062   PetscCall(KSPSetTolerances(user->solver, 1e-4, 1e-20, 1e3, 500));
1063   /* PetscCall(KSPSetTolerances(user->solver,1e-8,1e-16,1e3,500)); */
1064   PetscCall(KSPGetPC(user->solver, &user->prec));
1065   PetscCall(PCSetType(user->prec, PCSHELL));
1066 
1067   PetscCall(PCShellSetApply(user->prec, StateMatBlockPrecMult));
1068   PetscCall(PCShellSetApplyTranspose(user->prec, StateMatBlockPrecMultTranspose));
1069   PetscCall(PCShellSetContext(user->prec, user));
1070 
1071   /* Compute true state function yt given ut */
1072   PetscCall(VecCreate(PETSC_COMM_WORLD, &user->ytrue));
1073   PetscCall(VecSetSizes(user->ytrue, PETSC_DECIDE, n * user->nt));
1074   PetscCall(VecSetFromOptions(user->ytrue));
1075   user->c_formed = PETSC_TRUE;
1076   PetscCall(VecCopy(user->utrue, user->u)); /*  Set u=utrue temporarily for StateMatInv */
1077   PetscCall(VecSet(user->ytrue, 0.0));      /*  Initial guess */
1078   PetscCall(StateMatInvMult(user->Js, user->q, user->ytrue));
1079   PetscCall(VecCopy(user->ur, user->u)); /*  Reset u=ur */
1080 
1081   /* Initial guess y0 for state given u0 */
1082   PetscCall(StateMatInvMult(user->Js, user->q, user->y));
1083 
1084   /* Data discretization */
1085   PetscCall(MatCreate(PETSC_COMM_WORLD, &user->Q));
1086   PetscCall(MatSetSizes(user->Q, PETSC_DECIDE, PETSC_DECIDE, user->mx * user->mx, user->m));
1087   PetscCall(MatSetFromOptions(user->Q));
1088   PetscCall(MatMPIAIJSetPreallocation(user->Q, 0, NULL, 1, NULL));
1089   PetscCall(MatSeqAIJSetPreallocation(user->Q, 1, NULL));
1090 
1091   PetscCall(MatGetOwnershipRange(user->Q, &istart, &iend));
1092 
1093   for (i = istart; i < iend; i++) {
1094     j = i + user->m - user->mx * user->mx;
1095     PetscCall(MatSetValues(user->Q, 1, &i, 1, &j, &one, INSERT_VALUES));
1096   }
1097 
1098   PetscCall(MatAssemblyBegin(user->Q, MAT_FINAL_ASSEMBLY));
1099   PetscCall(MatAssemblyEnd(user->Q, MAT_FINAL_ASSEMBLY));
1100 
1101   PetscCall(MatTranspose(user->Q, MAT_INITIAL_MATRIX, &user->QT));
1102 
1103   PetscCall(VecDestroy(&XX));
1104   PetscCall(VecDestroy(&YY));
1105   PetscCall(VecDestroy(&XXwork));
1106   PetscCall(VecDestroy(&YYwork));
1107   PetscCall(VecDestroy(&yi));
1108   PetscCall(VecDestroy(&uxi));
1109   PetscCall(VecDestroy(&ui));
1110   PetscCall(VecDestroy(&bc));
1111 
1112   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1113   PetscCall(KSPSetFromOptions(user->solver));
1114   PetscFunctionReturn(PETSC_SUCCESS);
1115 }
1116 
1117 PetscErrorCode HyperbolicDestroy(AppCtx *user)
1118 {
1119   PetscInt i;
1120 
1121   PetscFunctionBegin;
1122   PetscCall(MatDestroy(&user->Q));
1123   PetscCall(MatDestroy(&user->QT));
1124   PetscCall(MatDestroy(&user->Div));
1125   PetscCall(MatDestroy(&user->Divwork));
1126   PetscCall(MatDestroy(&user->Grad));
1127   PetscCall(MatDestroy(&user->L));
1128   PetscCall(MatDestroy(&user->LT));
1129   PetscCall(KSPDestroy(&user->solver));
1130   PetscCall(MatDestroy(&user->Js));
1131   PetscCall(MatDestroy(&user->Jd));
1132   PetscCall(MatDestroy(&user->JsBlockPrec));
1133   PetscCall(MatDestroy(&user->JsInv));
1134   PetscCall(MatDestroy(&user->JsBlock));
1135   PetscCall(MatDestroy(&user->Divxy[0]));
1136   PetscCall(MatDestroy(&user->Divxy[1]));
1137   PetscCall(MatDestroy(&user->Gradxy[0]));
1138   PetscCall(MatDestroy(&user->Gradxy[1]));
1139   PetscCall(MatDestroy(&user->M));
1140   for (i = 0; i < user->nt; i++) {
1141     PetscCall(MatDestroy(&user->C[i]));
1142     PetscCall(MatDestroy(&user->Cwork[i]));
1143   }
1144   PetscCall(PetscFree(user->C));
1145   PetscCall(PetscFree(user->Cwork));
1146   PetscCall(VecDestroy(&user->u));
1147   PetscCall(VecDestroy(&user->uwork));
1148   PetscCall(VecDestroy(&user->vwork));
1149   PetscCall(VecDestroy(&user->utrue));
1150   PetscCall(VecDestroy(&user->y));
1151   PetscCall(VecDestroy(&user->ywork));
1152   PetscCall(VecDestroy(&user->ytrue));
1153   PetscCall(VecDestroyVecs(user->nt, &user->yi));
1154   PetscCall(VecDestroyVecs(user->nt, &user->yiwork));
1155   PetscCall(VecDestroyVecs(user->nt, &user->ziwork));
1156   PetscCall(VecDestroyVecs(user->nt, &user->uxi));
1157   PetscCall(VecDestroyVecs(user->nt, &user->uyi));
1158   PetscCall(VecDestroyVecs(user->nt, &user->uxiwork));
1159   PetscCall(VecDestroyVecs(user->nt, &user->uyiwork));
1160   PetscCall(VecDestroyVecs(user->nt, &user->ui));
1161   PetscCall(VecDestroyVecs(user->nt, &user->uiwork));
1162   PetscCall(VecDestroy(&user->c));
1163   PetscCall(VecDestroy(&user->cwork));
1164   PetscCall(VecDestroy(&user->ur));
1165   PetscCall(VecDestroy(&user->q));
1166   PetscCall(VecDestroy(&user->d));
1167   PetscCall(VecDestroy(&user->dwork));
1168   PetscCall(VecDestroy(&user->lwork));
1169   PetscCall(VecDestroy(&user->js_diag));
1170   PetscCall(ISDestroy(&user->s_is));
1171   PetscCall(ISDestroy(&user->d_is));
1172   PetscCall(VecScatterDestroy(&user->state_scatter));
1173   PetscCall(VecScatterDestroy(&user->design_scatter));
1174   for (i = 0; i < user->nt; i++) {
1175     PetscCall(VecScatterDestroy(&user->uxi_scatter[i]));
1176     PetscCall(VecScatterDestroy(&user->uyi_scatter[i]));
1177     PetscCall(VecScatterDestroy(&user->ux_scatter[i]));
1178     PetscCall(VecScatterDestroy(&user->uy_scatter[i]));
1179     PetscCall(VecScatterDestroy(&user->ui_scatter[i]));
1180     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1181   }
1182   PetscCall(PetscFree(user->uxi_scatter));
1183   PetscCall(PetscFree(user->uyi_scatter));
1184   PetscCall(PetscFree(user->ux_scatter));
1185   PetscCall(PetscFree(user->uy_scatter));
1186   PetscCall(PetscFree(user->ui_scatter));
1187   PetscCall(PetscFree(user->yi_scatter));
1188   PetscFunctionReturn(PETSC_SUCCESS);
1189 }
1190 
1191 PetscErrorCode HyperbolicMonitor(Tao tao, void *ptr)
1192 {
1193   Vec       X;
1194   PetscReal unorm, ynorm;
1195   AppCtx   *user = (AppCtx *)ptr;
1196 
1197   PetscFunctionBegin;
1198   PetscCall(TaoGetSolution(tao, &X));
1199   PetscCall(Scatter(X, user->ywork, user->state_scatter, user->uwork, user->design_scatter));
1200   PetscCall(VecAXPY(user->ywork, -1.0, user->ytrue));
1201   PetscCall(VecAXPY(user->uwork, -1.0, user->utrue));
1202   PetscCall(VecNorm(user->uwork, NORM_2, &unorm));
1203   PetscCall(VecNorm(user->ywork, NORM_2, &ynorm));
1204   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n", (double)unorm, (double)ynorm));
1205   PetscFunctionReturn(PETSC_SUCCESS);
1206 }
1207 
1208 /*TEST
1209 
1210    build:
1211       requires: !complex
1212 
1213    test:
1214       requires: !single
1215       args: -tao_monitor_constraint_norm -tao_max_funcs 10 -tao_type lcl -tao_gatol 1.e-5
1216 
1217    test:
1218       suffix: guess_pod
1219       requires: !single
1220       args: -tao_monitor_constraint_norm -tao_max_funcs 10 -tao_type lcl -ksp_guess_type pod -tao_gatol 1.e-5
1221 
1222 TEST*/
1223