xref: /petsc/src/tao/pde_constrained/tutorials/hyperbolic.c (revision 0e03b746557e2551025fde0294144c0532d12f68)
1 #include <petsctao.h>
2 
3 /*T
4    Concepts: TAO^Solving a system of nonlinear equations, nonlinear least squares
5    Routines: TaoCreate();
6    Routines: TaoSetType();
7    Routines: TaoSetInitialVector();
8    Routines: TaoSetObjectiveRoutine();
9    Routines: TaoSetGradientRoutine();
10    Routines: TaoSetConstraintsRoutine();
11    Routines: TaoSetJacobianStateRoutine();
12    Routines: TaoSetJacobianDesignRoutine();
13    Routines: TaoSetStateDesignIS();
14    Routines: TaoSetFromOptions();
15    Routines: TaoSolve();
16    Routines: TaoDestroy();
17    Processors: 1
18 T*/
19 
20 
21 
22 typedef struct {
23   PetscInt n; /*  Number of variables */
24   PetscInt m; /*  Number of constraints */
25   PetscInt mx; /*  grid points in each direction */
26   PetscInt nt; /*  Number of time steps */
27   PetscInt ndata; /*  Number of data points per sample */
28   IS       s_is;
29   IS       d_is;
30   VecScatter state_scatter;
31   VecScatter design_scatter;
32   VecScatter *uxi_scatter,*uyi_scatter,*ux_scatter,*uy_scatter,*ui_scatter;
33   VecScatter *yi_scatter;
34 
35   Mat       Js,Jd,JsBlockPrec,JsInv,JsBlock;
36   PetscBool jformed,c_formed;
37 
38   PetscReal alpha; /*  Regularization parameter */
39   PetscReal gamma;
40   PetscReal ht; /*  Time step */
41   PetscReal T; /*  Final time */
42   Mat Q,QT;
43   Mat L,LT;
44   Mat Div,Divwork,Divxy[2];
45   Mat Grad,Gradxy[2];
46   Mat M;
47   Mat *C,*Cwork;
48   /* Mat Hs,Hd,Hsd; */
49   Vec q;
50   Vec ur; /*  reference */
51 
52   Vec d;
53   Vec dwork;
54 
55   Vec y; /*  state variables */
56   Vec ywork;
57   Vec ytrue;
58   Vec *yi,*yiwork,*ziwork;
59   Vec *uxi,*uyi,*uxiwork,*uyiwork,*ui,*uiwork;
60 
61   Vec u; /*  design variables */
62   Vec uwork,vwork;
63   Vec utrue;
64 
65   Vec js_diag;
66 
67   Vec c; /*  constraint vector */
68   Vec cwork;
69 
70   Vec lwork;
71 
72   KSP      solver;
73   PC       prec;
74   PetscInt block_index;
75 
76   PetscInt ksp_its;
77   PetscInt ksp_its_initial;
78 } AppCtx;
79 
80 
81 PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
82 PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
83 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
84 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
85 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*);
86 PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
87 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
88 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
89 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
90 PetscErrorCode HyperbolicInitialize(AppCtx *user);
91 PetscErrorCode HyperbolicDestroy(AppCtx *user);
92 PetscErrorCode HyperbolicMonitor(Tao, void*);
93 
94 PetscErrorCode StateMatMult(Mat,Vec,Vec);
95 PetscErrorCode StateMatBlockMult(Mat,Vec,Vec);
96 PetscErrorCode StateMatBlockMultTranspose(Mat,Vec,Vec);
97 PetscErrorCode StateMatMultTranspose(Mat,Vec,Vec);
98 PetscErrorCode StateMatGetDiagonal(Mat,Vec);
99 PetscErrorCode StateMatDuplicate(Mat,MatDuplicateOption,Mat*);
100 PetscErrorCode StateMatInvMult(Mat,Vec,Vec);
101 PetscErrorCode StateMatInvTransposeMult(Mat,Vec,Vec);
102 PetscErrorCode StateMatBlockPrecMult(PC,Vec,Vec);
103 
104 PetscErrorCode DesignMatMult(Mat,Vec,Vec);
105 PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);
106 
107 PetscErrorCode Scatter_yi(Vec,Vec*,VecScatter*,PetscInt); /*  y to y1,y2,...,y_nt */
108 PetscErrorCode Gather_yi(Vec,Vec*,VecScatter*,PetscInt);
109 PetscErrorCode Scatter_uxi_uyi(Vec,Vec*,VecScatter*,Vec*,VecScatter*,PetscInt); /*  u to ux_1,uy_1,ux_2,uy_2,...,u */
110 PetscErrorCode Gather_uxi_uyi(Vec,Vec*,VecScatter*,Vec*,VecScatter*,PetscInt);
111 
112 static  char help[]="";
113 
114 int main(int argc, char **argv)
115 {
116   PetscErrorCode     ierr;
117   Vec                x,x0;
118   Tao                tao;
119   AppCtx             user;
120   IS                 is_allstate,is_alldesign;
121   PetscInt           lo,hi,hi2,lo2,ksp_old;
122   PetscInt           ntests = 1;
123   PetscInt           i;
124 #if defined(PETSC_USE_LOG)
125   PetscLogStage      stages[1];
126 #endif
127 
128   ierr = PetscInitialize(&argc, &argv, (char*)0,help);if (ierr) return ierr;
129   user.mx = 32;
130   ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,NULL,NULL);CHKERRQ(ierr);
131   ierr = PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL);CHKERRQ(ierr);
132   user.nt = 16;
133   ierr = PetscOptionsInt("-nt","Number of time steps","",user.nt,&user.nt,NULL);CHKERRQ(ierr);
134   user.ndata = 64;
135   ierr = PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL);CHKERRQ(ierr);
136   user.alpha = 10.0;
137   ierr = PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL);CHKERRQ(ierr);
138   user.T = 1.0/32.0;
139   ierr = PetscOptionsReal("-Tfinal","Final time","",user.T,&user.T,NULL);CHKERRQ(ierr);
140 
141   user.m = user.mx*user.mx*user.nt; /*  number of constraints */
142   user.n = user.mx*user.mx*3*user.nt; /*  number of variables */
143   user.ht = user.T/user.nt; /*  Time step */
144   user.gamma = user.T*user.ht / (user.mx*user.mx);
145 
146   ierr = VecCreate(PETSC_COMM_WORLD,&user.u);CHKERRQ(ierr);
147   ierr = VecCreate(PETSC_COMM_WORLD,&user.y);CHKERRQ(ierr);
148   ierr = VecCreate(PETSC_COMM_WORLD,&user.c);CHKERRQ(ierr);
149   ierr = VecSetSizes(user.u,PETSC_DECIDE,user.n-user.m);CHKERRQ(ierr);
150   ierr = VecSetSizes(user.y,PETSC_DECIDE,user.m);CHKERRQ(ierr);
151   ierr = VecSetSizes(user.c,PETSC_DECIDE,user.m);CHKERRQ(ierr);
152   ierr = VecSetFromOptions(user.u);CHKERRQ(ierr);
153   ierr = VecSetFromOptions(user.y);CHKERRQ(ierr);
154   ierr = VecSetFromOptions(user.c);CHKERRQ(ierr);
155 
156   /* Create scatters for reduced spaces.
157      If the state vector y and design vector u are partitioned as
158      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
159      then the solution vector x is organized as
160      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
161      The index sets user.s_is and user.d_is correspond to the indices of the
162      state and design variables owned by the current processor.
163   */
164   ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
165 
166   ierr = VecGetOwnershipRange(user.y,&lo,&hi);CHKERRQ(ierr);
167   ierr = VecGetOwnershipRange(user.u,&lo2,&hi2);CHKERRQ(ierr);
168 
169   ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate);CHKERRQ(ierr);
170   ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user.s_is);CHKERRQ(ierr);
171 
172   ierr = ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign);CHKERRQ(ierr);
173   ierr = ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user.d_is);CHKERRQ(ierr);
174 
175   ierr = VecSetSizes(x,hi-lo+hi2-lo2,user.n);CHKERRQ(ierr);
176   ierr = VecSetFromOptions(x);CHKERRQ(ierr);
177 
178   ierr = VecScatterCreate(x,user.s_is,user.y,is_allstate,&user.state_scatter);CHKERRQ(ierr);
179   ierr = VecScatterCreate(x,user.d_is,user.u,is_alldesign,&user.design_scatter);CHKERRQ(ierr);
180   ierr = ISDestroy(&is_alldesign);CHKERRQ(ierr);
181   ierr = ISDestroy(&is_allstate);CHKERRQ(ierr);
182 
183   /* Create TAO solver and set desired solution method */
184   ierr = TaoCreate(PETSC_COMM_WORLD,&tao);CHKERRQ(ierr);
185   ierr = TaoSetType(tao,TAOLCL);CHKERRQ(ierr);
186 
187   /* Set up initial vectors and matrices */
188   ierr = HyperbolicInitialize(&user);CHKERRQ(ierr);
189 
190   ierr = Gather(x,user.y,user.state_scatter,user.u,user.design_scatter);CHKERRQ(ierr);
191   ierr = VecDuplicate(x,&x0);CHKERRQ(ierr);
192   ierr = VecCopy(x,x0);CHKERRQ(ierr);
193 
194   /* Set solution vector with an initial guess */
195   ierr = TaoSetInitialVector(tao,x);CHKERRQ(ierr);
196   ierr = TaoSetObjectiveRoutine(tao, FormFunction, (void *)&user);CHKERRQ(ierr);
197   ierr = TaoSetGradientRoutine(tao, FormGradient, (void *)&user);CHKERRQ(ierr);
198   ierr = TaoSetConstraintsRoutine(tao, user.c, FormConstraints, (void *)&user);CHKERRQ(ierr);
199   ierr = TaoSetJacobianStateRoutine(tao, user.Js, user.Js, user.JsInv, FormJacobianState, (void *)&user);CHKERRQ(ierr);
200   ierr = TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, (void *)&user);CHKERRQ(ierr);
201   ierr = TaoSetFromOptions(tao);CHKERRQ(ierr);
202   ierr = TaoSetStateDesignIS(tao,user.s_is,user.d_is);CHKERRQ(ierr);
203 
204   /* SOLVE THE APPLICATION */
205   ierr = PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL);CHKERRQ(ierr);
206   ierr = PetscOptionsEnd();CHKERRQ(ierr);
207   ierr = PetscLogStageRegister("Trials",&stages[0]);CHKERRQ(ierr);
208   ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr);
209   user.ksp_its_initial = user.ksp_its;
210   ksp_old = user.ksp_its;
211   for (i=0; i<ntests; i++){
212     ierr = TaoSolve(tao);CHKERRQ(ierr);
213     ierr = PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its-ksp_old);CHKERRQ(ierr);
214     ierr = VecCopy(x0,x);CHKERRQ(ierr);
215     ierr = TaoSetInitialVector(tao,x);CHKERRQ(ierr);
216   }
217   ierr = PetscLogStagePop();CHKERRQ(ierr);
218   ierr = PetscBarrier((PetscObject)x);CHKERRQ(ierr);
219   ierr = PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");CHKERRQ(ierr);
220   ierr = PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);CHKERRQ(ierr);
221   ierr = PetscPrintf(PETSC_COMM_WORLD,"Total KSP iterations over %D trial(s): ",ntests);CHKERRQ(ierr);
222   ierr = PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its);CHKERRQ(ierr);
223   ierr = PetscPrintf(PETSC_COMM_WORLD,"KSP iterations per trial: ");CHKERRQ(ierr);
224   ierr = PetscPrintf(PETSC_COMM_WORLD,"%D\n",(user.ksp_its-user.ksp_its_initial)/ntests);CHKERRQ(ierr);
225 
226   ierr = TaoDestroy(&tao);CHKERRQ(ierr);
227   ierr = VecDestroy(&x);CHKERRQ(ierr);
228   ierr = VecDestroy(&x0);CHKERRQ(ierr);
229   ierr = HyperbolicDestroy(&user);CHKERRQ(ierr);
230   ierr = PetscFinalize();
231   return ierr;
232 }
233 /* ------------------------------------------------------------------- */
234 /*
235    dwork = Qy - d
236    lwork = L*(u-ur).^2
237    f = 1/2 * (dwork.dork + alpha*y.lwork)
238 */
239 PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
240 {
241   PetscErrorCode ierr;
242   PetscReal      d1=0,d2=0;
243   AppCtx         *user = (AppCtx*)ptr;
244 
245   PetscFunctionBegin;
246   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
247   ierr = MatMult(user->Q,user->y,user->dwork);CHKERRQ(ierr);
248   ierr = VecAXPY(user->dwork,-1.0,user->d);CHKERRQ(ierr);
249   ierr = VecDot(user->dwork,user->dwork,&d1);CHKERRQ(ierr);
250 
251   ierr = VecWAXPY(user->uwork,-1.0,user->ur,user->u);CHKERRQ(ierr);
252   ierr = VecPointwiseMult(user->uwork,user->uwork,user->uwork);CHKERRQ(ierr);
253   ierr = MatMult(user->L,user->uwork,user->lwork);CHKERRQ(ierr);
254   ierr = VecDot(user->y,user->lwork,&d2);CHKERRQ(ierr);
255   *f = 0.5 * (d1 + user->alpha*d2);
256   PetscFunctionReturn(0);
257 }
258 
259 /* ------------------------------------------------------------------- */
260 /*
261     state: g_s = Q' *(Qy - d) + 0.5*alpha*L*(u-ur).^2
262     design: g_d = alpha*(L'y).*(u-ur)
263 */
264 PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
265 {
266   PetscErrorCode ierr;
267   AppCtx         *user = (AppCtx*)ptr;
268 
269   PetscFunctionBegin;
270   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
271   ierr = MatMult(user->Q,user->y,user->dwork);CHKERRQ(ierr);
272   ierr = VecAXPY(user->dwork,-1.0,user->d);CHKERRQ(ierr);
273 
274   ierr = MatMult(user->QT,user->dwork,user->ywork);CHKERRQ(ierr);
275 
276   ierr = MatMult(user->LT,user->y,user->uwork);CHKERRQ(ierr);
277   ierr = VecWAXPY(user->vwork,-1.0,user->ur,user->u);CHKERRQ(ierr);
278   ierr = VecPointwiseMult(user->uwork,user->vwork,user->uwork);CHKERRQ(ierr);
279   ierr = VecScale(user->uwork,user->alpha);CHKERRQ(ierr);
280 
281   ierr = VecPointwiseMult(user->vwork,user->vwork,user->vwork);CHKERRQ(ierr);
282   ierr = MatMult(user->L,user->vwork,user->lwork);CHKERRQ(ierr);
283   ierr = VecAXPY(user->ywork,0.5*user->alpha,user->lwork);CHKERRQ(ierr);
284 
285   ierr = Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
289 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
290 {
291   PetscErrorCode ierr;
292   PetscReal      d1,d2;
293   AppCtx         *user = (AppCtx*)ptr;
294 
295   PetscFunctionBegin;
296   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
297   ierr = MatMult(user->Q,user->y,user->dwork);CHKERRQ(ierr);
298   ierr = VecAXPY(user->dwork,-1.0,user->d);CHKERRQ(ierr);
299 
300   ierr = MatMult(user->QT,user->dwork,user->ywork);CHKERRQ(ierr);
301 
302   ierr = VecDot(user->dwork,user->dwork,&d1);CHKERRQ(ierr);
303 
304   ierr = MatMult(user->LT,user->y,user->uwork);CHKERRQ(ierr);
305   ierr = VecWAXPY(user->vwork,-1.0,user->ur,user->u);CHKERRQ(ierr);
306   ierr = VecPointwiseMult(user->uwork,user->vwork,user->uwork);CHKERRQ(ierr);
307   ierr = VecScale(user->uwork,user->alpha);CHKERRQ(ierr);
308 
309   ierr = VecPointwiseMult(user->vwork,user->vwork,user->vwork);CHKERRQ(ierr);
310   ierr = MatMult(user->L,user->vwork,user->lwork);CHKERRQ(ierr);
311   ierr = VecAXPY(user->ywork,0.5*user->alpha,user->lwork);CHKERRQ(ierr);
312 
313   ierr = VecDot(user->y,user->lwork,&d2);CHKERRQ(ierr);
314 
315   *f = 0.5 * (d1 + user->alpha*d2);
316   ierr = Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 /* ------------------------------------------------------------------- */
321 /* A
322 MatShell object
323 */
324 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
325 {
326   PetscErrorCode ierr;
327   PetscInt       i;
328   AppCtx         *user = (AppCtx*)ptr;
329 
330   PetscFunctionBegin;
331   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
332   ierr = Scatter_yi(user->u,user->ui,user->ui_scatter,user->nt);CHKERRQ(ierr);
333   ierr = Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);CHKERRQ(ierr);
334   for (i=0; i<user->nt; i++){
335     ierr = MatCopy(user->Divxy[0],user->C[i],SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
336     ierr = MatCopy(user->Divxy[1],user->Cwork[i],SAME_NONZERO_PATTERN);CHKERRQ(ierr);
337 
338     ierr = MatDiagonalScale(user->C[i],NULL,user->uxi[i]);CHKERRQ(ierr);
339     ierr = MatDiagonalScale(user->Cwork[i],NULL,user->uyi[i]);CHKERRQ(ierr);
340     ierr = MatAXPY(user->C[i],1.0,user->Cwork[i],SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
341     ierr = MatScale(user->C[i],user->ht);CHKERRQ(ierr);
342     ierr = MatShift(user->C[i],1.0);CHKERRQ(ierr);
343   }
344   PetscFunctionReturn(0);
345 }
346 
347 /* ------------------------------------------------------------------- */
348 /* B */
349 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
350 {
351   PetscErrorCode ierr;
352   AppCtx         *user = (AppCtx*)ptr;
353 
354   PetscFunctionBegin;
355   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
356   PetscFunctionReturn(0);
357 }
358 
359 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
360 {
361   PetscErrorCode ierr;
362   PetscInt       i;
363   AppCtx         *user;
364 
365   PetscFunctionBegin;
366   ierr = MatShellGetContext(J_shell,(void**)&user);CHKERRQ(ierr);
367   ierr = Scatter_yi(X,user->yi,user->yi_scatter,user->nt);CHKERRQ(ierr);
368   user->block_index = 0;
369   ierr = MatMult(user->JsBlock,user->yi[0],user->yiwork[0]);CHKERRQ(ierr);
370 
371   for (i=1; i<user->nt; i++){
372     user->block_index = i;
373     ierr = MatMult(user->JsBlock,user->yi[i],user->yiwork[i]);CHKERRQ(ierr);
374     ierr = MatMult(user->M,user->yi[i-1],user->ziwork[i-1]);CHKERRQ(ierr);
375     ierr = VecAXPY(user->yiwork[i],-1.0,user->ziwork[i-1]);CHKERRQ(ierr);
376   }
377   ierr = Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);CHKERRQ(ierr);
378   PetscFunctionReturn(0);
379 }
380 
381 PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y)
382 {
383   PetscErrorCode ierr;
384   PetscInt       i;
385   AppCtx         *user;
386 
387   PetscFunctionBegin;
388   ierr = MatShellGetContext(J_shell,(void**)&user);CHKERRQ(ierr);
389   ierr = Scatter_yi(X,user->yi,user->yi_scatter,user->nt);CHKERRQ(ierr);
390 
391   for (i=0; i<user->nt-1; i++){
392     user->block_index = i;
393     ierr = MatMultTranspose(user->JsBlock,user->yi[i],user->yiwork[i]);CHKERRQ(ierr);
394     ierr = MatMult(user->M,user->yi[i+1],user->ziwork[i+1]);CHKERRQ(ierr);
395     ierr = VecAXPY(user->yiwork[i],-1.0,user->ziwork[i+1]);CHKERRQ(ierr);
396   }
397 
398   i = user->nt-1;
399   user->block_index = i;
400   ierr = MatMultTranspose(user->JsBlock,user->yi[i],user->yiwork[i]);CHKERRQ(ierr);
401   ierr = Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);CHKERRQ(ierr);
402   PetscFunctionReturn(0);
403 }
404 
405 PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y)
406 {
407   PetscErrorCode ierr;
408   PetscInt       i;
409   AppCtx         *user;
410 
411   PetscFunctionBegin;
412   ierr = MatShellGetContext(J_shell,(void**)&user);CHKERRQ(ierr);
413   i = user->block_index;
414   ierr = VecPointwiseMult(user->uxiwork[i],X,user->uxi[i]);CHKERRQ(ierr);
415   ierr = VecPointwiseMult(user->uyiwork[i],X,user->uyi[i]);CHKERRQ(ierr);
416   ierr = Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);CHKERRQ(ierr);
417   ierr = MatMult(user->Div,user->uiwork[i],Y);CHKERRQ(ierr);
418   ierr = VecAYPX(Y,user->ht,X);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 PetscErrorCode StateMatBlockMultTranspose(Mat J_shell, Vec X, Vec Y)
423 {
424   PetscErrorCode ierr;
425   PetscInt       i;
426   AppCtx         *user;
427 
428   PetscFunctionBegin;
429   ierr = MatShellGetContext(J_shell,(void**)&user);CHKERRQ(ierr);
430   i = user->block_index;
431   ierr = MatMult(user->Grad,X,user->uiwork[i]);CHKERRQ(ierr);
432   ierr = Scatter(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);CHKERRQ(ierr);
433   ierr = VecPointwiseMult(user->uxiwork[i],user->uxi[i],user->uxiwork[i]);CHKERRQ(ierr);
434   ierr = VecPointwiseMult(user->uyiwork[i],user->uyi[i],user->uyiwork[i]);CHKERRQ(ierr);
435   ierr = VecWAXPY(Y,1.0,user->uxiwork[i],user->uyiwork[i]);CHKERRQ(ierr);
436   ierr = VecAYPX(Y,user->ht,X);CHKERRQ(ierr);
437   PetscFunctionReturn(0);
438 }
439 
440 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
441 {
442   PetscErrorCode ierr;
443   PetscInt       i;
444   AppCtx         *user;
445 
446   PetscFunctionBegin;
447   ierr = MatShellGetContext(J_shell,(void**)&user);CHKERRQ(ierr);
448   ierr = Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt);CHKERRQ(ierr);
449   ierr = Scatter_uxi_uyi(X,user->uxiwork,user->uxi_scatter,user->uyiwork,user->uyi_scatter,user->nt);CHKERRQ(ierr);
450   for (i=0; i<user->nt; i++){
451     ierr = VecPointwiseMult(user->uxiwork[i],user->yi[i],user->uxiwork[i]);CHKERRQ(ierr);
452     ierr = VecPointwiseMult(user->uyiwork[i],user->yi[i],user->uyiwork[i]);CHKERRQ(ierr);
453     ierr = Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);CHKERRQ(ierr);
454     ierr = MatMult(user->Div,user->uiwork[i],user->ziwork[i]);CHKERRQ(ierr);
455     ierr = VecScale(user->ziwork[i],user->ht);CHKERRQ(ierr);
456   }
457   ierr = Gather_yi(Y,user->ziwork,user->yi_scatter,user->nt);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
462 {
463   PetscErrorCode ierr;
464   PetscInt       i;
465   AppCtx         *user;
466 
467   PetscFunctionBegin;
468   ierr = MatShellGetContext(J_shell,(void**)&user);CHKERRQ(ierr);
469   ierr = Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt);CHKERRQ(ierr);
470   ierr = Scatter_yi(X,user->yiwork,user->yi_scatter,user->nt);CHKERRQ(ierr);
471   for (i=0; i<user->nt; i++){
472     ierr = MatMult(user->Grad,user->yiwork[i],user->uiwork[i]);CHKERRQ(ierr);
473     ierr = Scatter(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);CHKERRQ(ierr);
474     ierr = VecPointwiseMult(user->uxiwork[i],user->yi[i],user->uxiwork[i]);CHKERRQ(ierr);
475     ierr = VecPointwiseMult(user->uyiwork[i],user->yi[i],user->uyiwork[i]);CHKERRQ(ierr);
476     ierr = Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);CHKERRQ(ierr);
477     ierr = VecScale(user->uiwork[i],user->ht);CHKERRQ(ierr);
478   }
479   ierr = Gather_yi(Y,user->uiwork,user->ui_scatter,user->nt);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y)
484 {
485   PetscErrorCode ierr;
486   PetscInt       i;
487   AppCtx         *user;
488 
489   PetscFunctionBegin;
490   ierr = PCShellGetContext(PC_shell,(void**)&user);CHKERRQ(ierr);
491   i = user->block_index;
492   if (user->c_formed) {
493     ierr = MatSOR(user->C[i],X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y);CHKERRQ(ierr);
494   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not formed");
495   PetscFunctionReturn(0);
496 }
497 
498 PetscErrorCode StateMatBlockPrecMultTranspose(PC PC_shell, Vec X, Vec Y)
499 {
500   PetscErrorCode ierr;
501   PetscInt       i;
502   AppCtx         *user;
503 
504   PetscFunctionBegin;
505   ierr = PCShellGetContext(PC_shell,(void**)&user);CHKERRQ(ierr);
506 
507   i = user->block_index;
508   if (user->c_formed) {
509     ierr = MatSOR(user->C[i],X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y);CHKERRQ(ierr);
510   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not formed");
511   PetscFunctionReturn(0);
512 }
513 
514 PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
515 {
516   PetscErrorCode ierr;
517   AppCtx         *user;
518   PetscInt       its,i;
519 
520   PetscFunctionBegin;
521   ierr = MatShellGetContext(J_shell,(void**)&user);CHKERRQ(ierr);
522 
523   if (Y == user->ytrue) {
524     /* First solve is done using true solution to set up problem */
525     ierr = KSPSetTolerances(user->solver,1e-4,1e-20,PETSC_DEFAULT,PETSC_DEFAULT);CHKERRQ(ierr);
526   } else {
527     ierr = KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);CHKERRQ(ierr);
528   }
529   ierr = Scatter_yi(X,user->yi,user->yi_scatter,user->nt);CHKERRQ(ierr);
530   ierr = Scatter_yi(Y,user->yiwork,user->yi_scatter,user->nt);CHKERRQ(ierr);
531   ierr = Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);CHKERRQ(ierr);
532 
533   user->block_index = 0;
534   ierr = KSPSolve(user->solver,user->yi[0],user->yiwork[0]);CHKERRQ(ierr);
535 
536   ierr = KSPGetIterationNumber(user->solver,&its);CHKERRQ(ierr);
537   user->ksp_its = user->ksp_its + its;
538   for (i=1; i<user->nt; i++){
539     ierr = MatMult(user->M,user->yiwork[i-1],user->ziwork[i-1]);CHKERRQ(ierr);
540     ierr = VecAXPY(user->yi[i],1.0,user->ziwork[i-1]);CHKERRQ(ierr);
541     user->block_index = i;
542     ierr = KSPSolve(user->solver,user->yi[i],user->yiwork[i]);CHKERRQ(ierr);
543 
544     ierr = KSPGetIterationNumber(user->solver,&its);CHKERRQ(ierr);
545     user->ksp_its = user->ksp_its + its;
546   }
547   ierr = Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
552 {
553   PetscErrorCode ierr;
554   AppCtx         *user;
555   PetscInt       its,i;
556 
557   PetscFunctionBegin;
558   ierr = MatShellGetContext(J_shell,(void**)&user);CHKERRQ(ierr);
559 
560   ierr = Scatter_yi(X,user->yi,user->yi_scatter,user->nt);CHKERRQ(ierr);
561   ierr = Scatter_yi(Y,user->yiwork,user->yi_scatter,user->nt);CHKERRQ(ierr);
562   ierr = Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);CHKERRQ(ierr);
563 
564   i = user->nt - 1;
565   user->block_index = i;
566   ierr = KSPSolveTranspose(user->solver,user->yi[i],user->yiwork[i]);CHKERRQ(ierr);
567 
568   ierr = KSPGetIterationNumber(user->solver,&its);CHKERRQ(ierr);
569   user->ksp_its = user->ksp_its + its;
570 
571   for (i=user->nt-2; i>=0; i--){
572     ierr = MatMult(user->M,user->yiwork[i+1],user->ziwork[i+1]);CHKERRQ(ierr);
573     ierr = VecAXPY(user->yi[i],1.0,user->ziwork[i+1]);CHKERRQ(ierr);
574     user->block_index = i;
575     ierr = KSPSolveTranspose(user->solver,user->yi[i],user->yiwork[i]);CHKERRQ(ierr);
576 
577     ierr = KSPGetIterationNumber(user->solver,&its);CHKERRQ(ierr);
578     user->ksp_its = user->ksp_its + its;
579   }
580   ierr = Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);CHKERRQ(ierr);
581   PetscFunctionReturn(0);
582 }
583 
584 PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
585 {
586   PetscErrorCode ierr;
587   AppCtx         *user;
588 
589   PetscFunctionBegin;
590   ierr = MatShellGetContext(J_shell,(void**)&user);CHKERRQ(ierr);
591 
592   ierr = MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,new_shell);CHKERRQ(ierr);
593   ierr = MatShellSetOperation(*new_shell,MATOP_MULT,(void(*)(void))StateMatMult);CHKERRQ(ierr);
594   ierr = MatShellSetOperation(*new_shell,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate);CHKERRQ(ierr);
595   ierr = MatShellSetOperation(*new_shell,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose);CHKERRQ(ierr);
596   ierr = MatShellSetOperation(*new_shell,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal);CHKERRQ(ierr);
597   PetscFunctionReturn(0);
598 }
599 
600 PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
601 {
602   PetscErrorCode ierr;
603   AppCtx         *user;
604 
605   PetscFunctionBegin;
606   ierr = MatShellGetContext(J_shell,(void**)&user);CHKERRQ(ierr);
607   ierr =  VecCopy(user->js_diag,X);CHKERRQ(ierr);
608   PetscFunctionReturn(0);
609 }
610 
611 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
612 {
613   /* con = Ay - q, A = [C(u1)  0     0     ...   0;
614                          -M  C(u2)   0     ...   0;
615                           0   -M   C(u3)   ...   0;
616                                       ...         ;
617                           0    ...      -M C(u_nt)]
618      C(u) = eye + ht*Div*[diag(u1); diag(u2)]       */
619   PetscErrorCode ierr;
620   PetscInt       i;
621   AppCtx         *user = (AppCtx*)ptr;
622 
623   PetscFunctionBegin;
624   ierr = Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);CHKERRQ(ierr);
625   ierr = Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt);CHKERRQ(ierr);
626   ierr = Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);CHKERRQ(ierr);
627 
628   user->block_index = 0;
629   ierr = MatMult(user->JsBlock,user->yi[0],user->yiwork[0]);CHKERRQ(ierr);
630 
631   for (i=1; i<user->nt; i++){
632     user->block_index = i;
633     ierr = MatMult(user->JsBlock,user->yi[i],user->yiwork[i]);CHKERRQ(ierr);
634     ierr = MatMult(user->M,user->yi[i-1],user->ziwork[i-1]);CHKERRQ(ierr);
635     ierr = VecAXPY(user->yiwork[i],-1.0,user->ziwork[i-1]);CHKERRQ(ierr);
636   }
637 
638   ierr = Gather_yi(C,user->yiwork,user->yi_scatter,user->nt);CHKERRQ(ierr);
639   ierr = VecAXPY(C,-1.0,user->q);CHKERRQ(ierr);
640 
641   PetscFunctionReturn(0);
642 }
643 
644 
645 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
646 {
647   PetscErrorCode ierr;
648 
649   PetscFunctionBegin;
650   ierr = VecScatterBegin(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
651   ierr = VecScatterEnd(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
652   ierr = VecScatterBegin(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
653   ierr = VecScatterEnd(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 PetscErrorCode Scatter_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
658 {
659   PetscErrorCode ierr;
660   PetscInt       i;
661 
662   PetscFunctionBegin;
663   for (i=0; i<nt; i++){
664     ierr = VecScatterBegin(scatx[i],u,uxi[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
665     ierr = VecScatterEnd(scatx[i],u,uxi[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
666     ierr = VecScatterBegin(scaty[i],u,uyi[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
667     ierr = VecScatterEnd(scaty[i],u,uyi[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
668   }
669   PetscFunctionReturn(0);
670 }
671 
672 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
673 {
674   PetscErrorCode ierr;
675 
676   PetscFunctionBegin;
677   ierr = VecScatterBegin(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
678   ierr = VecScatterEnd(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
679   ierr = VecScatterBegin(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
680   ierr = VecScatterEnd(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 PetscErrorCode Gather_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
685 {
686   PetscErrorCode ierr;
687   PetscInt       i;
688 
689   PetscFunctionBegin;
690   for (i=0; i<nt; i++){
691     ierr = VecScatterBegin(scatx[i],uxi[i],u,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
692     ierr = VecScatterEnd(scatx[i],uxi[i],u,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
693     ierr = VecScatterBegin(scaty[i],uyi[i],u,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
694     ierr = VecScatterEnd(scaty[i],uyi[i],u,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
695   }
696   PetscFunctionReturn(0);
697 }
698 
699 PetscErrorCode Scatter_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
700 {
701   PetscErrorCode ierr;
702   PetscInt       i;
703 
704   PetscFunctionBegin;
705   for (i=0; i<nt; i++){
706     ierr = VecScatterBegin(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
707     ierr = VecScatterEnd(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
708   }
709   PetscFunctionReturn(0);
710 }
711 
712 PetscErrorCode Gather_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
713 {
714   PetscErrorCode ierr;
715   PetscInt       i;
716 
717   PetscFunctionBegin;
718   for (i=0; i<nt; i++){
719     ierr = VecScatterBegin(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
720     ierr = VecScatterEnd(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 PetscErrorCode HyperbolicInitialize(AppCtx *user)
726 {
727   PetscErrorCode ierr;
728   PetscInt       n,i,j,linear_index,istart,iend,iblock,lo,hi;
729   Vec            XX,YY,XXwork,YYwork,yi,uxi,ui,bc;
730   PetscReal      h,sum;
731   PetscScalar    hinv,neg_hinv,quarter=0.25,one=1.0,half_hinv,neg_half_hinv;
732   PetscScalar    vx,vy,zero=0.0;
733   IS             is_from_y,is_to_yi,is_from_u,is_to_uxi,is_to_uyi;
734 
735   PetscFunctionBegin;
736   user->jformed = PETSC_FALSE;
737   user->c_formed = PETSC_FALSE;
738 
739   user->ksp_its = 0;
740   user->ksp_its_initial = 0;
741 
742   n = user->mx * user->mx;
743 
744   h = 1.0/user->mx;
745   hinv = user->mx;
746   neg_hinv = -hinv;
747   half_hinv = hinv / 2.0;
748   neg_half_hinv = neg_hinv / 2.0;
749 
750   /* Generate Grad matrix */
751   ierr = MatCreate(PETSC_COMM_WORLD,&user->Grad);CHKERRQ(ierr);
752   ierr = MatSetSizes(user->Grad,PETSC_DECIDE,PETSC_DECIDE,2*n,n);CHKERRQ(ierr);
753   ierr = MatSetFromOptions(user->Grad);CHKERRQ(ierr);
754   ierr = MatMPIAIJSetPreallocation(user->Grad,3,NULL,3,NULL);CHKERRQ(ierr);
755   ierr = MatSeqAIJSetPreallocation(user->Grad,3,NULL);CHKERRQ(ierr);
756   ierr = MatGetOwnershipRange(user->Grad,&istart,&iend);CHKERRQ(ierr);
757 
758   for (i=istart; i<iend; i++){
759     if (i<n){
760       iblock = i / user->mx;
761       j = iblock*user->mx + ((i+user->mx-1) % user->mx);
762       ierr = MatSetValues(user->Grad,1,&i,1,&j,&half_hinv,INSERT_VALUES);CHKERRQ(ierr);
763       j = iblock*user->mx + ((i+1) % user->mx);
764       ierr = MatSetValues(user->Grad,1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);CHKERRQ(ierr);
765     }
766     if (i>=n){
767       j = (i - user->mx) % n;
768       ierr = MatSetValues(user->Grad,1,&i,1,&j,&half_hinv,INSERT_VALUES);CHKERRQ(ierr);
769       j = (j + 2*user->mx) % n;
770       ierr = MatSetValues(user->Grad,1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);CHKERRQ(ierr);
771     }
772   }
773 
774   ierr = MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
775   ierr = MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
776 
777   ierr = MatCreate(PETSC_COMM_WORLD,&user->Gradxy[0]);CHKERRQ(ierr);
778   ierr = MatSetSizes(user->Gradxy[0],PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
779   ierr = MatSetFromOptions(user->Gradxy[0]);CHKERRQ(ierr);
780   ierr = MatMPIAIJSetPreallocation(user->Gradxy[0],3,NULL,3,NULL);CHKERRQ(ierr);
781   ierr = MatSeqAIJSetPreallocation(user->Gradxy[0],3,NULL);CHKERRQ(ierr);
782   ierr = MatGetOwnershipRange(user->Gradxy[0],&istart,&iend);CHKERRQ(ierr);
783 
784   for (i=istart; i<iend; i++){
785     iblock = i / user->mx;
786     j = iblock*user->mx + ((i+user->mx-1) % user->mx);
787     ierr = MatSetValues(user->Gradxy[0],1,&i,1,&j,&half_hinv,INSERT_VALUES);CHKERRQ(ierr);
788     j = iblock*user->mx + ((i+1) % user->mx);
789     ierr = MatSetValues(user->Gradxy[0],1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);CHKERRQ(ierr);
790     ierr = MatSetValues(user->Gradxy[0],1,&i,1,&i,&zero,INSERT_VALUES);CHKERRQ(ierr);
791   }
792   ierr = MatAssemblyBegin(user->Gradxy[0],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
793   ierr = MatAssemblyEnd(user->Gradxy[0],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
794 
795   ierr = MatCreate(PETSC_COMM_WORLD,&user->Gradxy[1]);CHKERRQ(ierr);
796   ierr = MatSetSizes(user->Gradxy[1],PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
797   ierr = MatSetFromOptions(user->Gradxy[1]);CHKERRQ(ierr);
798   ierr = MatMPIAIJSetPreallocation(user->Gradxy[1],3,NULL,3,NULL);CHKERRQ(ierr);
799   ierr = MatSeqAIJSetPreallocation(user->Gradxy[1],3,NULL);CHKERRQ(ierr);
800   ierr = MatGetOwnershipRange(user->Gradxy[1],&istart,&iend);CHKERRQ(ierr);
801 
802   for (i=istart; i<iend; i++){
803     j = (i + n - user->mx) % n;
804     ierr = MatSetValues(user->Gradxy[1],1,&i,1,&j,&half_hinv,INSERT_VALUES);CHKERRQ(ierr);
805     j = (j + 2*user->mx) % n;
806     ierr = MatSetValues(user->Gradxy[1],1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);CHKERRQ(ierr);
807     ierr = MatSetValues(user->Gradxy[1],1,&i,1,&i,&zero,INSERT_VALUES);CHKERRQ(ierr);
808   }
809   ierr = MatAssemblyBegin(user->Gradxy[1],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
810   ierr = MatAssemblyEnd(user->Gradxy[1],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
811 
812   /* Generate Div matrix */
813   ierr = MatTranspose(user->Grad,MAT_INITIAL_MATRIX,&user->Div);CHKERRQ(ierr);
814   ierr = MatTranspose(user->Gradxy[0],MAT_INITIAL_MATRIX,&user->Divxy[0]);CHKERRQ(ierr);
815   ierr = MatTranspose(user->Gradxy[1],MAT_INITIAL_MATRIX,&user->Divxy[1]);CHKERRQ(ierr);
816 
817   /* Off-diagonal averaging matrix */
818   ierr = MatCreate(PETSC_COMM_WORLD,&user->M);CHKERRQ(ierr);
819   ierr = MatSetSizes(user->M,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
820   ierr = MatSetFromOptions(user->M);CHKERRQ(ierr);
821   ierr = MatMPIAIJSetPreallocation(user->M,4,NULL,4,NULL);CHKERRQ(ierr);
822   ierr = MatSeqAIJSetPreallocation(user->M,4,NULL);CHKERRQ(ierr);
823   ierr = MatGetOwnershipRange(user->M,&istart,&iend);CHKERRQ(ierr);
824 
825   for (i=istart; i<iend; i++){
826     /* kron(Id,Av) */
827     iblock = i / user->mx;
828     j = iblock*user->mx + ((i+user->mx-1) % user->mx);
829     ierr = MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);CHKERRQ(ierr);
830     j = iblock*user->mx + ((i+1) % user->mx);
831     ierr = MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);CHKERRQ(ierr);
832 
833     /* kron(Av,Id) */
834     j = (i + user->mx) % n;
835     ierr = MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);CHKERRQ(ierr);
836     j = (i + n - user->mx) % n;
837     ierr = MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);CHKERRQ(ierr);
838   }
839   ierr = MatAssemblyBegin(user->M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
840   ierr = MatAssemblyEnd(user->M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
841 
842   /* Generate 2D grid */
843   ierr = VecCreate(PETSC_COMM_WORLD,&XX);CHKERRQ(ierr);
844   ierr = VecCreate(PETSC_COMM_WORLD,&user->q);CHKERRQ(ierr);
845   ierr = VecSetSizes(XX,PETSC_DECIDE,n);CHKERRQ(ierr);
846   ierr = VecSetSizes(user->q,PETSC_DECIDE,n*user->nt);CHKERRQ(ierr);
847   ierr = VecSetFromOptions(XX);CHKERRQ(ierr);
848   ierr = VecSetFromOptions(user->q);CHKERRQ(ierr);
849 
850   ierr = VecDuplicate(XX,&YY);CHKERRQ(ierr);
851   ierr = VecDuplicate(XX,&XXwork);CHKERRQ(ierr);
852   ierr = VecDuplicate(XX,&YYwork);CHKERRQ(ierr);
853   ierr = VecDuplicate(XX,&user->d);CHKERRQ(ierr);
854   ierr = VecDuplicate(XX,&user->dwork);CHKERRQ(ierr);
855 
856   ierr = VecGetOwnershipRange(XX,&istart,&iend);CHKERRQ(ierr);
857   for (linear_index=istart; linear_index<iend; linear_index++){
858     i = linear_index % user->mx;
859     j = (linear_index-i)/user->mx;
860     vx = h*(i+0.5);
861     vy = h*(j+0.5);
862     ierr = VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES);CHKERRQ(ierr);
863     ierr = VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES);CHKERRQ(ierr);
864   }
865 
866   ierr = VecAssemblyBegin(XX);CHKERRQ(ierr);
867   ierr = VecAssemblyEnd(XX);CHKERRQ(ierr);
868   ierr = VecAssemblyBegin(YY);CHKERRQ(ierr);
869   ierr = VecAssemblyEnd(YY);CHKERRQ(ierr);
870 
871   /* Compute final density function yT
872      yT = 1.0 + exp(-30*((x-0.25)^2+(y-0.25)^2)) + exp(-30*((x-0.75)^2+(y-0.75)^2))
873      yT = yT / (h^2*sum(yT)) */
874   ierr = VecCopy(XX,XXwork);CHKERRQ(ierr);
875   ierr = VecCopy(YY,YYwork);CHKERRQ(ierr);
876 
877   ierr = VecShift(XXwork,-0.25);CHKERRQ(ierr);
878   ierr = VecShift(YYwork,-0.25);CHKERRQ(ierr);
879 
880   ierr = VecPointwiseMult(XXwork,XXwork,XXwork);CHKERRQ(ierr);
881   ierr = VecPointwiseMult(YYwork,YYwork,YYwork);CHKERRQ(ierr);
882 
883   ierr = VecCopy(XXwork,user->dwork);CHKERRQ(ierr);
884   ierr = VecAXPY(user->dwork,1.0,YYwork);CHKERRQ(ierr);
885   ierr = VecScale(user->dwork,-30.0);CHKERRQ(ierr);
886   ierr = VecExp(user->dwork);CHKERRQ(ierr);
887   ierr = VecCopy(user->dwork,user->d);CHKERRQ(ierr);
888 
889   ierr = VecCopy(XX,XXwork);CHKERRQ(ierr);
890   ierr = VecCopy(YY,YYwork);CHKERRQ(ierr);
891 
892   ierr = VecShift(XXwork,-0.75);CHKERRQ(ierr);
893   ierr = VecShift(YYwork,-0.75);CHKERRQ(ierr);
894 
895   ierr = VecPointwiseMult(XXwork,XXwork,XXwork);CHKERRQ(ierr);
896   ierr = VecPointwiseMult(YYwork,YYwork,YYwork);CHKERRQ(ierr);
897 
898   ierr = VecCopy(XXwork,user->dwork);CHKERRQ(ierr);
899   ierr = VecAXPY(user->dwork,1.0,YYwork);CHKERRQ(ierr);
900   ierr = VecScale(user->dwork,-30.0);CHKERRQ(ierr);
901   ierr = VecExp(user->dwork);CHKERRQ(ierr);
902 
903   ierr = VecAXPY(user->d,1.0,user->dwork);CHKERRQ(ierr);
904   ierr = VecShift(user->d,1.0);CHKERRQ(ierr);
905   ierr = VecSum(user->d,&sum);CHKERRQ(ierr);
906   ierr = VecScale(user->d,1.0/(h*h*sum));CHKERRQ(ierr);
907 
908   /* Initial conditions of forward problem */
909   ierr = VecDuplicate(XX,&bc);CHKERRQ(ierr);
910   ierr = VecCopy(XX,XXwork);CHKERRQ(ierr);
911   ierr = VecCopy(YY,YYwork);CHKERRQ(ierr);
912 
913   ierr = VecShift(XXwork,-0.5);CHKERRQ(ierr);
914   ierr = VecShift(YYwork,-0.5);CHKERRQ(ierr);
915 
916   ierr = VecPointwiseMult(XXwork,XXwork,XXwork);CHKERRQ(ierr);
917   ierr = VecPointwiseMult(YYwork,YYwork,YYwork);CHKERRQ(ierr);
918 
919   ierr = VecWAXPY(bc,1.0,XXwork,YYwork);CHKERRQ(ierr);
920   ierr = VecScale(bc,-50.0);CHKERRQ(ierr);
921   ierr = VecExp(bc);CHKERRQ(ierr);
922   ierr = VecShift(bc,1.0);CHKERRQ(ierr);
923   ierr = VecSum(bc,&sum);CHKERRQ(ierr);
924   ierr = VecScale(bc,1.0/(h*h*sum));CHKERRQ(ierr);
925 
926   /* Create scatter from y to y_1,y_2,...,y_nt */
927   /*  TODO: Reorder for better parallelism. (This will require reordering Q and L as well.) */
928   ierr = PetscMalloc1(user->nt*user->mx*user->mx,&user->yi_scatter);CHKERRQ(ierr);
929   ierr = VecCreate(PETSC_COMM_WORLD,&yi);CHKERRQ(ierr);
930   ierr = VecSetSizes(yi,PETSC_DECIDE,user->mx*user->mx);CHKERRQ(ierr);
931   ierr = VecSetFromOptions(yi);CHKERRQ(ierr);
932   ierr = VecDuplicateVecs(yi,user->nt,&user->yi);CHKERRQ(ierr);
933   ierr = VecDuplicateVecs(yi,user->nt,&user->yiwork);CHKERRQ(ierr);
934   ierr = VecDuplicateVecs(yi,user->nt,&user->ziwork);CHKERRQ(ierr);
935   for (i=0; i<user->nt; i++){
936     ierr = VecGetOwnershipRange(user->yi[i],&lo,&hi);CHKERRQ(ierr);
937     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_yi);CHKERRQ(ierr);
938     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+i*user->mx*user->mx,1,&is_from_y);CHKERRQ(ierr);
939     ierr = VecScatterCreate(user->y,is_from_y,user->yi[i],is_to_yi,&user->yi_scatter[i]);CHKERRQ(ierr);
940     ierr = ISDestroy(&is_to_yi);CHKERRQ(ierr);
941     ierr = ISDestroy(&is_from_y);CHKERRQ(ierr);
942   }
943 
944   /* Create scatter from u to ux_1,uy_1,ux_2,uy_2,...,ux_nt,uy_nt */
945   /*  TODO: reorder for better parallelism */
946   ierr = PetscMalloc1(user->nt*user->mx*user->mx,&user->uxi_scatter);CHKERRQ(ierr);
947   ierr = PetscMalloc1(user->nt*user->mx*user->mx,&user->uyi_scatter);CHKERRQ(ierr);
948   ierr = PetscMalloc1(user->nt*user->mx*user->mx,&user->ux_scatter);CHKERRQ(ierr);
949   ierr = PetscMalloc1(user->nt*user->mx*user->mx,&user->uy_scatter);CHKERRQ(ierr);
950   ierr = PetscMalloc1(2*user->nt*user->mx*user->mx,&user->ui_scatter);CHKERRQ(ierr);
951   ierr = VecCreate(PETSC_COMM_WORLD,&uxi);CHKERRQ(ierr);
952   ierr = VecCreate(PETSC_COMM_WORLD,&ui);CHKERRQ(ierr);
953   ierr = VecSetSizes(uxi,PETSC_DECIDE,user->mx*user->mx);CHKERRQ(ierr);
954   ierr = VecSetSizes(ui,PETSC_DECIDE,2*user->mx*user->mx);CHKERRQ(ierr);
955   ierr = VecSetFromOptions(uxi);CHKERRQ(ierr);
956   ierr = VecSetFromOptions(ui);CHKERRQ(ierr);
957   ierr = VecDuplicateVecs(uxi,user->nt,&user->uxi);CHKERRQ(ierr);
958   ierr = VecDuplicateVecs(uxi,user->nt,&user->uyi);CHKERRQ(ierr);
959   ierr = VecDuplicateVecs(uxi,user->nt,&user->uxiwork);CHKERRQ(ierr);
960   ierr = VecDuplicateVecs(uxi,user->nt,&user->uyiwork);CHKERRQ(ierr);
961   ierr = VecDuplicateVecs(ui,user->nt,&user->ui);CHKERRQ(ierr);
962   ierr = VecDuplicateVecs(ui,user->nt,&user->uiwork);CHKERRQ(ierr);
963   for (i=0; i<user->nt; i++){
964     ierr = VecGetOwnershipRange(user->uxi[i],&lo,&hi);CHKERRQ(ierr);
965     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi);CHKERRQ(ierr);
966     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+2*i*user->mx*user->mx,1,&is_from_u);CHKERRQ(ierr);
967     ierr = VecScatterCreate(user->u,is_from_u,user->uxi[i],is_to_uxi,&user->uxi_scatter[i]);CHKERRQ(ierr);
968 
969     ierr = ISDestroy(&is_to_uxi);CHKERRQ(ierr);
970     ierr = ISDestroy(&is_from_u);CHKERRQ(ierr);
971 
972     ierr = VecGetOwnershipRange(user->uyi[i],&lo,&hi);CHKERRQ(ierr);
973     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uyi);CHKERRQ(ierr);
974     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+(2*i+1)*user->mx*user->mx,1,&is_from_u);CHKERRQ(ierr);
975     ierr = VecScatterCreate(user->u,is_from_u,user->uyi[i],is_to_uyi,&user->uyi_scatter[i]);CHKERRQ(ierr);
976 
977     ierr = ISDestroy(&is_to_uyi);CHKERRQ(ierr);
978     ierr = ISDestroy(&is_from_u);CHKERRQ(ierr);
979 
980     ierr = VecGetOwnershipRange(user->uxi[i],&lo,&hi);CHKERRQ(ierr);
981     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi);CHKERRQ(ierr);
982     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_from_u);CHKERRQ(ierr);
983     ierr = VecScatterCreate(user->ui[i],is_from_u,user->uxi[i],is_to_uxi,&user->ux_scatter[i]);CHKERRQ(ierr);
984 
985     ierr = ISDestroy(&is_to_uxi);CHKERRQ(ierr);
986     ierr = ISDestroy(&is_from_u);CHKERRQ(ierr);
987 
988     ierr = VecGetOwnershipRange(user->uyi[i],&lo,&hi);CHKERRQ(ierr);
989     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uyi);CHKERRQ(ierr);
990     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+user->mx*user->mx,1,&is_from_u);CHKERRQ(ierr);
991     ierr = VecScatterCreate(user->ui[i],is_from_u,user->uyi[i],is_to_uyi,&user->uy_scatter[i]);CHKERRQ(ierr);
992 
993     ierr = ISDestroy(&is_to_uyi);CHKERRQ(ierr);
994     ierr = ISDestroy(&is_from_u);CHKERRQ(ierr);
995 
996     ierr = VecGetOwnershipRange(user->ui[i],&lo,&hi);CHKERRQ(ierr);
997     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi);CHKERRQ(ierr);
998     ierr = ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+2*i*user->mx*user->mx,1,&is_from_u);CHKERRQ(ierr);
999     ierr = VecScatterCreate(user->u,is_from_u,user->ui[i],is_to_uxi,&user->ui_scatter[i]);CHKERRQ(ierr);
1000 
1001     ierr = ISDestroy(&is_to_uxi);CHKERRQ(ierr);
1002     ierr = ISDestroy(&is_from_u);CHKERRQ(ierr);
1003   }
1004 
1005   /* RHS of forward problem */
1006   ierr = MatMult(user->M,bc,user->yiwork[0]);CHKERRQ(ierr);
1007   for (i=1; i<user->nt; i++){
1008     ierr = VecSet(user->yiwork[i],0.0);CHKERRQ(ierr);
1009   }
1010   ierr = Gather_yi(user->q,user->yiwork,user->yi_scatter,user->nt);CHKERRQ(ierr);
1011 
1012   /* Compute true velocity field utrue */
1013   ierr = VecDuplicate(user->u,&user->utrue);CHKERRQ(ierr);
1014   for (i=0; i<user->nt; i++){
1015     ierr = VecCopy(YY,user->uxi[i]);CHKERRQ(ierr);
1016     ierr = VecScale(user->uxi[i],150.0*i*user->ht);CHKERRQ(ierr);
1017     ierr = VecCopy(XX,user->uyi[i]);CHKERRQ(ierr);
1018     ierr = VecShift(user->uyi[i],-10.0);CHKERRQ(ierr);
1019     ierr = VecScale(user->uyi[i],15.0*i*user->ht);CHKERRQ(ierr);
1020   }
1021   ierr = Gather_uxi_uyi(user->utrue,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);CHKERRQ(ierr);
1022 
1023   /* Initial guess and reference model */
1024   ierr = VecDuplicate(user->utrue,&user->ur);CHKERRQ(ierr);
1025   for (i=0; i<user->nt; i++){
1026     ierr = VecCopy(XX,user->uxi[i]);CHKERRQ(ierr);
1027     ierr = VecShift(user->uxi[i],i*user->ht);CHKERRQ(ierr);
1028     ierr = VecCopy(YY,user->uyi[i]);CHKERRQ(ierr);
1029     ierr = VecShift(user->uyi[i],-i*user->ht);CHKERRQ(ierr);
1030   }
1031   ierr = Gather_uxi_uyi(user->ur,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);CHKERRQ(ierr);
1032 
1033   /* Generate regularization matrix L */
1034   ierr = MatCreate(PETSC_COMM_WORLD,&user->LT);CHKERRQ(ierr);
1035   ierr = MatSetSizes(user->LT,PETSC_DECIDE,PETSC_DECIDE,2*n*user->nt,n*user->nt);CHKERRQ(ierr);
1036   ierr = MatSetFromOptions(user->LT);CHKERRQ(ierr);
1037   ierr = MatMPIAIJSetPreallocation(user->LT,1,NULL,1,NULL);CHKERRQ(ierr);
1038   ierr = MatSeqAIJSetPreallocation(user->LT,1,NULL);CHKERRQ(ierr);
1039   ierr = MatGetOwnershipRange(user->LT,&istart,&iend);CHKERRQ(ierr);
1040 
1041   for (i=istart; i<iend; i++){
1042     iblock = (i+n) / (2*n);
1043     j = i - iblock*n;
1044     ierr = MatSetValues(user->LT,1,&i,1,&j,&user->gamma,INSERT_VALUES);CHKERRQ(ierr);
1045   }
1046 
1047   ierr = MatAssemblyBegin(user->LT,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1048   ierr = MatAssemblyEnd(user->LT,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1049 
1050   ierr = MatTranspose(user->LT,MAT_INITIAL_MATRIX,&user->L);CHKERRQ(ierr);
1051 
1052   /* Build work vectors and matrices */
1053   ierr = VecCreate(PETSC_COMM_WORLD,&user->lwork);CHKERRQ(ierr);
1054   ierr = VecSetType(user->lwork,VECMPI);CHKERRQ(ierr);
1055   ierr = VecSetSizes(user->lwork,PETSC_DECIDE,user->m);CHKERRQ(ierr);
1056   ierr = VecSetFromOptions(user->lwork);CHKERRQ(ierr);
1057 
1058   ierr = MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork);CHKERRQ(ierr);
1059 
1060   ierr = VecDuplicate(user->y,&user->ywork);CHKERRQ(ierr);
1061   ierr = VecDuplicate(user->u,&user->uwork);CHKERRQ(ierr);
1062   ierr = VecDuplicate(user->u,&user->vwork);CHKERRQ(ierr);
1063   ierr = VecDuplicate(user->u,&user->js_diag);CHKERRQ(ierr);
1064   ierr = VecDuplicate(user->c,&user->cwork);CHKERRQ(ierr);
1065 
1066   /* Create matrix-free shell user->Js for computing A*x */
1067   ierr = MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->Js);CHKERRQ(ierr);
1068   ierr = MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult);CHKERRQ(ierr);
1069   ierr = MatShellSetOperation(user->Js,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate);CHKERRQ(ierr);
1070   ierr = MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose);CHKERRQ(ierr);
1071   ierr = MatShellSetOperation(user->Js,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal);CHKERRQ(ierr);
1072 
1073   /* Diagonal blocks of user->Js */
1074   ierr = MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,n,n,user,&user->JsBlock);CHKERRQ(ierr);
1075   ierr = MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateMatBlockMult);CHKERRQ(ierr);
1076   ierr = MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockMultTranspose);CHKERRQ(ierr);
1077 
1078   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
1079      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
1080      This is an SOR preconditioner for user->JsBlock. */
1081   ierr = MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,n,n,user,&user->JsBlockPrec);CHKERRQ(ierr);
1082   ierr = MatShellSetOperation(user->JsBlockPrec,MATOP_MULT,(void(*)(void))StateMatBlockPrecMult);CHKERRQ(ierr);
1083   ierr = MatShellSetOperation(user->JsBlockPrec,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockPrecMultTranspose);CHKERRQ(ierr);
1084 
1085   /* Create a matrix-free shell user->Jd for computing B*x */
1086   ierr = MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->n-user->m,user,&user->Jd);CHKERRQ(ierr);
1087   ierr = MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult);CHKERRQ(ierr);
1088   ierr = MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose);CHKERRQ(ierr);
1089 
1090   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
1091   ierr = MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsInv);CHKERRQ(ierr);
1092   ierr = MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateMatInvMult);CHKERRQ(ierr);
1093   ierr = MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatInvTransposeMult);CHKERRQ(ierr);
1094 
1095   /* Build matrices for SOR preconditioner */
1096   ierr = Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);CHKERRQ(ierr);
1097   ierr = PetscMalloc1(5*n,&user->C);CHKERRQ(ierr);
1098   ierr = PetscMalloc1(2*n,&user->Cwork);CHKERRQ(ierr);
1099   for (i=0; i<user->nt; i++){
1100     ierr = MatDuplicate(user->Divxy[0],MAT_COPY_VALUES,&user->C[i]);CHKERRQ(ierr);
1101     ierr = MatDuplicate(user->Divxy[1],MAT_COPY_VALUES,&user->Cwork[i]);CHKERRQ(ierr);
1102 
1103     ierr = MatDiagonalScale(user->C[i],NULL,user->uxi[i]);CHKERRQ(ierr);
1104     ierr = MatDiagonalScale(user->Cwork[i],NULL,user->uyi[i]);CHKERRQ(ierr);
1105     ierr = MatAXPY(user->C[i],1.0,user->Cwork[i],DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1106     ierr = MatScale(user->C[i],user->ht);CHKERRQ(ierr);
1107     ierr = MatShift(user->C[i],1.0);CHKERRQ(ierr);
1108   }
1109 
1110   /* Solver options and tolerances */
1111   ierr = KSPCreate(PETSC_COMM_WORLD,&user->solver);CHKERRQ(ierr);
1112   ierr = KSPSetType(user->solver,KSPGMRES);CHKERRQ(ierr);
1113   ierr = KSPSetOperators(user->solver,user->JsBlock,user->JsBlockPrec);CHKERRQ(ierr);
1114   ierr = KSPSetTolerances(user->solver,1e-4,1e-20,1e3,500);CHKERRQ(ierr);
1115   /* ierr = KSPSetTolerances(user->solver,1e-8,1e-16,1e3,500);CHKERRQ(ierr); */
1116   ierr = KSPGetPC(user->solver,&user->prec);CHKERRQ(ierr);
1117   ierr = PCSetType(user->prec,PCSHELL);CHKERRQ(ierr);
1118 
1119   ierr = PCShellSetApply(user->prec,StateMatBlockPrecMult);CHKERRQ(ierr);
1120   ierr = PCShellSetApplyTranspose(user->prec,StateMatBlockPrecMultTranspose);CHKERRQ(ierr);
1121   ierr = PCShellSetContext(user->prec,user);CHKERRQ(ierr);
1122 
1123   /* Compute true state function yt given ut */
1124   ierr = VecCreate(PETSC_COMM_WORLD,&user->ytrue);CHKERRQ(ierr);
1125   ierr = VecSetSizes(user->ytrue,PETSC_DECIDE,n*user->nt);CHKERRQ(ierr);
1126   ierr = VecSetFromOptions(user->ytrue);CHKERRQ(ierr);
1127   user->c_formed = PETSC_TRUE;
1128   ierr = VecCopy(user->utrue,user->u);CHKERRQ(ierr); /*  Set u=utrue temporarily for StateMatInv */
1129   ierr = VecSet(user->ytrue,0.0);CHKERRQ(ierr); /*  Initial guess */
1130   ierr = StateMatInvMult(user->Js,user->q,user->ytrue);CHKERRQ(ierr);
1131   ierr = VecCopy(user->ur,user->u);CHKERRQ(ierr); /*  Reset u=ur */
1132 
1133   /* Initial guess y0 for state given u0 */
1134   ierr = StateMatInvMult(user->Js,user->q,user->y);CHKERRQ(ierr);
1135 
1136   /* Data discretization */
1137   ierr = MatCreate(PETSC_COMM_WORLD,&user->Q);CHKERRQ(ierr);
1138   ierr = MatSetSizes(user->Q,PETSC_DECIDE,PETSC_DECIDE,user->mx*user->mx,user->m);CHKERRQ(ierr);
1139   ierr = MatSetFromOptions(user->Q);CHKERRQ(ierr);
1140   ierr = MatMPIAIJSetPreallocation(user->Q,0,NULL,1,NULL);CHKERRQ(ierr);
1141   ierr = MatSeqAIJSetPreallocation(user->Q,1,NULL);CHKERRQ(ierr);
1142 
1143   ierr = MatGetOwnershipRange(user->Q,&istart,&iend);CHKERRQ(ierr);
1144 
1145   for (i=istart; i<iend; i++){
1146     j = i + user->m - user->mx*user->mx;
1147     ierr = MatSetValues(user->Q,1,&i,1,&j,&one,INSERT_VALUES);CHKERRQ(ierr);
1148   }
1149 
1150   ierr = MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151   ierr = MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152 
1153   ierr = MatTranspose(user->Q,MAT_INITIAL_MATRIX,&user->QT);CHKERRQ(ierr);
1154 
1155   ierr = VecDestroy(&XX);CHKERRQ(ierr);
1156   ierr = VecDestroy(&YY);CHKERRQ(ierr);
1157   ierr = VecDestroy(&XXwork);CHKERRQ(ierr);
1158   ierr = VecDestroy(&YYwork);CHKERRQ(ierr);
1159   ierr = VecDestroy(&yi);CHKERRQ(ierr);
1160   ierr = VecDestroy(&uxi);CHKERRQ(ierr);
1161   ierr = VecDestroy(&ui);CHKERRQ(ierr);
1162   ierr = VecDestroy(&bc);CHKERRQ(ierr);
1163 
1164   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1165   ierr = KSPSetFromOptions(user->solver);CHKERRQ(ierr);
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 PetscErrorCode HyperbolicDestroy(AppCtx *user)
1170 {
1171   PetscErrorCode ierr;
1172   PetscInt       i;
1173 
1174   PetscFunctionBegin;
1175   ierr = MatDestroy(&user->Q);CHKERRQ(ierr);
1176   ierr = MatDestroy(&user->QT);CHKERRQ(ierr);
1177   ierr = MatDestroy(&user->Div);CHKERRQ(ierr);
1178   ierr = MatDestroy(&user->Divwork);CHKERRQ(ierr);
1179   ierr = MatDestroy(&user->Grad);CHKERRQ(ierr);
1180   ierr = MatDestroy(&user->L);CHKERRQ(ierr);
1181   ierr = MatDestroy(&user->LT);CHKERRQ(ierr);
1182   ierr = KSPDestroy(&user->solver);CHKERRQ(ierr);
1183   ierr = MatDestroy(&user->Js);CHKERRQ(ierr);
1184   ierr = MatDestroy(&user->Jd);CHKERRQ(ierr);
1185   ierr = MatDestroy(&user->JsBlockPrec);CHKERRQ(ierr);
1186   ierr = MatDestroy(&user->JsInv);CHKERRQ(ierr);
1187   ierr = MatDestroy(&user->JsBlock);CHKERRQ(ierr);
1188   ierr = MatDestroy(&user->Divxy[0]);CHKERRQ(ierr);
1189   ierr = MatDestroy(&user->Divxy[1]);CHKERRQ(ierr);
1190   ierr = MatDestroy(&user->Gradxy[0]);CHKERRQ(ierr);
1191   ierr = MatDestroy(&user->Gradxy[1]);CHKERRQ(ierr);
1192   ierr = MatDestroy(&user->M);CHKERRQ(ierr);
1193   for (i=0; i<user->nt; i++){
1194     ierr = MatDestroy(&user->C[i]);CHKERRQ(ierr);
1195     ierr = MatDestroy(&user->Cwork[i]);CHKERRQ(ierr);
1196   }
1197   ierr = PetscFree(user->C);CHKERRQ(ierr);
1198   ierr = PetscFree(user->Cwork);CHKERRQ(ierr);
1199   ierr = VecDestroy(&user->u);CHKERRQ(ierr);
1200   ierr = VecDestroy(&user->uwork);CHKERRQ(ierr);
1201   ierr = VecDestroy(&user->vwork);CHKERRQ(ierr);
1202   ierr = VecDestroy(&user->utrue);CHKERRQ(ierr);
1203   ierr = VecDestroy(&user->y);CHKERRQ(ierr);
1204   ierr = VecDestroy(&user->ywork);CHKERRQ(ierr);
1205   ierr = VecDestroy(&user->ytrue);CHKERRQ(ierr);
1206   ierr = VecDestroyVecs(user->nt,&user->yi);CHKERRQ(ierr);
1207   ierr = VecDestroyVecs(user->nt,&user->yiwork);CHKERRQ(ierr);
1208   ierr = VecDestroyVecs(user->nt,&user->ziwork);CHKERRQ(ierr);
1209   ierr = VecDestroyVecs(user->nt,&user->uxi);CHKERRQ(ierr);
1210   ierr = VecDestroyVecs(user->nt,&user->uyi);CHKERRQ(ierr);
1211   ierr = VecDestroyVecs(user->nt,&user->uxiwork);CHKERRQ(ierr);
1212   ierr = VecDestroyVecs(user->nt,&user->uyiwork);CHKERRQ(ierr);
1213   ierr = VecDestroyVecs(user->nt,&user->ui);CHKERRQ(ierr);
1214   ierr = VecDestroyVecs(user->nt,&user->uiwork);CHKERRQ(ierr);
1215   ierr = VecDestroy(&user->c);CHKERRQ(ierr);
1216   ierr = VecDestroy(&user->cwork);CHKERRQ(ierr);
1217   ierr = VecDestroy(&user->ur);CHKERRQ(ierr);
1218   ierr = VecDestroy(&user->q);CHKERRQ(ierr);
1219   ierr = VecDestroy(&user->d);CHKERRQ(ierr);
1220   ierr = VecDestroy(&user->dwork);CHKERRQ(ierr);
1221   ierr = VecDestroy(&user->lwork);CHKERRQ(ierr);
1222   ierr = VecDestroy(&user->js_diag);CHKERRQ(ierr);
1223   ierr = ISDestroy(&user->s_is);CHKERRQ(ierr);
1224   ierr = ISDestroy(&user->d_is);CHKERRQ(ierr);
1225   ierr = VecScatterDestroy(&user->state_scatter);CHKERRQ(ierr);
1226   ierr = VecScatterDestroy(&user->design_scatter);CHKERRQ(ierr);
1227   for (i=0; i<user->nt; i++){
1228     ierr = VecScatterDestroy(&user->uxi_scatter[i]);CHKERRQ(ierr);
1229     ierr = VecScatterDestroy(&user->uyi_scatter[i]);CHKERRQ(ierr);
1230     ierr = VecScatterDestroy(&user->ux_scatter[i]);CHKERRQ(ierr);
1231     ierr = VecScatterDestroy(&user->uy_scatter[i]);CHKERRQ(ierr);
1232     ierr = VecScatterDestroy(&user->ui_scatter[i]);CHKERRQ(ierr);
1233     ierr = VecScatterDestroy(&user->yi_scatter[i]);CHKERRQ(ierr);
1234   }
1235   ierr = PetscFree(user->uxi_scatter);CHKERRQ(ierr);
1236   ierr = PetscFree(user->uyi_scatter);CHKERRQ(ierr);
1237   ierr = PetscFree(user->ux_scatter);CHKERRQ(ierr);
1238   ierr = PetscFree(user->uy_scatter);CHKERRQ(ierr);
1239   ierr = PetscFree(user->ui_scatter);CHKERRQ(ierr);
1240   ierr = PetscFree(user->yi_scatter);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode HyperbolicMonitor(Tao tao, void *ptr)
1245 {
1246   PetscErrorCode ierr;
1247   Vec            X;
1248   PetscReal      unorm,ynorm;
1249   AppCtx         *user = (AppCtx*)ptr;
1250 
1251   PetscFunctionBegin;
1252   ierr = TaoGetSolutionVector(tao,&X);CHKERRQ(ierr);
1253   ierr = Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter);CHKERRQ(ierr);
1254   ierr = VecAXPY(user->ywork,-1.0,user->ytrue);CHKERRQ(ierr);
1255   ierr = VecAXPY(user->uwork,-1.0,user->utrue);CHKERRQ(ierr);
1256   ierr = VecNorm(user->uwork,NORM_2,&unorm);CHKERRQ(ierr);
1257   ierr = VecNorm(user->ywork,NORM_2,&ynorm);CHKERRQ(ierr);
1258   ierr = PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 
1263 /*TEST
1264 
1265    build:
1266       requires: !complex
1267 
1268    test:
1269       requires: !single
1270       args: -tao_cmonitor -tao_max_funcs 10 -tao_type lcl -tao_gatol 1.e-5
1271 
1272    test:
1273       suffix: guess_pod
1274       requires: !single
1275       args: -tao_cmonitor -tao_max_funcs 10 -tao_type lcl -ksp_guess_type pod -tao_gatol 1.e-5
1276 
1277 TEST*/
1278