xref: /petsc/src/tao/pde_constrained/tutorials/parabolic.c (revision 9ded082c8565093b53c28d159a15093bb482abe7)
1 #include <petsc/private/taoimpl.h>
2 
3 typedef struct {
4   PetscInt n; /*  Number of variables */
5   PetscInt m; /*  Number of constraints per time step */
6   PetscInt mx; /*  grid points in each direction */
7   PetscInt nt; /*  Number of time steps; as of now, must be divisible by 8 */
8   PetscInt ndata; /*  Number of data points per sample */
9   PetscInt ns; /*  Number of samples */
10   PetscInt *sample_times; /*  Times of samples */
11   IS       s_is;
12   IS       d_is;
13 
14   VecScatter state_scatter;
15   VecScatter design_scatter;
16   VecScatter *yi_scatter;
17   VecScatter *di_scatter;
18 
19   Mat       Js,Jd,JsBlockPrec,JsInv,JsBlock;
20   PetscBool jformed,dsg_formed;
21 
22   PetscReal alpha; /*  Regularization parameter */
23   PetscReal beta; /*  Weight attributed to ||u||^2 in regularization functional */
24   PetscReal noise; /*  Amount of noise to add to data */
25   PetscReal ht; /*  Time step */
26 
27   Mat Qblock,QblockT;
28   Mat L,LT;
29   Mat Div,Divwork;
30   Mat Grad;
31   Mat Av,Avwork,AvT;
32   Mat DSG;
33   Vec q;
34   Vec ur; /*  reference */
35 
36   Vec d;
37   Vec dwork;
38   Vec *di;
39 
40   Vec y; /*  state variables */
41   Vec ywork;
42 
43   Vec ytrue;
44   Vec *yi,*yiwork;
45 
46   Vec u; /*  design variables */
47   Vec uwork;
48 
49   Vec utrue;
50   Vec js_diag;
51   Vec c; /*  constraint vector */
52   Vec cwork;
53 
54   Vec lwork;
55   Vec S;
56   Vec Rwork,Swork,Twork;
57   Vec Av_u;
58 
59   KSP solver;
60   PC prec;
61 
62   PetscInt ksp_its;
63   PetscInt ksp_its_initial;
64 } AppCtx;
65 
66 PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
67 PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
68 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
69 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
70 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat, void*);
71 PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
72 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
73 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
74 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
75 PetscErrorCode ParabolicInitialize(AppCtx *user);
76 PetscErrorCode ParabolicDestroy(AppCtx *user);
77 PetscErrorCode ParabolicMonitor(Tao, void*);
78 
79 PetscErrorCode StateMatMult(Mat,Vec,Vec);
80 PetscErrorCode StateMatBlockMult(Mat,Vec,Vec);
81 PetscErrorCode StateMatMultTranspose(Mat,Vec,Vec);
82 PetscErrorCode StateMatGetDiagonal(Mat,Vec);
83 PetscErrorCode StateMatDuplicate(Mat,MatDuplicateOption,Mat*);
84 PetscErrorCode StateMatInvMult(Mat,Vec,Vec);
85 PetscErrorCode StateMatInvTransposeMult(Mat,Vec,Vec);
86 PetscErrorCode StateMatBlockPrecMult(PC,Vec,Vec);
87 
88 PetscErrorCode DesignMatMult(Mat,Vec,Vec);
89 PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);
90 
91 PetscErrorCode Gather_i(Vec,Vec*,VecScatter*,PetscInt);
92 PetscErrorCode Scatter_i(Vec,Vec*,VecScatter*,PetscInt);
93 
94 static  char help[]="";
95 
96 int main(int argc, char **argv)
97 {
98   Vec                x,x0;
99   Tao                tao;
100   AppCtx             user;
101   IS                 is_allstate,is_alldesign;
102   PetscInt           lo,hi,hi2,lo2,ksp_old;
103   PetscInt           ntests = 1;
104   PetscInt           i;
105 #if defined(PETSC_USE_LOG)
106   PetscLogStage      stages[1];
107 #endif
108 
109   PetscFunctionBeginUser;
110   PetscCall(PetscInitialize(&argc, &argv, (char*)0,help));
111   user.mx = 8;
112   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"parabolic example",NULL);
113   PetscCall(PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL));
114   user.nt = 8;
115   PetscCall(PetscOptionsInt("-nt","Number of time steps","",user.nt,&user.nt,NULL));
116   user.ndata = 64;
117   PetscCall(PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL));
118   user.ns = 8;
119   PetscCall(PetscOptionsInt("-ns","Number of samples","",user.ns,&user.ns,NULL));
120   user.alpha = 1.0;
121   PetscCall(PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL));
122   user.beta = 0.01;
123   PetscCall(PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL));
124   user.noise = 0.01;
125   PetscCall(PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL));
126   PetscCall(PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL));
127   PetscOptionsEnd();
128 
129   user.m = user.mx*user.mx*user.mx; /*  number of constraints per time step */
130   user.n = user.m*(user.nt+1); /*  number of variables */
131   user.ht = (PetscReal)1/user.nt;
132 
133   PetscCall(VecCreate(PETSC_COMM_WORLD,&user.u));
134   PetscCall(VecCreate(PETSC_COMM_WORLD,&user.y));
135   PetscCall(VecCreate(PETSC_COMM_WORLD,&user.c));
136   PetscCall(VecSetSizes(user.u,PETSC_DECIDE,user.n-user.m*user.nt));
137   PetscCall(VecSetSizes(user.y,PETSC_DECIDE,user.m*user.nt));
138   PetscCall(VecSetSizes(user.c,PETSC_DECIDE,user.m*user.nt));
139   PetscCall(VecSetFromOptions(user.u));
140   PetscCall(VecSetFromOptions(user.y));
141   PetscCall(VecSetFromOptions(user.c));
142 
143   /* Create scatters for reduced spaces.
144      If the state vector y and design vector u are partitioned as
145      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
146      then the solution vector x is organized as
147      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
148      The index sets user.s_is and user.d_is correspond to the indices of the
149      state and design variables owned by the current processor.
150   */
151   PetscCall(VecCreate(PETSC_COMM_WORLD,&x));
152 
153   PetscCall(VecGetOwnershipRange(user.y,&lo,&hi));
154   PetscCall(VecGetOwnershipRange(user.u,&lo2,&hi2));
155 
156   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate));
157   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user.s_is));
158 
159   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign));
160   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user.d_is));
161 
162   PetscCall(VecSetSizes(x,hi-lo+hi2-lo2,user.n));
163   PetscCall(VecSetFromOptions(x));
164 
165   PetscCall(VecScatterCreate(x,user.s_is,user.y,is_allstate,&user.state_scatter));
166   PetscCall(VecScatterCreate(x,user.d_is,user.u,is_alldesign,&user.design_scatter));
167   PetscCall(ISDestroy(&is_alldesign));
168   PetscCall(ISDestroy(&is_allstate));
169 
170   /* Create TAO solver and set desired solution method */
171   PetscCall(TaoCreate(PETSC_COMM_WORLD,&tao));
172   PetscCall(TaoSetType(tao,TAOLCL));
173 
174   /* Set up initial vectors and matrices */
175   PetscCall(ParabolicInitialize(&user));
176 
177   PetscCall(Gather(x,user.y,user.state_scatter,user.u,user.design_scatter));
178   PetscCall(VecDuplicate(x,&x0));
179   PetscCall(VecCopy(x,x0));
180 
181   /* Set solution vector with an initial guess */
182   PetscCall(TaoSetSolution(tao,x));
183   PetscCall(TaoSetObjective(tao, FormFunction, &user));
184   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
185   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
186 
187   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, user.JsBlockPrec, user.JsInv, FormJacobianState, &user));
188   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
189 
190   PetscCall(TaoSetFromOptions(tao));
191   PetscCall(TaoSetStateDesignIS(tao,user.s_is,user.d_is));
192 
193  /* SOLVE THE APPLICATION */
194   PetscCall(PetscLogStageRegister("Trials",&stages[0]));
195   PetscCall(PetscLogStagePush(stages[0]));
196   user.ksp_its_initial = user.ksp_its;
197   ksp_old = user.ksp_its;
198   for (i=0; i<ntests; i++) {
199     PetscCall(TaoSolve(tao));
200     PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %" PetscInt_FMT "\n",user.ksp_its-ksp_old));
201     PetscCall(VecCopy(x0,x));
202     PetscCall(TaoSetSolution(tao,x));
203   }
204   PetscCall(PetscLogStagePop());
205   PetscCall(PetscBarrier((PetscObject)x));
206   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: "));
207   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%" PetscInt_FMT "\n",user.ksp_its_initial));
208   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Total KSP iterations over %" PetscInt_FMT " trial(s): ",ntests));
209   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%" PetscInt_FMT "\n",user.ksp_its));
210   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations per trial: "));
211   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%" PetscInt_FMT "\n",(user.ksp_its-user.ksp_its_initial)/ntests));
212 
213   PetscCall(TaoDestroy(&tao));
214   PetscCall(VecDestroy(&x));
215   PetscCall(VecDestroy(&x0));
216   PetscCall(ParabolicDestroy(&user));
217 
218   PetscCall(PetscFinalize());
219   return 0;
220 }
221 /* ------------------------------------------------------------------- */
222 /*
223    dwork = Qy - d
224    lwork = L*(u-ur)
225    f = 1/2 * (dwork.dork + alpha*lwork.lwork)
226 */
227 PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
228 {
229   PetscReal      d1=0,d2=0;
230   PetscInt       i,j;
231   AppCtx         *user = (AppCtx*)ptr;
232 
233   PetscFunctionBegin;
234   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
235   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
236   for (j=0; j<user->ns; j++) {
237     i = user->sample_times[j];
238     PetscCall(MatMult(user->Qblock,user->yi[i],user->di[j]));
239   }
240   PetscCall(Gather_i(user->dwork,user->di,user->di_scatter,user->ns));
241   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
242   PetscCall(VecDot(user->dwork,user->dwork,&d1));
243 
244   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
245   PetscCall(MatMult(user->L,user->uwork,user->lwork));
246   PetscCall(VecDot(user->lwork,user->lwork,&d2));
247 
248   *f = 0.5 * (d1 + user->alpha*d2);
249   PetscFunctionReturn(0);
250 }
251 
252 /* ------------------------------------------------------------------- */
253 /*
254     state: g_s = Q' *(Qy - d)
255     design: g_d = alpha*L'*L*(u-ur)
256 */
257 PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
258 {
259   PetscInt       i,j;
260   AppCtx         *user = (AppCtx*)ptr;
261 
262   PetscFunctionBegin;
263   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
264   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
265   for (j=0; j<user->ns; j++) {
266     i = user->sample_times[j];
267     PetscCall(MatMult(user->Qblock,user->yi[i],user->di[j]));
268   }
269   PetscCall(Gather_i(user->dwork,user->di,user->di_scatter,user->ns));
270   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
271   PetscCall(Scatter_i(user->dwork,user->di,user->di_scatter,user->ns));
272   PetscCall(VecSet(user->ywork,0.0));
273   PetscCall(Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
274   for (j=0; j<user->ns; j++) {
275     i = user->sample_times[j];
276     PetscCall(MatMult(user->QblockT,user->di[j],user->yiwork[i]));
277   }
278   PetscCall(Gather_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
279 
280   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
281   PetscCall(MatMult(user->L,user->uwork,user->lwork));
282   PetscCall(MatMult(user->LT,user->lwork,user->uwork));
283   PetscCall(VecScale(user->uwork, user->alpha));
284   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
285   PetscFunctionReturn(0);
286 }
287 
288 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
289 {
290   PetscReal      d1,d2;
291   PetscInt       i,j;
292   AppCtx         *user = (AppCtx*)ptr;
293 
294   PetscFunctionBegin;
295   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
296   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
297   for (j=0; j<user->ns; j++) {
298     i = user->sample_times[j];
299     PetscCall(MatMult(user->Qblock,user->yi[i],user->di[j]));
300   }
301   PetscCall(Gather_i(user->dwork,user->di,user->di_scatter,user->ns));
302   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
303   PetscCall(VecDot(user->dwork,user->dwork,&d1));
304   PetscCall(Scatter_i(user->dwork,user->di,user->di_scatter,user->ns));
305   PetscCall(VecSet(user->ywork,0.0));
306   PetscCall(Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
307   for (j=0; j<user->ns; j++) {
308     i = user->sample_times[j];
309     PetscCall(MatMult(user->QblockT,user->di[j],user->yiwork[i]));
310   }
311   PetscCall(Gather_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
312 
313   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
314   PetscCall(MatMult(user->L,user->uwork,user->lwork));
315   PetscCall(VecDot(user->lwork,user->lwork,&d2));
316   PetscCall(MatMult(user->LT,user->lwork,user->uwork));
317   PetscCall(VecScale(user->uwork, user->alpha));
318   *f = 0.5 * (d1 + user->alpha*d2);
319 
320   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
321   PetscFunctionReturn(0);
322 }
323 
324 /* ------------------------------------------------------------------- */
325 /* A
326 MatShell object
327 */
328 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
329 {
330   AppCtx         *user = (AppCtx*)ptr;
331 
332   PetscFunctionBegin;
333   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
334   PetscCall(VecSet(user->uwork,0));
335   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
336   PetscCall(VecExp(user->uwork));
337   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
338   PetscCall(VecCopy(user->Av_u,user->Swork));
339   PetscCall(VecReciprocal(user->Swork));
340   PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
341   PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
342   if (user->dsg_formed) {
343     PetscCall(MatProductNumeric(user->DSG));
344   } else {
345     PetscCall(MatMatMult(user->Divwork,user->Grad,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&user->DSG));
346     user->dsg_formed = PETSC_TRUE;
347   }
348 
349   /* B = speye(nx^3) + ht*DSG; */
350   PetscCall(MatScale(user->DSG,user->ht));
351   PetscCall(MatShift(user->DSG,1.0));
352   PetscFunctionReturn(0);
353 }
354 
355 /* ------------------------------------------------------------------- */
356 /* B */
357 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
358 {
359   AppCtx         *user = (AppCtx*)ptr;
360 
361   PetscFunctionBegin;
362   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
363   PetscFunctionReturn(0);
364 }
365 
366 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
367 {
368   PetscInt       i;
369   AppCtx         *user;
370 
371   PetscFunctionBegin;
372   PetscCall(MatShellGetContext(J_shell,&user));
373   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
374   PetscCall(MatMult(user->JsBlock,user->yi[0],user->yiwork[0]));
375   for (i=1; i<user->nt; i++) {
376     PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
377     PetscCall(VecAXPY(user->yiwork[i],-1.0,user->yi[i-1]));
378   }
379   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
380   PetscFunctionReturn(0);
381 }
382 
383 PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y)
384 {
385   PetscInt       i;
386   AppCtx         *user;
387 
388   PetscFunctionBegin;
389   PetscCall(MatShellGetContext(J_shell,&user));
390   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
391   for (i=0; i<user->nt-1; i++) {
392     PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
393     PetscCall(VecAXPY(user->yiwork[i],-1.0,user->yi[i+1]));
394   }
395   i = user->nt-1;
396   PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
397   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
398   PetscFunctionReturn(0);
399 }
400 
401 PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y)
402 {
403   AppCtx         *user;
404 
405   PetscFunctionBegin;
406   PetscCall(MatShellGetContext(J_shell,&user));
407   PetscCall(MatMult(user->Grad,X,user->Swork));
408   PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
409   PetscCall(MatMult(user->Div,user->Swork,Y));
410   PetscCall(VecAYPX(Y,user->ht,X));
411   PetscFunctionReturn(0);
412 }
413 
414 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
415 {
416   PetscInt       i;
417   AppCtx         *user;
418 
419   PetscFunctionBegin;
420   PetscCall(MatShellGetContext(J_shell,&user));
421 
422   /* sdiag(1./v) */
423   PetscCall(VecSet(user->uwork,0));
424   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
425   PetscCall(VecExp(user->uwork));
426 
427   /* sdiag(1./((Av*(1./v)).^2)) */
428   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
429   PetscCall(VecPointwiseMult(user->Swork,user->Swork,user->Swork));
430   PetscCall(VecReciprocal(user->Swork));
431 
432   /* (Av * (sdiag(1./v) * b)) */
433   PetscCall(VecPointwiseMult(user->uwork,user->uwork,X));
434   PetscCall(MatMult(user->Av,user->uwork,user->Twork));
435 
436   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
437   PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
438 
439   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
440   for (i=0; i<user->nt; i++) {
441     /* (sdiag(Grad*y(:,i)) */
442     PetscCall(MatMult(user->Grad,user->yi[i],user->Twork));
443 
444     /* ht * Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
445     PetscCall(VecPointwiseMult(user->Twork,user->Twork,user->Swork));
446     PetscCall(MatMult(user->Div,user->Twork,user->yiwork[i]));
447     PetscCall(VecScale(user->yiwork[i],user->ht));
448   }
449   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
450 
451   PetscFunctionReturn(0);
452 }
453 
454 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
455 {
456   PetscInt       i;
457   AppCtx         *user;
458 
459   PetscFunctionBegin;
460   PetscCall(MatShellGetContext(J_shell,&user));
461 
462   /* sdiag(1./((Av*(1./v)).^2)) */
463   PetscCall(VecSet(user->uwork,0));
464   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
465   PetscCall(VecExp(user->uwork));
466   PetscCall(MatMult(user->Av,user->uwork,user->Rwork));
467   PetscCall(VecPointwiseMult(user->Rwork,user->Rwork,user->Rwork));
468   PetscCall(VecReciprocal(user->Rwork));
469 
470   PetscCall(VecSet(Y,0.0));
471   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
472   PetscCall(Scatter_i(X,user->yiwork,user->yi_scatter,user->nt));
473   for (i=0; i<user->nt; i++) {
474     /* (Div' * b(:,i)) */
475     PetscCall(MatMult(user->Grad,user->yiwork[i],user->Swork));
476 
477     /* sdiag(Grad*y(:,i)) */
478     PetscCall(MatMult(user->Grad,user->yi[i],user->Twork));
479 
480     /* (sdiag(Grad*y(:,i)) * (Div' * b(:,i))) */
481     PetscCall(VecPointwiseMult(user->Twork,user->Swork,user->Twork));
482 
483     /* (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i)))) */
484     PetscCall(VecPointwiseMult(user->Twork,user->Rwork,user->Twork));
485 
486     /* (Av' * (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i))))) */
487     PetscCall(MatMult(user->AvT,user->Twork,user->yiwork[i]));
488 
489     /* sdiag(1./v) * (Av' * (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i))))) */
490     PetscCall(VecPointwiseMult(user->yiwork[i],user->uwork,user->yiwork[i]));
491     PetscCall(VecAXPY(Y,user->ht,user->yiwork[i]));
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y)
497 {
498   AppCtx         *user;
499 
500   PetscFunctionBegin;
501   PetscCall(PCShellGetContext(PC_shell,&user));
502 
503   if (user->dsg_formed) {
504     PetscCall(MatSOR(user->DSG,X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y));
505   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"DSG not formed");
506   PetscFunctionReturn(0);
507 }
508 
509 PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
510 {
511   AppCtx         *user;
512   PetscInt       its,i;
513 
514   PetscFunctionBegin;
515   PetscCall(MatShellGetContext(J_shell,&user));
516 
517   if (Y == user->ytrue) {
518     /* First solve is done with true solution to set up problem */
519     PetscCall(KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
520   } else {
521     PetscCall(KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
522   }
523 
524   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
525   PetscCall(KSPSolve(user->solver,user->yi[0],user->yiwork[0]));
526   PetscCall(KSPGetIterationNumber(user->solver,&its));
527   user->ksp_its = user->ksp_its + its;
528 
529   for (i=1; i<user->nt; i++) {
530     PetscCall(VecAXPY(user->yi[i],1.0,user->yiwork[i-1]));
531     PetscCall(KSPSolve(user->solver,user->yi[i],user->yiwork[i]));
532     PetscCall(KSPGetIterationNumber(user->solver,&its));
533     user->ksp_its = user->ksp_its + its;
534   }
535   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
536   PetscFunctionReturn(0);
537 }
538 
539 PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
540 {
541   AppCtx         *user;
542   PetscInt       its,i;
543 
544   PetscFunctionBegin;
545   PetscCall(MatShellGetContext(J_shell,&user));
546 
547   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
548 
549   i = user->nt - 1;
550   PetscCall(KSPSolve(user->solver,user->yi[i],user->yiwork[i]));
551 
552   PetscCall(KSPGetIterationNumber(user->solver,&its));
553   user->ksp_its = user->ksp_its + its;
554 
555   for (i=user->nt-2; i>=0; i--) {
556     PetscCall(VecAXPY(user->yi[i],1.0,user->yiwork[i+1]));
557     PetscCall(KSPSolve(user->solver,user->yi[i],user->yiwork[i]));
558 
559     PetscCall(KSPGetIterationNumber(user->solver,&its));
560     user->ksp_its = user->ksp_its + its;
561 
562   }
563 
564   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
565   PetscFunctionReturn(0);
566 }
567 
568 PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
569 {
570   AppCtx         *user;
571 
572   PetscFunctionBegin;
573   PetscCall(MatShellGetContext(J_shell,&user));
574 
575   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,new_shell));
576   PetscCall(MatShellSetOperation(*new_shell,MATOP_MULT,(void(*)(void))StateMatMult));
577   PetscCall(MatShellSetOperation(*new_shell,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate));
578   PetscCall(MatShellSetOperation(*new_shell,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose));
579   PetscCall(MatShellSetOperation(*new_shell,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal));
580   PetscFunctionReturn(0);
581 }
582 
583 PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
584 {
585   AppCtx         *user;
586 
587   PetscFunctionBegin;
588   PetscCall(MatShellGetContext(J_shell,&user));
589   PetscCall(VecCopy(user->js_diag,X));
590   PetscFunctionReturn(0);
591 
592 }
593 
594 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
595 {
596   /* con = Ay - q, A = [B  0  0 ... 0;
597                        -I  B  0 ... 0;
598                         0 -I  B ... 0;
599                              ...     ;
600                         0    ... -I B]
601      B = ht * Div * Sigma * Grad + eye */
602   PetscInt       i;
603   AppCtx         *user = (AppCtx*)ptr;
604 
605   PetscFunctionBegin;
606   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
607   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
608   PetscCall(MatMult(user->JsBlock,user->yi[0],user->yiwork[0]));
609   for (i=1; i<user->nt; i++) {
610     PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
611     PetscCall(VecAXPY(user->yiwork[i],-1.0,user->yi[i-1]));
612   }
613   PetscCall(Gather_i(C,user->yiwork,user->yi_scatter,user->nt));
614   PetscCall(VecAXPY(C,-1.0,user->q));
615   PetscFunctionReturn(0);
616 }
617 
618 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
619 {
620   PetscFunctionBegin;
621   PetscCall(VecScatterBegin(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD));
622   PetscCall(VecScatterEnd(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD));
623   PetscCall(VecScatterBegin(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD));
624   PetscCall(VecScatterEnd(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD));
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode Scatter_i(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
629 {
630   PetscInt       i;
631 
632   PetscFunctionBegin;
633   for (i=0; i<nt; i++) {
634     PetscCall(VecScatterBegin(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD));
635     PetscCall(VecScatterEnd(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD));
636   }
637   PetscFunctionReturn(0);
638 }
639 
640 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
641 {
642   PetscFunctionBegin;
643   PetscCall(VecScatterBegin(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE));
644   PetscCall(VecScatterEnd(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE));
645   PetscCall(VecScatterBegin(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE));
646   PetscCall(VecScatterEnd(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE));
647   PetscFunctionReturn(0);
648 }
649 
650 PetscErrorCode Gather_i(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
651 {
652   PetscInt       i;
653 
654   PetscFunctionBegin;
655   for (i=0; i<nt; i++) {
656     PetscCall(VecScatterBegin(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE));
657     PetscCall(VecScatterEnd(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE));
658   }
659   PetscFunctionReturn(0);
660 }
661 
662 PetscErrorCode ParabolicInitialize(AppCtx *user)
663 {
664   PetscInt       m,n,i,j,k,linear_index,istart,iend,iblock,lo,hi,lo2,hi2;
665   Vec            XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork,yi,di,bc;
666   PetscReal      *x, *y, *z;
667   PetscReal      h,stime;
668   PetscScalar    hinv,neg_hinv,half = 0.5,sqrt_beta;
669   PetscInt       im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
670   PetscReal      xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
671   PetscScalar    v,vx,vy,vz;
672   IS             is_from_y,is_to_yi,is_from_d,is_to_di;
673   /* Data locations */
674   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,
675                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,
676                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,
677                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,
678                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,
679                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,
680                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,
681                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
682 
683   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,
684                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,
685                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,
686                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,
687                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,
688                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,
689                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,
690                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
691 
692   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,
693                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,
694                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,
695                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,
696                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,
697                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,
698                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,
699                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};
700 
701   PetscFunctionBegin;
702   PetscCall(PetscMalloc1(user->mx,&x));
703   PetscCall(PetscMalloc1(user->mx,&y));
704   PetscCall(PetscMalloc1(user->mx,&z));
705   user->jformed = PETSC_FALSE;
706   user->dsg_formed = PETSC_FALSE;
707 
708   n = user->mx * user->mx * user->mx;
709   m = 3 * user->mx * user->mx * (user->mx-1);
710   sqrt_beta = PetscSqrtScalar(user->beta);
711 
712   user->ksp_its = 0;
713   user->ksp_its_initial = 0;
714 
715   stime = (PetscReal)user->nt/user->ns;
716   PetscCall(PetscMalloc1(user->ns,&user->sample_times));
717   for (i=0; i<user->ns; i++) {
718     user->sample_times[i] = (PetscInt)(stime*((PetscReal)i+1.0)-0.5);
719   }
720 
721   PetscCall(VecCreate(PETSC_COMM_WORLD,&XX));
722   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->q));
723   PetscCall(VecSetSizes(XX,PETSC_DECIDE,n));
724   PetscCall(VecSetSizes(user->q,PETSC_DECIDE,n*user->nt));
725   PetscCall(VecSetFromOptions(XX));
726   PetscCall(VecSetFromOptions(user->q));
727 
728   PetscCall(VecDuplicate(XX,&YY));
729   PetscCall(VecDuplicate(XX,&ZZ));
730   PetscCall(VecDuplicate(XX,&XXwork));
731   PetscCall(VecDuplicate(XX,&YYwork));
732   PetscCall(VecDuplicate(XX,&ZZwork));
733   PetscCall(VecDuplicate(XX,&UTwork));
734   PetscCall(VecDuplicate(XX,&user->utrue));
735   PetscCall(VecDuplicate(XX,&bc));
736 
737   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
738   h = 1.0/user->mx;
739   hinv = user->mx;
740   neg_hinv = -hinv;
741 
742   PetscCall(VecGetOwnershipRange(XX,&istart,&iend));
743   for (linear_index=istart; linear_index<iend; linear_index++) {
744     i = linear_index % user->mx;
745     j = ((linear_index-i)/user->mx) % user->mx;
746     k = ((linear_index-i)/user->mx-j) / user->mx;
747     vx = h*(i+0.5);
748     vy = h*(j+0.5);
749     vz = h*(k+0.5);
750     PetscCall(VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES));
751     PetscCall(VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES));
752     PetscCall(VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES));
753     if ((vx<0.6) && (vx>0.4) && (vy<0.6) && (vy>0.4) && (vy<0.6) && (vz<0.6) && (vz>0.4)) {
754       v = 1000.0;
755       PetscCall(VecSetValues(bc,1,&linear_index,&v,INSERT_VALUES));
756     }
757   }
758 
759   PetscCall(VecAssemblyBegin(XX));
760   PetscCall(VecAssemblyEnd(XX));
761   PetscCall(VecAssemblyBegin(YY));
762   PetscCall(VecAssemblyEnd(YY));
763   PetscCall(VecAssemblyBegin(ZZ));
764   PetscCall(VecAssemblyEnd(ZZ));
765   PetscCall(VecAssemblyBegin(bc));
766   PetscCall(VecAssemblyEnd(bc));
767 
768   /* Compute true parameter function
769      ut = 0.5 + exp(-10*((x-0.5)^2+(y-0.5)^2+(z-0.5)^2)) */
770   PetscCall(VecCopy(XX,XXwork));
771   PetscCall(VecCopy(YY,YYwork));
772   PetscCall(VecCopy(ZZ,ZZwork));
773 
774   PetscCall(VecShift(XXwork,-0.5));
775   PetscCall(VecShift(YYwork,-0.5));
776   PetscCall(VecShift(ZZwork,-0.5));
777 
778   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
779   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
780   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
781 
782   PetscCall(VecCopy(XXwork,user->utrue));
783   PetscCall(VecAXPY(user->utrue,1.0,YYwork));
784   PetscCall(VecAXPY(user->utrue,1.0,ZZwork));
785   PetscCall(VecScale(user->utrue,-10.0));
786   PetscCall(VecExp(user->utrue));
787   PetscCall(VecShift(user->utrue,0.5));
788 
789   PetscCall(VecDestroy(&XX));
790   PetscCall(VecDestroy(&YY));
791   PetscCall(VecDestroy(&ZZ));
792   PetscCall(VecDestroy(&XXwork));
793   PetscCall(VecDestroy(&YYwork));
794   PetscCall(VecDestroy(&ZZwork));
795   PetscCall(VecDestroy(&UTwork));
796 
797   /* Initial guess and reference model */
798   PetscCall(VecDuplicate(user->utrue,&user->ur));
799   PetscCall(VecSet(user->ur,0.0));
800 
801   /* Generate Grad matrix */
802   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Grad));
803   PetscCall(MatSetSizes(user->Grad,PETSC_DECIDE,PETSC_DECIDE,m,n));
804   PetscCall(MatSetFromOptions(user->Grad));
805   PetscCall(MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL));
806   PetscCall(MatSeqAIJSetPreallocation(user->Grad,2,NULL));
807   PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
808 
809   for (i=istart; i<iend; i++) {
810     if (i<m/3) {
811       iblock = i / (user->mx-1);
812       j = iblock*user->mx + (i % (user->mx-1));
813       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
814       j = j+1;
815       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
816     }
817     if (i>=m/3 && i<2*m/3) {
818       iblock = (i-m/3) / (user->mx*(user->mx-1));
819       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
820       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
821       j = j + user->mx;
822       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
823     }
824     if (i>=2*m/3) {
825       j = i-2*m/3;
826       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
827       j = j + user->mx*user->mx;
828       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
829     }
830   }
831 
832   PetscCall(MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY));
833   PetscCall(MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY));
834 
835   /* Generate arithmetic averaging matrix Av */
836   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Av));
837   PetscCall(MatSetSizes(user->Av,PETSC_DECIDE,PETSC_DECIDE,m,n));
838   PetscCall(MatSetFromOptions(user->Av));
839   PetscCall(MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL));
840   PetscCall(MatSeqAIJSetPreallocation(user->Av,2,NULL));
841   PetscCall(MatGetOwnershipRange(user->Av,&istart,&iend));
842 
843   for (i=istart; i<iend; i++) {
844     if (i<m/3) {
845       iblock = i / (user->mx-1);
846       j = iblock*user->mx + (i % (user->mx-1));
847       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
848       j = j+1;
849       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
850     }
851     if (i>=m/3 && i<2*m/3) {
852       iblock = (i-m/3) / (user->mx*(user->mx-1));
853       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
854       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
855       j = j + user->mx;
856       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
857     }
858     if (i>=2*m/3) {
859       j = i-2*m/3;
860       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
861       j = j + user->mx*user->mx;
862       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
863     }
864   }
865 
866   PetscCall(MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY));
867   PetscCall(MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY));
868 
869   /* Generate transpose of averaging matrix Av */
870   PetscCall(MatTranspose(user->Av,MAT_INITIAL_MATRIX,&user->AvT));
871 
872   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->L));
873   PetscCall(MatSetSizes(user->L,PETSC_DECIDE,PETSC_DECIDE,m+n,n));
874   PetscCall(MatSetFromOptions(user->L));
875   PetscCall(MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL));
876   PetscCall(MatSeqAIJSetPreallocation(user->L,2,NULL));
877   PetscCall(MatGetOwnershipRange(user->L,&istart,&iend));
878 
879   for (i=istart; i<iend; i++) {
880     if (i<m/3) {
881       iblock = i / (user->mx-1);
882       j = iblock*user->mx + (i % (user->mx-1));
883       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
884       j = j+1;
885       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
886     }
887     if (i>=m/3 && i<2*m/3) {
888       iblock = (i-m/3) / (user->mx*(user->mx-1));
889       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
890       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
891       j = j + user->mx;
892       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
893     }
894     if (i>=2*m/3 && i<m) {
895       j = i-2*m/3;
896       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
897       j = j + user->mx*user->mx;
898       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
899     }
900     if (i>=m) {
901       j = i - m;
902       PetscCall(MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES));
903     }
904   }
905 
906   PetscCall(MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY));
907   PetscCall(MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY));
908 
909   PetscCall(MatScale(user->L,PetscPowScalar(h,1.5)));
910 
911   /* Generate Div matrix */
912   PetscCall(MatTranspose(user->Grad,MAT_INITIAL_MATRIX,&user->Div));
913 
914   /* Build work vectors and matrices */
915   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->S));
916   PetscCall(VecSetSizes(user->S, PETSC_DECIDE, user->mx*user->mx*(user->mx-1)*3));
917   PetscCall(VecSetFromOptions(user->S));
918 
919   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->lwork));
920   PetscCall(VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx));
921   PetscCall(VecSetFromOptions(user->lwork));
922 
923   PetscCall(MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork));
924   PetscCall(MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork));
925 
926   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->d));
927   PetscCall(VecSetSizes(user->d,PETSC_DECIDE,user->ndata*user->nt));
928   PetscCall(VecSetFromOptions(user->d));
929 
930   PetscCall(VecDuplicate(user->S,&user->Swork));
931   PetscCall(VecDuplicate(user->S,&user->Av_u));
932   PetscCall(VecDuplicate(user->S,&user->Twork));
933   PetscCall(VecDuplicate(user->S,&user->Rwork));
934   PetscCall(VecDuplicate(user->y,&user->ywork));
935   PetscCall(VecDuplicate(user->u,&user->uwork));
936   PetscCall(VecDuplicate(user->u,&user->js_diag));
937   PetscCall(VecDuplicate(user->c,&user->cwork));
938   PetscCall(VecDuplicate(user->d,&user->dwork));
939 
940   /* Create matrix-free shell user->Js for computing A*x */
941   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m*user->nt,user,&user->Js));
942   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult));
943   PetscCall(MatShellSetOperation(user->Js,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate));
944   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose));
945   PetscCall(MatShellSetOperation(user->Js,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal));
946 
947   /* Diagonal blocks of user->Js */
948   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsBlock));
949   PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateMatBlockMult));
950   /* Blocks are symmetric */
951   PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockMult));
952 
953   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
954      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
955      This is an SSOR preconditioner for user->JsBlock. */
956   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsBlockPrec));
957   PetscCall(MatShellSetOperation(user->JsBlockPrec,MATOP_MULT,(void(*)(void))StateMatBlockPrecMult));
958   /* JsBlockPrec is symmetric */
959   PetscCall(MatShellSetOperation(user->JsBlockPrec,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockPrecMult));
960   PetscCall(MatSetOption(user->JsBlockPrec,MAT_SYMMETRIC,PETSC_TRUE));
961   PetscCall(MatSetOption(user->JsBlockPrec,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
962 
963   /* Create a matrix-free shell user->Jd for computing B*x */
964   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m,user,&user->Jd));
965   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult));
966   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose));
967 
968   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
969   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m*user->nt,user,&user->JsInv));
970   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateMatInvMult));
971   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatInvTransposeMult));
972 
973   /* Solver options and tolerances */
974   PetscCall(KSPCreate(PETSC_COMM_WORLD,&user->solver));
975   PetscCall(KSPSetType(user->solver,KSPCG));
976   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->JsBlockPrec));
977   PetscCall(KSPSetInitialGuessNonzero(user->solver,PETSC_FALSE));
978   PetscCall(KSPSetTolerances(user->solver,1e-4,1e-20,1e3,500));
979   PetscCall(KSPSetFromOptions(user->solver));
980   PetscCall(KSPGetPC(user->solver,&user->prec));
981   PetscCall(PCSetType(user->prec,PCSHELL));
982 
983   PetscCall(PCShellSetApply(user->prec,StateMatBlockPrecMult));
984   PetscCall(PCShellSetApplyTranspose(user->prec,StateMatBlockPrecMult));
985   PetscCall(PCShellSetContext(user->prec,user));
986 
987   /* Create scatter from y to y_1,y_2,...,y_nt */
988   PetscCall(PetscMalloc1(user->nt*user->m,&user->yi_scatter));
989   PetscCall(VecCreate(PETSC_COMM_WORLD,&yi));
990   PetscCall(VecSetSizes(yi,PETSC_DECIDE,user->mx*user->mx*user->mx));
991   PetscCall(VecSetFromOptions(yi));
992   PetscCall(VecDuplicateVecs(yi,user->nt,&user->yi));
993   PetscCall(VecDuplicateVecs(yi,user->nt,&user->yiwork));
994 
995   PetscCall(VecGetOwnershipRange(user->y,&lo2,&hi2));
996   istart = 0;
997   for (i=0; i<user->nt; i++) {
998     PetscCall(VecGetOwnershipRange(user->yi[i],&lo,&hi));
999     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_yi));
1000     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y));
1001     PetscCall(VecScatterCreate(user->y,is_from_y,user->yi[i],is_to_yi,&user->yi_scatter[i]));
1002     istart = istart + hi-lo;
1003     PetscCall(ISDestroy(&is_to_yi));
1004     PetscCall(ISDestroy(&is_from_y));
1005   }
1006   PetscCall(VecDestroy(&yi));
1007 
1008   /* Create scatter from d to d_1,d_2,...,d_ns */
1009   PetscCall(PetscMalloc1(user->ns*user->ndata,&user->di_scatter));
1010   PetscCall(VecCreate(PETSC_COMM_WORLD,&di));
1011   PetscCall(VecSetSizes(di,PETSC_DECIDE,user->ndata));
1012   PetscCall(VecSetFromOptions(di));
1013   PetscCall(VecDuplicateVecs(di,user->ns,&user->di));
1014   PetscCall(VecGetOwnershipRange(user->d,&lo2,&hi2));
1015   istart = 0;
1016   for (i=0; i<user->ns; i++) {
1017     PetscCall(VecGetOwnershipRange(user->di[i],&lo,&hi));
1018     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_di));
1019     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d));
1020     PetscCall(VecScatterCreate(user->d,is_from_d,user->di[i],is_to_di,&user->di_scatter[i]));
1021     istart = istart + hi-lo;
1022     PetscCall(ISDestroy(&is_to_di));
1023     PetscCall(ISDestroy(&is_from_d));
1024   }
1025   PetscCall(VecDestroy(&di));
1026 
1027   /* Assemble RHS of forward problem */
1028   PetscCall(VecCopy(bc,user->yiwork[0]));
1029   for (i=1; i<user->nt; i++) {
1030     PetscCall(VecSet(user->yiwork[i],0.0));
1031   }
1032   PetscCall(Gather_i(user->q,user->yiwork,user->yi_scatter,user->nt));
1033   PetscCall(VecDestroy(&bc));
1034 
1035   /* Compute true state function yt given ut */
1036   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->ytrue));
1037   PetscCall(VecSetSizes(user->ytrue,PETSC_DECIDE,n*user->nt));
1038   PetscCall(VecSetFromOptions(user->ytrue));
1039 
1040   /* First compute Av_u = Av*exp(-u) */
1041   PetscCall(VecSet(user->uwork,0));
1042   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue)); /*  Note: user->utrue */
1043   PetscCall(VecExp(user->uwork));
1044   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1045 
1046   /* Symbolic DSG = Div * Grad */
1047   PetscCall(MatProductCreate(user->Div,user->Grad,NULL,&user->DSG));
1048   PetscCall(MatProductSetType(user->DSG,MATPRODUCT_AB));
1049   PetscCall(MatProductSetAlgorithm(user->DSG,"default"));
1050   PetscCall(MatProductSetFill(user->DSG,PETSC_DEFAULT));
1051   PetscCall(MatProductSetFromOptions(user->DSG));
1052   PetscCall(MatProductSymbolic(user->DSG));
1053 
1054   user->dsg_formed = PETSC_TRUE;
1055 
1056   /* Next form DSG = Div*Grad */
1057   PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1058   PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u));
1059   if (user->dsg_formed) {
1060     PetscCall(MatProductNumeric(user->DSG));
1061   } else {
1062     PetscCall(MatMatMult(user->Div,user->Grad,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&user->DSG));
1063     user->dsg_formed = PETSC_TRUE;
1064   }
1065   /* B = speye(nx^3) + ht*DSG; */
1066   PetscCall(MatScale(user->DSG,user->ht));
1067   PetscCall(MatShift(user->DSG,1.0));
1068 
1069   /* Now solve for ytrue */
1070   PetscCall(StateMatInvMult(user->Js,user->q,user->ytrue));
1071 
1072   /* Initial guess y0 for state given u0 */
1073 
1074   /* First compute Av_u = Av*exp(-u) */
1075   PetscCall(VecSet(user->uwork,0));
1076   PetscCall(VecAXPY(user->uwork,-1.0,user->u)); /*  Note: user->u */
1077   PetscCall(VecExp(user->uwork));
1078   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1079 
1080   /* Next form DSG = Div*S*Grad */
1081   PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1082   PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u));
1083   if (user->dsg_formed) {
1084     PetscCall(MatProductNumeric(user->DSG));
1085   } else {
1086     PetscCall(MatMatMult(user->Div,user->Grad,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&user->DSG));
1087 
1088     user->dsg_formed = PETSC_TRUE;
1089   }
1090   /* B = speye(nx^3) + ht*DSG; */
1091   PetscCall(MatScale(user->DSG,user->ht));
1092   PetscCall(MatShift(user->DSG,1.0));
1093 
1094   /* Now solve for y */
1095   PetscCall(StateMatInvMult(user->Js,user->q,user->y));
1096 
1097   /* Construct projection matrix Q, a block diagonal matrix consisting of nt copies of Qblock along the diagonal */
1098   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Qblock));
1099   PetscCall(MatSetSizes(user->Qblock,PETSC_DECIDE,PETSC_DECIDE,user->ndata,n));
1100   PetscCall(MatSetFromOptions(user->Qblock));
1101   PetscCall(MatMPIAIJSetPreallocation(user->Qblock,8,NULL,8,NULL));
1102   PetscCall(MatSeqAIJSetPreallocation(user->Qblock,8,NULL));
1103 
1104   for (i=0; i<user->mx; i++) {
1105     x[i] = h*(i+0.5);
1106     y[i] = h*(i+0.5);
1107     z[i] = h*(i+0.5);
1108   }
1109 
1110   PetscCall(MatGetOwnershipRange(user->Qblock,&istart,&iend));
1111   nx = user->mx; ny = user->mx; nz = user->mx;
1112   for (i=istart; i<iend; i++) {
1113     xri = xr[i];
1114     im = 0;
1115     xim = x[im];
1116     while (xri>xim && im<nx) {
1117       im = im+1;
1118       xim = x[im];
1119     }
1120     indx1 = im-1;
1121     indx2 = im;
1122     dx1 = xri - x[indx1];
1123     dx2 = x[indx2] - xri;
1124 
1125     yri = yr[i];
1126     im = 0;
1127     yim = y[im];
1128     while (yri>yim && im<ny) {
1129       im = im+1;
1130       yim = y[im];
1131     }
1132     indy1 = im-1;
1133     indy2 = im;
1134     dy1 = yri - y[indy1];
1135     dy2 = y[indy2] - yri;
1136 
1137     zri = zr[i];
1138     im = 0;
1139     zim = z[im];
1140     while (zri>zim && im<nz) {
1141       im = im+1;
1142       zim = z[im];
1143     }
1144     indz1 = im-1;
1145     indz2 = im;
1146     dz1 = zri - z[indz1];
1147     dz2 = z[indz2] - zri;
1148 
1149     Dx = x[indx2] - x[indx1];
1150     Dy = y[indy2] - y[indy1];
1151     Dz = z[indz2] - z[indz1];
1152 
1153     j = indx1 + indy1*nx + indz1*nx*ny;
1154     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1155     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1156 
1157     j = indx1 + indy1*nx + indz2*nx*ny;
1158     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1159     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1160 
1161     j = indx1 + indy2*nx + indz1*nx*ny;
1162     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1163     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1164 
1165     j = indx1 + indy2*nx + indz2*nx*ny;
1166     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1167     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1168 
1169     j = indx2 + indy1*nx + indz1*nx*ny;
1170     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1171     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1172 
1173     j = indx2 + indy1*nx + indz2*nx*ny;
1174     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1175     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1176 
1177     j = indx2 + indy2*nx + indz1*nx*ny;
1178     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1179     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1180 
1181     j = indx2 + indy2*nx + indz2*nx*ny;
1182     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1183     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1184   }
1185   PetscCall(MatAssemblyBegin(user->Qblock,MAT_FINAL_ASSEMBLY));
1186   PetscCall(MatAssemblyEnd(user->Qblock,MAT_FINAL_ASSEMBLY));
1187 
1188   PetscCall(MatTranspose(user->Qblock,MAT_INITIAL_MATRIX,&user->QblockT));
1189   PetscCall(MatTranspose(user->L,MAT_INITIAL_MATRIX,&user->LT));
1190 
1191   /* Add noise to the measurement data */
1192   PetscCall(VecSet(user->ywork,1.0));
1193   PetscCall(VecAYPX(user->ywork,user->noise,user->ytrue));
1194   PetscCall(Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
1195   for (j=0; j<user->ns; j++) {
1196     i = user->sample_times[j];
1197     PetscCall(MatMult(user->Qblock,user->yiwork[i],user->di[j]));
1198   }
1199   PetscCall(Gather_i(user->d,user->di,user->di_scatter,user->ns));
1200 
1201   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1202   PetscCall(KSPSetFromOptions(user->solver));
1203   PetscCall(PetscFree(x));
1204   PetscCall(PetscFree(y));
1205   PetscCall(PetscFree(z));
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 PetscErrorCode ParabolicDestroy(AppCtx *user)
1210 {
1211   PetscInt       i;
1212 
1213   PetscFunctionBegin;
1214   PetscCall(MatDestroy(&user->Qblock));
1215   PetscCall(MatDestroy(&user->QblockT));
1216   PetscCall(MatDestroy(&user->Div));
1217   PetscCall(MatDestroy(&user->Divwork));
1218   PetscCall(MatDestroy(&user->Grad));
1219   PetscCall(MatDestroy(&user->Av));
1220   PetscCall(MatDestroy(&user->Avwork));
1221   PetscCall(MatDestroy(&user->AvT));
1222   PetscCall(MatDestroy(&user->DSG));
1223   PetscCall(MatDestroy(&user->L));
1224   PetscCall(MatDestroy(&user->LT));
1225   PetscCall(KSPDestroy(&user->solver));
1226   PetscCall(MatDestroy(&user->Js));
1227   PetscCall(MatDestroy(&user->Jd));
1228   PetscCall(MatDestroy(&user->JsInv));
1229   PetscCall(MatDestroy(&user->JsBlock));
1230   PetscCall(MatDestroy(&user->JsBlockPrec));
1231   PetscCall(VecDestroy(&user->u));
1232   PetscCall(VecDestroy(&user->uwork));
1233   PetscCall(VecDestroy(&user->utrue));
1234   PetscCall(VecDestroy(&user->y));
1235   PetscCall(VecDestroy(&user->ywork));
1236   PetscCall(VecDestroy(&user->ytrue));
1237   PetscCall(VecDestroyVecs(user->nt,&user->yi));
1238   PetscCall(VecDestroyVecs(user->nt,&user->yiwork));
1239   PetscCall(VecDestroyVecs(user->ns,&user->di));
1240   PetscCall(PetscFree(user->yi));
1241   PetscCall(PetscFree(user->yiwork));
1242   PetscCall(PetscFree(user->di));
1243   PetscCall(VecDestroy(&user->c));
1244   PetscCall(VecDestroy(&user->cwork));
1245   PetscCall(VecDestroy(&user->ur));
1246   PetscCall(VecDestroy(&user->q));
1247   PetscCall(VecDestroy(&user->d));
1248   PetscCall(VecDestroy(&user->dwork));
1249   PetscCall(VecDestroy(&user->lwork));
1250   PetscCall(VecDestroy(&user->S));
1251   PetscCall(VecDestroy(&user->Swork));
1252   PetscCall(VecDestroy(&user->Av_u));
1253   PetscCall(VecDestroy(&user->Twork));
1254   PetscCall(VecDestroy(&user->Rwork));
1255   PetscCall(VecDestroy(&user->js_diag));
1256   PetscCall(ISDestroy(&user->s_is));
1257   PetscCall(ISDestroy(&user->d_is));
1258   PetscCall(VecScatterDestroy(&user->state_scatter));
1259   PetscCall(VecScatterDestroy(&user->design_scatter));
1260   for (i=0; i<user->nt; i++) {
1261     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1262   }
1263   for (i=0; i<user->ns; i++) {
1264     PetscCall(VecScatterDestroy(&user->di_scatter[i]));
1265   }
1266   PetscCall(PetscFree(user->yi_scatter));
1267   PetscCall(PetscFree(user->di_scatter));
1268   PetscCall(PetscFree(user->sample_times));
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 PetscErrorCode ParabolicMonitor(Tao tao, void *ptr)
1273 {
1274   Vec            X;
1275   PetscReal      unorm,ynorm;
1276   AppCtx         *user = (AppCtx*)ptr;
1277 
1278   PetscFunctionBegin;
1279   PetscCall(TaoGetSolution(tao,&X));
1280   PetscCall(Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
1281   PetscCall(VecAXPY(user->ywork,-1.0,user->ytrue));
1282   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue));
1283   PetscCall(VecNorm(user->uwork,NORM_2,&unorm));
1284   PetscCall(VecNorm(user->ywork,NORM_2,&ynorm));
1285   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm));
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 /*TEST
1290 
1291    build:
1292       requires: !complex
1293 
1294    test:
1295       args: -tao_cmonitor -tao_type lcl -ns 1 -tao_gatol 1.e-4 -ksp_max_it 30
1296       requires: !single
1297 
1298 TEST*/
1299