xref: /petsc/src/tao/pde_constrained/tutorials/parabolic.c (revision 2e65eb737b5b07432530db55f6b2a145ebc548b2)
1 #include <petsc/private/taoimpl.h>
2 
3 typedef struct {
4   PetscInt n; /*  Number of variables */
5   PetscInt m; /*  Number of constraints per time step */
6   PetscInt mx; /*  grid points in each direction */
7   PetscInt nt; /*  Number of time steps; as of now, must be divisible by 8 */
8   PetscInt ndata; /*  Number of data points per sample */
9   PetscInt ns; /*  Number of samples */
10   PetscInt *sample_times; /*  Times of samples */
11   IS       s_is;
12   IS       d_is;
13 
14   VecScatter state_scatter;
15   VecScatter design_scatter;
16   VecScatter *yi_scatter;
17   VecScatter *di_scatter;
18 
19   Mat       Js,Jd,JsBlockPrec,JsInv,JsBlock;
20   PetscBool jformed,dsg_formed;
21 
22   PetscReal alpha; /*  Regularization parameter */
23   PetscReal beta; /*  Weight attributed to ||u||^2 in regularization functional */
24   PetscReal noise; /*  Amount of noise to add to data */
25   PetscReal ht; /*  Time step */
26 
27   Mat Qblock,QblockT;
28   Mat L,LT;
29   Mat Div,Divwork;
30   Mat Grad;
31   Mat Av,Avwork,AvT;
32   Mat DSG;
33   Vec q;
34   Vec ur; /*  reference */
35 
36   Vec d;
37   Vec dwork;
38   Vec *di;
39 
40   Vec y; /*  state variables */
41   Vec ywork;
42 
43   Vec ytrue;
44   Vec *yi,*yiwork;
45 
46   Vec u; /*  design variables */
47   Vec uwork;
48 
49   Vec utrue;
50   Vec js_diag;
51   Vec c; /*  constraint vector */
52   Vec cwork;
53 
54   Vec lwork;
55   Vec S;
56   Vec Rwork,Swork,Twork;
57   Vec Av_u;
58 
59   KSP solver;
60   PC prec;
61 
62   PetscInt ksp_its;
63   PetscInt ksp_its_initial;
64 } AppCtx;
65 
66 PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
67 PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
68 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
69 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
70 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat, void*);
71 PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
72 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
73 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
74 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
75 PetscErrorCode ParabolicInitialize(AppCtx *user);
76 PetscErrorCode ParabolicDestroy(AppCtx *user);
77 PetscErrorCode ParabolicMonitor(Tao, void*);
78 
79 PetscErrorCode StateMatMult(Mat,Vec,Vec);
80 PetscErrorCode StateMatBlockMult(Mat,Vec,Vec);
81 PetscErrorCode StateMatMultTranspose(Mat,Vec,Vec);
82 PetscErrorCode StateMatGetDiagonal(Mat,Vec);
83 PetscErrorCode StateMatDuplicate(Mat,MatDuplicateOption,Mat*);
84 PetscErrorCode StateMatInvMult(Mat,Vec,Vec);
85 PetscErrorCode StateMatInvTransposeMult(Mat,Vec,Vec);
86 PetscErrorCode StateMatBlockPrecMult(PC,Vec,Vec);
87 
88 PetscErrorCode DesignMatMult(Mat,Vec,Vec);
89 PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);
90 
91 PetscErrorCode Gather_i(Vec,Vec*,VecScatter*,PetscInt);
92 PetscErrorCode Scatter_i(Vec,Vec*,VecScatter*,PetscInt);
93 
94 static  char help[]="";
95 
96 int main(int argc, char **argv)
97 {
98   Vec                x,x0;
99   Tao                tao;
100   AppCtx             user;
101   IS                 is_allstate,is_alldesign;
102   PetscInt           lo,hi,hi2,lo2,ksp_old;
103   PetscInt           ntests = 1;
104   PetscInt           i;
105 #if defined(PETSC_USE_LOG)
106   PetscLogStage      stages[1];
107 #endif
108 
109   PetscCall(PetscInitialize(&argc, &argv, (char*)0,help));
110   user.mx = 8;
111   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"parabolic example",NULL);
112   PetscCall(PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL));
113   user.nt = 8;
114   PetscCall(PetscOptionsInt("-nt","Number of time steps","",user.nt,&user.nt,NULL));
115   user.ndata = 64;
116   PetscCall(PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL));
117   user.ns = 8;
118   PetscCall(PetscOptionsInt("-ns","Number of samples","",user.ns,&user.ns,NULL));
119   user.alpha = 1.0;
120   PetscCall(PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL));
121   user.beta = 0.01;
122   PetscCall(PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL));
123   user.noise = 0.01;
124   PetscCall(PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL));
125   PetscCall(PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL));
126   PetscOptionsEnd();
127 
128   user.m = user.mx*user.mx*user.mx; /*  number of constraints per time step */
129   user.n = user.m*(user.nt+1); /*  number of variables */
130   user.ht = (PetscReal)1/user.nt;
131 
132   PetscCall(VecCreate(PETSC_COMM_WORLD,&user.u));
133   PetscCall(VecCreate(PETSC_COMM_WORLD,&user.y));
134   PetscCall(VecCreate(PETSC_COMM_WORLD,&user.c));
135   PetscCall(VecSetSizes(user.u,PETSC_DECIDE,user.n-user.m*user.nt));
136   PetscCall(VecSetSizes(user.y,PETSC_DECIDE,user.m*user.nt));
137   PetscCall(VecSetSizes(user.c,PETSC_DECIDE,user.m*user.nt));
138   PetscCall(VecSetFromOptions(user.u));
139   PetscCall(VecSetFromOptions(user.y));
140   PetscCall(VecSetFromOptions(user.c));
141 
142   /* Create scatters for reduced spaces.
143      If the state vector y and design vector u are partitioned as
144      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
145      then the solution vector x is organized as
146      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
147      The index sets user.s_is and user.d_is correspond to the indices of the
148      state and design variables owned by the current processor.
149   */
150   PetscCall(VecCreate(PETSC_COMM_WORLD,&x));
151 
152   PetscCall(VecGetOwnershipRange(user.y,&lo,&hi));
153   PetscCall(VecGetOwnershipRange(user.u,&lo2,&hi2));
154 
155   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate));
156   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user.s_is));
157 
158   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign));
159   PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user.d_is));
160 
161   PetscCall(VecSetSizes(x,hi-lo+hi2-lo2,user.n));
162   PetscCall(VecSetFromOptions(x));
163 
164   PetscCall(VecScatterCreate(x,user.s_is,user.y,is_allstate,&user.state_scatter));
165   PetscCall(VecScatterCreate(x,user.d_is,user.u,is_alldesign,&user.design_scatter));
166   PetscCall(ISDestroy(&is_alldesign));
167   PetscCall(ISDestroy(&is_allstate));
168 
169   /* Create TAO solver and set desired solution method */
170   PetscCall(TaoCreate(PETSC_COMM_WORLD,&tao));
171   PetscCall(TaoSetType(tao,TAOLCL));
172 
173   /* Set up initial vectors and matrices */
174   PetscCall(ParabolicInitialize(&user));
175 
176   PetscCall(Gather(x,user.y,user.state_scatter,user.u,user.design_scatter));
177   PetscCall(VecDuplicate(x,&x0));
178   PetscCall(VecCopy(x,x0));
179 
180   /* Set solution vector with an initial guess */
181   PetscCall(TaoSetSolution(tao,x));
182   PetscCall(TaoSetObjective(tao, FormFunction, &user));
183   PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user));
184   PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user));
185 
186   PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, user.JsBlockPrec, user.JsInv, FormJacobianState, &user));
187   PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user));
188 
189   PetscCall(TaoSetFromOptions(tao));
190   PetscCall(TaoSetStateDesignIS(tao,user.s_is,user.d_is));
191 
192  /* SOLVE THE APPLICATION */
193   PetscCall(PetscLogStageRegister("Trials",&stages[0]));
194   PetscCall(PetscLogStagePush(stages[0]));
195   user.ksp_its_initial = user.ksp_its;
196   ksp_old = user.ksp_its;
197   for (i=0; i<ntests; i++) {
198     PetscCall(TaoSolve(tao));
199     PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %" PetscInt_FMT "\n",user.ksp_its-ksp_old));
200     PetscCall(VecCopy(x0,x));
201     PetscCall(TaoSetSolution(tao,x));
202   }
203   PetscCall(PetscLogStagePop());
204   PetscCall(PetscBarrier((PetscObject)x));
205   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: "));
206   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%" PetscInt_FMT "\n",user.ksp_its_initial));
207   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Total KSP iterations over %" PetscInt_FMT " trial(s): ",ntests));
208   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%" PetscInt_FMT "\n",user.ksp_its));
209   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations per trial: "));
210   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%" PetscInt_FMT "\n",(user.ksp_its-user.ksp_its_initial)/ntests));
211 
212   PetscCall(TaoDestroy(&tao));
213   PetscCall(VecDestroy(&x));
214   PetscCall(VecDestroy(&x0));
215   PetscCall(ParabolicDestroy(&user));
216 
217   PetscCall(PetscFinalize());
218   return 0;
219 }
220 /* ------------------------------------------------------------------- */
221 /*
222    dwork = Qy - d
223    lwork = L*(u-ur)
224    f = 1/2 * (dwork.dork + alpha*lwork.lwork)
225 */
226 PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
227 {
228   PetscReal      d1=0,d2=0;
229   PetscInt       i,j;
230   AppCtx         *user = (AppCtx*)ptr;
231 
232   PetscFunctionBegin;
233   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
234   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
235   for (j=0; j<user->ns; j++) {
236     i = user->sample_times[j];
237     PetscCall(MatMult(user->Qblock,user->yi[i],user->di[j]));
238   }
239   PetscCall(Gather_i(user->dwork,user->di,user->di_scatter,user->ns));
240   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
241   PetscCall(VecDot(user->dwork,user->dwork,&d1));
242 
243   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
244   PetscCall(MatMult(user->L,user->uwork,user->lwork));
245   PetscCall(VecDot(user->lwork,user->lwork,&d2));
246 
247   *f = 0.5 * (d1 + user->alpha*d2);
248   PetscFunctionReturn(0);
249 }
250 
251 /* ------------------------------------------------------------------- */
252 /*
253     state: g_s = Q' *(Qy - d)
254     design: g_d = alpha*L'*L*(u-ur)
255 */
256 PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
257 {
258   PetscInt       i,j;
259   AppCtx         *user = (AppCtx*)ptr;
260 
261   PetscFunctionBegin;
262   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
263   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
264   for (j=0; j<user->ns; j++) {
265     i = user->sample_times[j];
266     PetscCall(MatMult(user->Qblock,user->yi[i],user->di[j]));
267   }
268   PetscCall(Gather_i(user->dwork,user->di,user->di_scatter,user->ns));
269   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
270   PetscCall(Scatter_i(user->dwork,user->di,user->di_scatter,user->ns));
271   PetscCall(VecSet(user->ywork,0.0));
272   PetscCall(Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
273   for (j=0; j<user->ns; j++) {
274     i = user->sample_times[j];
275     PetscCall(MatMult(user->QblockT,user->di[j],user->yiwork[i]));
276   }
277   PetscCall(Gather_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
278 
279   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
280   PetscCall(MatMult(user->L,user->uwork,user->lwork));
281   PetscCall(MatMult(user->LT,user->lwork,user->uwork));
282   PetscCall(VecScale(user->uwork, user->alpha));
283   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
284   PetscFunctionReturn(0);
285 }
286 
287 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
288 {
289   PetscReal      d1,d2;
290   PetscInt       i,j;
291   AppCtx         *user = (AppCtx*)ptr;
292 
293   PetscFunctionBegin;
294   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
295   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
296   for (j=0; j<user->ns; j++) {
297     i = user->sample_times[j];
298     PetscCall(MatMult(user->Qblock,user->yi[i],user->di[j]));
299   }
300   PetscCall(Gather_i(user->dwork,user->di,user->di_scatter,user->ns));
301   PetscCall(VecAXPY(user->dwork,-1.0,user->d));
302   PetscCall(VecDot(user->dwork,user->dwork,&d1));
303   PetscCall(Scatter_i(user->dwork,user->di,user->di_scatter,user->ns));
304   PetscCall(VecSet(user->ywork,0.0));
305   PetscCall(Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
306   for (j=0; j<user->ns; j++) {
307     i = user->sample_times[j];
308     PetscCall(MatMult(user->QblockT,user->di[j],user->yiwork[i]));
309   }
310   PetscCall(Gather_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
311 
312   PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u));
313   PetscCall(MatMult(user->L,user->uwork,user->lwork));
314   PetscCall(VecDot(user->lwork,user->lwork,&d2));
315   PetscCall(MatMult(user->LT,user->lwork,user->uwork));
316   PetscCall(VecScale(user->uwork, user->alpha));
317   *f = 0.5 * (d1 + user->alpha*d2);
318 
319   PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
320   PetscFunctionReturn(0);
321 }
322 
323 /* ------------------------------------------------------------------- */
324 /* A
325 MatShell object
326 */
327 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
328 {
329   AppCtx         *user = (AppCtx*)ptr;
330 
331   PetscFunctionBegin;
332   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
333   PetscCall(VecSet(user->uwork,0));
334   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
335   PetscCall(VecExp(user->uwork));
336   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
337   PetscCall(VecCopy(user->Av_u,user->Swork));
338   PetscCall(VecReciprocal(user->Swork));
339   PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
340   PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork));
341   if (user->dsg_formed) {
342     PetscCall(MatProductNumeric(user->DSG));
343   } else {
344     PetscCall(MatMatMult(user->Divwork,user->Grad,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&user->DSG));
345     user->dsg_formed = PETSC_TRUE;
346   }
347 
348   /* B = speye(nx^3) + ht*DSG; */
349   PetscCall(MatScale(user->DSG,user->ht));
350   PetscCall(MatShift(user->DSG,1.0));
351   PetscFunctionReturn(0);
352 }
353 
354 /* ------------------------------------------------------------------- */
355 /* B */
356 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
357 {
358   AppCtx         *user = (AppCtx*)ptr;
359 
360   PetscFunctionBegin;
361   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
362   PetscFunctionReturn(0);
363 }
364 
365 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
366 {
367   PetscInt       i;
368   AppCtx         *user;
369 
370   PetscFunctionBegin;
371   PetscCall(MatShellGetContext(J_shell,&user));
372   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
373   PetscCall(MatMult(user->JsBlock,user->yi[0],user->yiwork[0]));
374   for (i=1; i<user->nt; i++) {
375     PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
376     PetscCall(VecAXPY(user->yiwork[i],-1.0,user->yi[i-1]));
377   }
378   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
379   PetscFunctionReturn(0);
380 }
381 
382 PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y)
383 {
384   PetscInt       i;
385   AppCtx         *user;
386 
387   PetscFunctionBegin;
388   PetscCall(MatShellGetContext(J_shell,&user));
389   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
390   for (i=0; i<user->nt-1; i++) {
391     PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
392     PetscCall(VecAXPY(user->yiwork[i],-1.0,user->yi[i+1]));
393   }
394   i = user->nt-1;
395   PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
396   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
397   PetscFunctionReturn(0);
398 }
399 
400 PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y)
401 {
402   AppCtx         *user;
403 
404   PetscFunctionBegin;
405   PetscCall(MatShellGetContext(J_shell,&user));
406   PetscCall(MatMult(user->Grad,X,user->Swork));
407   PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u));
408   PetscCall(MatMult(user->Div,user->Swork,Y));
409   PetscCall(VecAYPX(Y,user->ht,X));
410   PetscFunctionReturn(0);
411 }
412 
413 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
414 {
415   PetscInt       i;
416   AppCtx         *user;
417 
418   PetscFunctionBegin;
419   PetscCall(MatShellGetContext(J_shell,&user));
420 
421   /* sdiag(1./v) */
422   PetscCall(VecSet(user->uwork,0));
423   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
424   PetscCall(VecExp(user->uwork));
425 
426   /* sdiag(1./((Av*(1./v)).^2)) */
427   PetscCall(MatMult(user->Av,user->uwork,user->Swork));
428   PetscCall(VecPointwiseMult(user->Swork,user->Swork,user->Swork));
429   PetscCall(VecReciprocal(user->Swork));
430 
431   /* (Av * (sdiag(1./v) * b)) */
432   PetscCall(VecPointwiseMult(user->uwork,user->uwork,X));
433   PetscCall(MatMult(user->Av,user->uwork,user->Twork));
434 
435   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
436   PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork));
437 
438   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
439   for (i=0; i<user->nt; i++) {
440     /* (sdiag(Grad*y(:,i)) */
441     PetscCall(MatMult(user->Grad,user->yi[i],user->Twork));
442 
443     /* ht * Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
444     PetscCall(VecPointwiseMult(user->Twork,user->Twork,user->Swork));
445     PetscCall(MatMult(user->Div,user->Twork,user->yiwork[i]));
446     PetscCall(VecScale(user->yiwork[i],user->ht));
447   }
448   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
449 
450   PetscFunctionReturn(0);
451 }
452 
453 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
454 {
455   PetscInt       i;
456   AppCtx         *user;
457 
458   PetscFunctionBegin;
459   PetscCall(MatShellGetContext(J_shell,&user));
460 
461   /* sdiag(1./((Av*(1./v)).^2)) */
462   PetscCall(VecSet(user->uwork,0));
463   PetscCall(VecAXPY(user->uwork,-1.0,user->u));
464   PetscCall(VecExp(user->uwork));
465   PetscCall(MatMult(user->Av,user->uwork,user->Rwork));
466   PetscCall(VecPointwiseMult(user->Rwork,user->Rwork,user->Rwork));
467   PetscCall(VecReciprocal(user->Rwork));
468 
469   PetscCall(VecSet(Y,0.0));
470   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
471   PetscCall(Scatter_i(X,user->yiwork,user->yi_scatter,user->nt));
472   for (i=0; i<user->nt; i++) {
473     /* (Div' * b(:,i)) */
474     PetscCall(MatMult(user->Grad,user->yiwork[i],user->Swork));
475 
476     /* sdiag(Grad*y(:,i)) */
477     PetscCall(MatMult(user->Grad,user->yi[i],user->Twork));
478 
479     /* (sdiag(Grad*y(:,i)) * (Div' * b(:,i))) */
480     PetscCall(VecPointwiseMult(user->Twork,user->Swork,user->Twork));
481 
482     /* (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i)))) */
483     PetscCall(VecPointwiseMult(user->Twork,user->Rwork,user->Twork));
484 
485     /* (Av' * (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i))))) */
486     PetscCall(MatMult(user->AvT,user->Twork,user->yiwork[i]));
487 
488     /* sdiag(1./v) * (Av' * (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i))))) */
489     PetscCall(VecPointwiseMult(user->yiwork[i],user->uwork,user->yiwork[i]));
490     PetscCall(VecAXPY(Y,user->ht,user->yiwork[i]));
491   }
492   PetscFunctionReturn(0);
493 }
494 
495 PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y)
496 {
497   AppCtx         *user;
498 
499   PetscFunctionBegin;
500   PetscCall(PCShellGetContext(PC_shell,&user));
501 
502   if (user->dsg_formed) {
503     PetscCall(MatSOR(user->DSG,X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y));
504   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"DSG not formed");
505   PetscFunctionReturn(0);
506 }
507 
508 PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
509 {
510   AppCtx         *user;
511   PetscInt       its,i;
512 
513   PetscFunctionBegin;
514   PetscCall(MatShellGetContext(J_shell,&user));
515 
516   if (Y == user->ytrue) {
517     /* First solve is done with true solution to set up problem */
518     PetscCall(KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
519   } else {
520     PetscCall(KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT));
521   }
522 
523   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
524   PetscCall(KSPSolve(user->solver,user->yi[0],user->yiwork[0]));
525   PetscCall(KSPGetIterationNumber(user->solver,&its));
526   user->ksp_its = user->ksp_its + its;
527 
528   for (i=1; i<user->nt; i++) {
529     PetscCall(VecAXPY(user->yi[i],1.0,user->yiwork[i-1]));
530     PetscCall(KSPSolve(user->solver,user->yi[i],user->yiwork[i]));
531     PetscCall(KSPGetIterationNumber(user->solver,&its));
532     user->ksp_its = user->ksp_its + its;
533   }
534   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
535   PetscFunctionReturn(0);
536 }
537 
538 PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
539 {
540   AppCtx         *user;
541   PetscInt       its,i;
542 
543   PetscFunctionBegin;
544   PetscCall(MatShellGetContext(J_shell,&user));
545 
546   PetscCall(Scatter_i(X,user->yi,user->yi_scatter,user->nt));
547 
548   i = user->nt - 1;
549   PetscCall(KSPSolve(user->solver,user->yi[i],user->yiwork[i]));
550 
551   PetscCall(KSPGetIterationNumber(user->solver,&its));
552   user->ksp_its = user->ksp_its + its;
553 
554   for (i=user->nt-2; i>=0; i--) {
555     PetscCall(VecAXPY(user->yi[i],1.0,user->yiwork[i+1]));
556     PetscCall(KSPSolve(user->solver,user->yi[i],user->yiwork[i]));
557 
558     PetscCall(KSPGetIterationNumber(user->solver,&its));
559     user->ksp_its = user->ksp_its + its;
560 
561   }
562 
563   PetscCall(Gather_i(Y,user->yiwork,user->yi_scatter,user->nt));
564   PetscFunctionReturn(0);
565 }
566 
567 PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
568 {
569   AppCtx         *user;
570 
571   PetscFunctionBegin;
572   PetscCall(MatShellGetContext(J_shell,&user));
573 
574   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,new_shell));
575   PetscCall(MatShellSetOperation(*new_shell,MATOP_MULT,(void(*)(void))StateMatMult));
576   PetscCall(MatShellSetOperation(*new_shell,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate));
577   PetscCall(MatShellSetOperation(*new_shell,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose));
578   PetscCall(MatShellSetOperation(*new_shell,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal));
579   PetscFunctionReturn(0);
580 }
581 
582 PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
583 {
584   AppCtx         *user;
585 
586   PetscFunctionBegin;
587   PetscCall(MatShellGetContext(J_shell,&user));
588   PetscCall(VecCopy(user->js_diag,X));
589   PetscFunctionReturn(0);
590 
591 }
592 
593 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
594 {
595   /* con = Ay - q, A = [B  0  0 ... 0;
596                        -I  B  0 ... 0;
597                         0 -I  B ... 0;
598                              ...     ;
599                         0    ... -I B]
600      B = ht * Div * Sigma * Grad + eye */
601   PetscInt       i;
602   AppCtx         *user = (AppCtx*)ptr;
603 
604   PetscFunctionBegin;
605   PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter));
606   PetscCall(Scatter_i(user->y,user->yi,user->yi_scatter,user->nt));
607   PetscCall(MatMult(user->JsBlock,user->yi[0],user->yiwork[0]));
608   for (i=1; i<user->nt; i++) {
609     PetscCall(MatMult(user->JsBlock,user->yi[i],user->yiwork[i]));
610     PetscCall(VecAXPY(user->yiwork[i],-1.0,user->yi[i-1]));
611   }
612   PetscCall(Gather_i(C,user->yiwork,user->yi_scatter,user->nt));
613   PetscCall(VecAXPY(C,-1.0,user->q));
614   PetscFunctionReturn(0);
615 }
616 
617 PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
618 {
619   PetscFunctionBegin;
620   PetscCall(VecScatterBegin(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD));
621   PetscCall(VecScatterEnd(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD));
622   PetscCall(VecScatterBegin(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD));
623   PetscCall(VecScatterEnd(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD));
624   PetscFunctionReturn(0);
625 }
626 
627 PetscErrorCode Scatter_i(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
628 {
629   PetscInt       i;
630 
631   PetscFunctionBegin;
632   for (i=0; i<nt; i++) {
633     PetscCall(VecScatterBegin(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD));
634     PetscCall(VecScatterEnd(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD));
635   }
636   PetscFunctionReturn(0);
637 }
638 
639 PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
640 {
641   PetscFunctionBegin;
642   PetscCall(VecScatterBegin(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE));
643   PetscCall(VecScatterEnd(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE));
644   PetscCall(VecScatterBegin(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE));
645   PetscCall(VecScatterEnd(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE));
646   PetscFunctionReturn(0);
647 }
648 
649 PetscErrorCode Gather_i(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
650 {
651   PetscInt       i;
652 
653   PetscFunctionBegin;
654   for (i=0; i<nt; i++) {
655     PetscCall(VecScatterBegin(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE));
656     PetscCall(VecScatterEnd(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE));
657   }
658   PetscFunctionReturn(0);
659 }
660 
661 PetscErrorCode ParabolicInitialize(AppCtx *user)
662 {
663   PetscInt       m,n,i,j,k,linear_index,istart,iend,iblock,lo,hi,lo2,hi2;
664   Vec            XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork,yi,di,bc;
665   PetscReal      *x, *y, *z;
666   PetscReal      h,stime;
667   PetscScalar    hinv,neg_hinv,half = 0.5,sqrt_beta;
668   PetscInt       im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
669   PetscReal      xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
670   PetscScalar    v,vx,vy,vz;
671   IS             is_from_y,is_to_yi,is_from_d,is_to_di;
672   /* Data locations */
673   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,
674                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,
675                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,
676                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,
677                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,
678                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,
679                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,
680                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
681 
682   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,
683                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,
684                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,
685                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,
686                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,
687                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,
688                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,
689                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
690 
691   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,
692                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,
693                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,
694                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,
695                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,
696                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,
697                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,
698                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};
699 
700   PetscFunctionBegin;
701   PetscCall(PetscMalloc1(user->mx,&x));
702   PetscCall(PetscMalloc1(user->mx,&y));
703   PetscCall(PetscMalloc1(user->mx,&z));
704   user->jformed = PETSC_FALSE;
705   user->dsg_formed = PETSC_FALSE;
706 
707   n = user->mx * user->mx * user->mx;
708   m = 3 * user->mx * user->mx * (user->mx-1);
709   sqrt_beta = PetscSqrtScalar(user->beta);
710 
711   user->ksp_its = 0;
712   user->ksp_its_initial = 0;
713 
714   stime = (PetscReal)user->nt/user->ns;
715   PetscCall(PetscMalloc1(user->ns,&user->sample_times));
716   for (i=0; i<user->ns; i++) {
717     user->sample_times[i] = (PetscInt)(stime*((PetscReal)i+1.0)-0.5);
718   }
719 
720   PetscCall(VecCreate(PETSC_COMM_WORLD,&XX));
721   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->q));
722   PetscCall(VecSetSizes(XX,PETSC_DECIDE,n));
723   PetscCall(VecSetSizes(user->q,PETSC_DECIDE,n*user->nt));
724   PetscCall(VecSetFromOptions(XX));
725   PetscCall(VecSetFromOptions(user->q));
726 
727   PetscCall(VecDuplicate(XX,&YY));
728   PetscCall(VecDuplicate(XX,&ZZ));
729   PetscCall(VecDuplicate(XX,&XXwork));
730   PetscCall(VecDuplicate(XX,&YYwork));
731   PetscCall(VecDuplicate(XX,&ZZwork));
732   PetscCall(VecDuplicate(XX,&UTwork));
733   PetscCall(VecDuplicate(XX,&user->utrue));
734   PetscCall(VecDuplicate(XX,&bc));
735 
736   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
737   h = 1.0/user->mx;
738   hinv = user->mx;
739   neg_hinv = -hinv;
740 
741   PetscCall(VecGetOwnershipRange(XX,&istart,&iend));
742   for (linear_index=istart; linear_index<iend; linear_index++) {
743     i = linear_index % user->mx;
744     j = ((linear_index-i)/user->mx) % user->mx;
745     k = ((linear_index-i)/user->mx-j) / user->mx;
746     vx = h*(i+0.5);
747     vy = h*(j+0.5);
748     vz = h*(k+0.5);
749     PetscCall(VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES));
750     PetscCall(VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES));
751     PetscCall(VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES));
752     if ((vx<0.6) && (vx>0.4) && (vy<0.6) && (vy>0.4) && (vy<0.6) && (vz<0.6) && (vz>0.4)) {
753       v = 1000.0;
754       PetscCall(VecSetValues(bc,1,&linear_index,&v,INSERT_VALUES));
755     }
756   }
757 
758   PetscCall(VecAssemblyBegin(XX));
759   PetscCall(VecAssemblyEnd(XX));
760   PetscCall(VecAssemblyBegin(YY));
761   PetscCall(VecAssemblyEnd(YY));
762   PetscCall(VecAssemblyBegin(ZZ));
763   PetscCall(VecAssemblyEnd(ZZ));
764   PetscCall(VecAssemblyBegin(bc));
765   PetscCall(VecAssemblyEnd(bc));
766 
767   /* Compute true parameter function
768      ut = 0.5 + exp(-10*((x-0.5)^2+(y-0.5)^2+(z-0.5)^2)) */
769   PetscCall(VecCopy(XX,XXwork));
770   PetscCall(VecCopy(YY,YYwork));
771   PetscCall(VecCopy(ZZ,ZZwork));
772 
773   PetscCall(VecShift(XXwork,-0.5));
774   PetscCall(VecShift(YYwork,-0.5));
775   PetscCall(VecShift(ZZwork,-0.5));
776 
777   PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork));
778   PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork));
779   PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork));
780 
781   PetscCall(VecCopy(XXwork,user->utrue));
782   PetscCall(VecAXPY(user->utrue,1.0,YYwork));
783   PetscCall(VecAXPY(user->utrue,1.0,ZZwork));
784   PetscCall(VecScale(user->utrue,-10.0));
785   PetscCall(VecExp(user->utrue));
786   PetscCall(VecShift(user->utrue,0.5));
787 
788   PetscCall(VecDestroy(&XX));
789   PetscCall(VecDestroy(&YY));
790   PetscCall(VecDestroy(&ZZ));
791   PetscCall(VecDestroy(&XXwork));
792   PetscCall(VecDestroy(&YYwork));
793   PetscCall(VecDestroy(&ZZwork));
794   PetscCall(VecDestroy(&UTwork));
795 
796   /* Initial guess and reference model */
797   PetscCall(VecDuplicate(user->utrue,&user->ur));
798   PetscCall(VecSet(user->ur,0.0));
799 
800   /* Generate Grad matrix */
801   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Grad));
802   PetscCall(MatSetSizes(user->Grad,PETSC_DECIDE,PETSC_DECIDE,m,n));
803   PetscCall(MatSetFromOptions(user->Grad));
804   PetscCall(MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL));
805   PetscCall(MatSeqAIJSetPreallocation(user->Grad,2,NULL));
806   PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend));
807 
808   for (i=istart; i<iend; i++) {
809     if (i<m/3) {
810       iblock = i / (user->mx-1);
811       j = iblock*user->mx + (i % (user->mx-1));
812       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
813       j = j+1;
814       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
815     }
816     if (i>=m/3 && i<2*m/3) {
817       iblock = (i-m/3) / (user->mx*(user->mx-1));
818       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
819       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
820       j = j + user->mx;
821       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
822     }
823     if (i>=2*m/3) {
824       j = i-2*m/3;
825       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
826       j = j + user->mx*user->mx;
827       PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES));
828     }
829   }
830 
831   PetscCall(MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY));
832   PetscCall(MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY));
833 
834   /* Generate arithmetic averaging matrix Av */
835   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Av));
836   PetscCall(MatSetSizes(user->Av,PETSC_DECIDE,PETSC_DECIDE,m,n));
837   PetscCall(MatSetFromOptions(user->Av));
838   PetscCall(MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL));
839   PetscCall(MatSeqAIJSetPreallocation(user->Av,2,NULL));
840   PetscCall(MatGetOwnershipRange(user->Av,&istart,&iend));
841 
842   for (i=istart; i<iend; i++) {
843     if (i<m/3) {
844       iblock = i / (user->mx-1);
845       j = iblock*user->mx + (i % (user->mx-1));
846       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
847       j = j+1;
848       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
849     }
850     if (i>=m/3 && i<2*m/3) {
851       iblock = (i-m/3) / (user->mx*(user->mx-1));
852       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
853       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
854       j = j + user->mx;
855       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
856     }
857     if (i>=2*m/3) {
858       j = i-2*m/3;
859       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
860       j = j + user->mx*user->mx;
861       PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES));
862     }
863   }
864 
865   PetscCall(MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY));
866   PetscCall(MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY));
867 
868   /* Generate transpose of averaging matrix Av */
869   PetscCall(MatTranspose(user->Av,MAT_INITIAL_MATRIX,&user->AvT));
870 
871   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->L));
872   PetscCall(MatSetSizes(user->L,PETSC_DECIDE,PETSC_DECIDE,m+n,n));
873   PetscCall(MatSetFromOptions(user->L));
874   PetscCall(MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL));
875   PetscCall(MatSeqAIJSetPreallocation(user->L,2,NULL));
876   PetscCall(MatGetOwnershipRange(user->L,&istart,&iend));
877 
878   for (i=istart; i<iend; i++) {
879     if (i<m/3) {
880       iblock = i / (user->mx-1);
881       j = iblock*user->mx + (i % (user->mx-1));
882       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
883       j = j+1;
884       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
885     }
886     if (i>=m/3 && i<2*m/3) {
887       iblock = (i-m/3) / (user->mx*(user->mx-1));
888       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
889       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
890       j = j + user->mx;
891       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
892     }
893     if (i>=2*m/3 && i<m) {
894       j = i-2*m/3;
895       PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES));
896       j = j + user->mx*user->mx;
897       PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES));
898     }
899     if (i>=m) {
900       j = i - m;
901       PetscCall(MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES));
902     }
903   }
904 
905   PetscCall(MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY));
906   PetscCall(MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY));
907 
908   PetscCall(MatScale(user->L,PetscPowScalar(h,1.5)));
909 
910   /* Generate Div matrix */
911   PetscCall(MatTranspose(user->Grad,MAT_INITIAL_MATRIX,&user->Div));
912 
913   /* Build work vectors and matrices */
914   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->S));
915   PetscCall(VecSetSizes(user->S, PETSC_DECIDE, user->mx*user->mx*(user->mx-1)*3));
916   PetscCall(VecSetFromOptions(user->S));
917 
918   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->lwork));
919   PetscCall(VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx));
920   PetscCall(VecSetFromOptions(user->lwork));
921 
922   PetscCall(MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork));
923   PetscCall(MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork));
924 
925   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->d));
926   PetscCall(VecSetSizes(user->d,PETSC_DECIDE,user->ndata*user->nt));
927   PetscCall(VecSetFromOptions(user->d));
928 
929   PetscCall(VecDuplicate(user->S,&user->Swork));
930   PetscCall(VecDuplicate(user->S,&user->Av_u));
931   PetscCall(VecDuplicate(user->S,&user->Twork));
932   PetscCall(VecDuplicate(user->S,&user->Rwork));
933   PetscCall(VecDuplicate(user->y,&user->ywork));
934   PetscCall(VecDuplicate(user->u,&user->uwork));
935   PetscCall(VecDuplicate(user->u,&user->js_diag));
936   PetscCall(VecDuplicate(user->c,&user->cwork));
937   PetscCall(VecDuplicate(user->d,&user->dwork));
938 
939   /* Create matrix-free shell user->Js for computing A*x */
940   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m*user->nt,user,&user->Js));
941   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult));
942   PetscCall(MatShellSetOperation(user->Js,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate));
943   PetscCall(MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose));
944   PetscCall(MatShellSetOperation(user->Js,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal));
945 
946   /* Diagonal blocks of user->Js */
947   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsBlock));
948   PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateMatBlockMult));
949   /* Blocks are symmetric */
950   PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockMult));
951 
952   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
953      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
954      This is an SSOR preconditioner for user->JsBlock. */
955   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsBlockPrec));
956   PetscCall(MatShellSetOperation(user->JsBlockPrec,MATOP_MULT,(void(*)(void))StateMatBlockPrecMult));
957   /* JsBlockPrec is symmetric */
958   PetscCall(MatShellSetOperation(user->JsBlockPrec,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockPrecMult));
959   PetscCall(MatSetOption(user->JsBlockPrec,MAT_SYMMETRY_ETERNAL,PETSC_TRUE));
960 
961   /* Create a matrix-free shell user->Jd for computing B*x */
962   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m,user,&user->Jd));
963   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult));
964   PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose));
965 
966   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
967   PetscCall(MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m*user->nt,user,&user->JsInv));
968   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateMatInvMult));
969   PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatInvTransposeMult));
970 
971   /* Solver options and tolerances */
972   PetscCall(KSPCreate(PETSC_COMM_WORLD,&user->solver));
973   PetscCall(KSPSetType(user->solver,KSPCG));
974   PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->JsBlockPrec));
975   PetscCall(KSPSetInitialGuessNonzero(user->solver,PETSC_FALSE));
976   PetscCall(KSPSetTolerances(user->solver,1e-4,1e-20,1e3,500));
977   PetscCall(KSPSetFromOptions(user->solver));
978   PetscCall(KSPGetPC(user->solver,&user->prec));
979   PetscCall(PCSetType(user->prec,PCSHELL));
980 
981   PetscCall(PCShellSetApply(user->prec,StateMatBlockPrecMult));
982   PetscCall(PCShellSetApplyTranspose(user->prec,StateMatBlockPrecMult));
983   PetscCall(PCShellSetContext(user->prec,user));
984 
985   /* Create scatter from y to y_1,y_2,...,y_nt */
986   PetscCall(PetscMalloc1(user->nt*user->m,&user->yi_scatter));
987   PetscCall(VecCreate(PETSC_COMM_WORLD,&yi));
988   PetscCall(VecSetSizes(yi,PETSC_DECIDE,user->mx*user->mx*user->mx));
989   PetscCall(VecSetFromOptions(yi));
990   PetscCall(VecDuplicateVecs(yi,user->nt,&user->yi));
991   PetscCall(VecDuplicateVecs(yi,user->nt,&user->yiwork));
992 
993   PetscCall(VecGetOwnershipRange(user->y,&lo2,&hi2));
994   istart = 0;
995   for (i=0; i<user->nt; i++) {
996     PetscCall(VecGetOwnershipRange(user->yi[i],&lo,&hi));
997     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_yi));
998     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y));
999     PetscCall(VecScatterCreate(user->y,is_from_y,user->yi[i],is_to_yi,&user->yi_scatter[i]));
1000     istart = istart + hi-lo;
1001     PetscCall(ISDestroy(&is_to_yi));
1002     PetscCall(ISDestroy(&is_from_y));
1003   }
1004   PetscCall(VecDestroy(&yi));
1005 
1006   /* Create scatter from d to d_1,d_2,...,d_ns */
1007   PetscCall(PetscMalloc1(user->ns*user->ndata,&user->di_scatter));
1008   PetscCall(VecCreate(PETSC_COMM_WORLD,&di));
1009   PetscCall(VecSetSizes(di,PETSC_DECIDE,user->ndata));
1010   PetscCall(VecSetFromOptions(di));
1011   PetscCall(VecDuplicateVecs(di,user->ns,&user->di));
1012   PetscCall(VecGetOwnershipRange(user->d,&lo2,&hi2));
1013   istart = 0;
1014   for (i=0; i<user->ns; i++) {
1015     PetscCall(VecGetOwnershipRange(user->di[i],&lo,&hi));
1016     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_di));
1017     PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d));
1018     PetscCall(VecScatterCreate(user->d,is_from_d,user->di[i],is_to_di,&user->di_scatter[i]));
1019     istart = istart + hi-lo;
1020     PetscCall(ISDestroy(&is_to_di));
1021     PetscCall(ISDestroy(&is_from_d));
1022   }
1023   PetscCall(VecDestroy(&di));
1024 
1025   /* Assemble RHS of forward problem */
1026   PetscCall(VecCopy(bc,user->yiwork[0]));
1027   for (i=1; i<user->nt; i++) {
1028     PetscCall(VecSet(user->yiwork[i],0.0));
1029   }
1030   PetscCall(Gather_i(user->q,user->yiwork,user->yi_scatter,user->nt));
1031   PetscCall(VecDestroy(&bc));
1032 
1033   /* Compute true state function yt given ut */
1034   PetscCall(VecCreate(PETSC_COMM_WORLD,&user->ytrue));
1035   PetscCall(VecSetSizes(user->ytrue,PETSC_DECIDE,n*user->nt));
1036   PetscCall(VecSetFromOptions(user->ytrue));
1037 
1038   /* First compute Av_u = Av*exp(-u) */
1039   PetscCall(VecSet(user->uwork,0));
1040   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue)); /*  Note: user->utrue */
1041   PetscCall(VecExp(user->uwork));
1042   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1043 
1044   /* Symbolic DSG = Div * Grad */
1045   PetscCall(MatProductCreate(user->Div,user->Grad,NULL,&user->DSG));
1046   PetscCall(MatProductSetType(user->DSG,MATPRODUCT_AB));
1047   PetscCall(MatProductSetAlgorithm(user->DSG,"default"));
1048   PetscCall(MatProductSetFill(user->DSG,PETSC_DEFAULT));
1049   PetscCall(MatProductSetFromOptions(user->DSG));
1050   PetscCall(MatProductSymbolic(user->DSG));
1051 
1052   user->dsg_formed = PETSC_TRUE;
1053 
1054   /* Next form DSG = Div*Grad */
1055   PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1056   PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u));
1057   if (user->dsg_formed) {
1058     PetscCall(MatProductNumeric(user->DSG));
1059   } else {
1060     PetscCall(MatMatMult(user->Div,user->Grad,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&user->DSG));
1061     user->dsg_formed = PETSC_TRUE;
1062   }
1063   /* B = speye(nx^3) + ht*DSG; */
1064   PetscCall(MatScale(user->DSG,user->ht));
1065   PetscCall(MatShift(user->DSG,1.0));
1066 
1067   /* Now solve for ytrue */
1068   PetscCall(StateMatInvMult(user->Js,user->q,user->ytrue));
1069 
1070   /* Initial guess y0 for state given u0 */
1071 
1072   /* First compute Av_u = Av*exp(-u) */
1073   PetscCall(VecSet(user->uwork,0));
1074   PetscCall(VecAXPY(user->uwork,-1.0,user->u)); /*  Note: user->u */
1075   PetscCall(VecExp(user->uwork));
1076   PetscCall(MatMult(user->Av,user->uwork,user->Av_u));
1077 
1078   /* Next form DSG = Div*S*Grad */
1079   PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN));
1080   PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u));
1081   if (user->dsg_formed) {
1082     PetscCall(MatProductNumeric(user->DSG));
1083   } else {
1084     PetscCall(MatMatMult(user->Div,user->Grad,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&user->DSG));
1085 
1086     user->dsg_formed = PETSC_TRUE;
1087   }
1088   /* B = speye(nx^3) + ht*DSG; */
1089   PetscCall(MatScale(user->DSG,user->ht));
1090   PetscCall(MatShift(user->DSG,1.0));
1091 
1092   /* Now solve for y */
1093   PetscCall(StateMatInvMult(user->Js,user->q,user->y));
1094 
1095   /* Construct projection matrix Q, a block diagonal matrix consisting of nt copies of Qblock along the diagonal */
1096   PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Qblock));
1097   PetscCall(MatSetSizes(user->Qblock,PETSC_DECIDE,PETSC_DECIDE,user->ndata,n));
1098   PetscCall(MatSetFromOptions(user->Qblock));
1099   PetscCall(MatMPIAIJSetPreallocation(user->Qblock,8,NULL,8,NULL));
1100   PetscCall(MatSeqAIJSetPreallocation(user->Qblock,8,NULL));
1101 
1102   for (i=0; i<user->mx; i++) {
1103     x[i] = h*(i+0.5);
1104     y[i] = h*(i+0.5);
1105     z[i] = h*(i+0.5);
1106   }
1107 
1108   PetscCall(MatGetOwnershipRange(user->Qblock,&istart,&iend));
1109   nx = user->mx; ny = user->mx; nz = user->mx;
1110   for (i=istart; i<iend; i++) {
1111     xri = xr[i];
1112     im = 0;
1113     xim = x[im];
1114     while (xri>xim && im<nx) {
1115       im = im+1;
1116       xim = x[im];
1117     }
1118     indx1 = im-1;
1119     indx2 = im;
1120     dx1 = xri - x[indx1];
1121     dx2 = x[indx2] - xri;
1122 
1123     yri = yr[i];
1124     im = 0;
1125     yim = y[im];
1126     while (yri>yim && im<ny) {
1127       im = im+1;
1128       yim = y[im];
1129     }
1130     indy1 = im-1;
1131     indy2 = im;
1132     dy1 = yri - y[indy1];
1133     dy2 = y[indy2] - yri;
1134 
1135     zri = zr[i];
1136     im = 0;
1137     zim = z[im];
1138     while (zri>zim && im<nz) {
1139       im = im+1;
1140       zim = z[im];
1141     }
1142     indz1 = im-1;
1143     indz2 = im;
1144     dz1 = zri - z[indz1];
1145     dz2 = z[indz2] - zri;
1146 
1147     Dx = x[indx2] - x[indx1];
1148     Dy = y[indy2] - y[indy1];
1149     Dz = z[indz2] - z[indz1];
1150 
1151     j = indx1 + indy1*nx + indz1*nx*ny;
1152     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1153     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1154 
1155     j = indx1 + indy1*nx + indz2*nx*ny;
1156     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1157     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1158 
1159     j = indx1 + indy2*nx + indz1*nx*ny;
1160     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1161     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1162 
1163     j = indx1 + indy2*nx + indz2*nx*ny;
1164     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1165     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1166 
1167     j = indx2 + indy1*nx + indz1*nx*ny;
1168     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1169     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1170 
1171     j = indx2 + indy1*nx + indz2*nx*ny;
1172     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1173     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1174 
1175     j = indx2 + indy2*nx + indz1*nx*ny;
1176     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1177     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1178 
1179     j = indx2 + indy2*nx + indz2*nx*ny;
1180     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1181     PetscCall(MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES));
1182   }
1183   PetscCall(MatAssemblyBegin(user->Qblock,MAT_FINAL_ASSEMBLY));
1184   PetscCall(MatAssemblyEnd(user->Qblock,MAT_FINAL_ASSEMBLY));
1185 
1186   PetscCall(MatTranspose(user->Qblock,MAT_INITIAL_MATRIX,&user->QblockT));
1187   PetscCall(MatTranspose(user->L,MAT_INITIAL_MATRIX,&user->LT));
1188 
1189   /* Add noise to the measurement data */
1190   PetscCall(VecSet(user->ywork,1.0));
1191   PetscCall(VecAYPX(user->ywork,user->noise,user->ytrue));
1192   PetscCall(Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt));
1193   for (j=0; j<user->ns; j++) {
1194     i = user->sample_times[j];
1195     PetscCall(MatMult(user->Qblock,user->yiwork[i],user->di[j]));
1196   }
1197   PetscCall(Gather_i(user->d,user->di,user->di_scatter,user->ns));
1198 
1199   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1200   PetscCall(KSPSetFromOptions(user->solver));
1201   PetscCall(PetscFree(x));
1202   PetscCall(PetscFree(y));
1203   PetscCall(PetscFree(z));
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 PetscErrorCode ParabolicDestroy(AppCtx *user)
1208 {
1209   PetscInt       i;
1210 
1211   PetscFunctionBegin;
1212   PetscCall(MatDestroy(&user->Qblock));
1213   PetscCall(MatDestroy(&user->QblockT));
1214   PetscCall(MatDestroy(&user->Div));
1215   PetscCall(MatDestroy(&user->Divwork));
1216   PetscCall(MatDestroy(&user->Grad));
1217   PetscCall(MatDestroy(&user->Av));
1218   PetscCall(MatDestroy(&user->Avwork));
1219   PetscCall(MatDestroy(&user->AvT));
1220   PetscCall(MatDestroy(&user->DSG));
1221   PetscCall(MatDestroy(&user->L));
1222   PetscCall(MatDestroy(&user->LT));
1223   PetscCall(KSPDestroy(&user->solver));
1224   PetscCall(MatDestroy(&user->Js));
1225   PetscCall(MatDestroy(&user->Jd));
1226   PetscCall(MatDestroy(&user->JsInv));
1227   PetscCall(MatDestroy(&user->JsBlock));
1228   PetscCall(MatDestroy(&user->JsBlockPrec));
1229   PetscCall(VecDestroy(&user->u));
1230   PetscCall(VecDestroy(&user->uwork));
1231   PetscCall(VecDestroy(&user->utrue));
1232   PetscCall(VecDestroy(&user->y));
1233   PetscCall(VecDestroy(&user->ywork));
1234   PetscCall(VecDestroy(&user->ytrue));
1235   PetscCall(VecDestroyVecs(user->nt,&user->yi));
1236   PetscCall(VecDestroyVecs(user->nt,&user->yiwork));
1237   PetscCall(VecDestroyVecs(user->ns,&user->di));
1238   PetscCall(PetscFree(user->yi));
1239   PetscCall(PetscFree(user->yiwork));
1240   PetscCall(PetscFree(user->di));
1241   PetscCall(VecDestroy(&user->c));
1242   PetscCall(VecDestroy(&user->cwork));
1243   PetscCall(VecDestroy(&user->ur));
1244   PetscCall(VecDestroy(&user->q));
1245   PetscCall(VecDestroy(&user->d));
1246   PetscCall(VecDestroy(&user->dwork));
1247   PetscCall(VecDestroy(&user->lwork));
1248   PetscCall(VecDestroy(&user->S));
1249   PetscCall(VecDestroy(&user->Swork));
1250   PetscCall(VecDestroy(&user->Av_u));
1251   PetscCall(VecDestroy(&user->Twork));
1252   PetscCall(VecDestroy(&user->Rwork));
1253   PetscCall(VecDestroy(&user->js_diag));
1254   PetscCall(ISDestroy(&user->s_is));
1255   PetscCall(ISDestroy(&user->d_is));
1256   PetscCall(VecScatterDestroy(&user->state_scatter));
1257   PetscCall(VecScatterDestroy(&user->design_scatter));
1258   for (i=0; i<user->nt; i++) {
1259     PetscCall(VecScatterDestroy(&user->yi_scatter[i]));
1260   }
1261   for (i=0; i<user->ns; i++) {
1262     PetscCall(VecScatterDestroy(&user->di_scatter[i]));
1263   }
1264   PetscCall(PetscFree(user->yi_scatter));
1265   PetscCall(PetscFree(user->di_scatter));
1266   PetscCall(PetscFree(user->sample_times));
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 PetscErrorCode ParabolicMonitor(Tao tao, void *ptr)
1271 {
1272   Vec            X;
1273   PetscReal      unorm,ynorm;
1274   AppCtx         *user = (AppCtx*)ptr;
1275 
1276   PetscFunctionBegin;
1277   PetscCall(TaoGetSolution(tao,&X));
1278   PetscCall(Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter));
1279   PetscCall(VecAXPY(user->ywork,-1.0,user->ytrue));
1280   PetscCall(VecAXPY(user->uwork,-1.0,user->utrue));
1281   PetscCall(VecNorm(user->uwork,NORM_2,&unorm));
1282   PetscCall(VecNorm(user->ywork,NORM_2,&ynorm));
1283   PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm));
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 /*TEST
1288 
1289    build:
1290       requires: !complex
1291 
1292    test:
1293       args: -tao_cmonitor -tao_type lcl -ns 1 -tao_gatol 1.e-4 -ksp_max_it 30
1294       requires: !single
1295 
1296 TEST*/
1297